
# RRL con Sharpe Diferencial (Dempster & Leemans, 2004)


In [None]:

import numpy as np
import pandas as pd

# --- Synthetic price path -------------------------------------------------
# Fixed seed so the simulated series (and everything derived from it) is
# reproducible from run to run.
np.random.seed(7)
n = 5000
steps = np.random.normal(0, 0.001, size=n)
# Drift that flips sign with a sine wave sampled on [0, 30]: +/-0.0007 per step.
regime = 0.0007 * np.sign(np.sin(np.linspace(0, 30, n)))

# Compound the per-step growth factor, starting from a price of 100.
growth = 1 + steps + regime
price = np.full(n, 100.0)
for t in range(1, n):
    price[t] = price[t - 1] * growth[t]

df = pd.DataFrame({"close": price})
df["ret"] = df["close"].pct_change().fillna(0.0)

# --- Features: the M most recent lagged returns ------------------------------
M = 10
lag_names = [f"ret_lag_{k}" for k in range(1, M + 1)]
for k, lag_name in enumerate(lag_names, start=1):
    df[lag_name] = df["ret"].shift(k).fillna(0.0)
# Drop the first M+1 rows, whose lag columns contain zero-filled placeholders.
df = df.iloc[M + 1:].reset_index(drop=True)

X = df[lag_names].to_numpy(dtype=np.float64, copy=True)
r = df["ret"].to_numpy(dtype=np.float64, copy=True)

# --- Hyper-parameters -------------------------------------------------------
T = len(df)
delta = 0.0001   # cost charged per unit of position change
eta_sr = 0.01    # adaptation rate of the moving estimates A and B in train()
lr = 0.05        # length of each normalised weight update
epochs = 3

# Weight layout: M lagged-return weights, one recurrent weight, one bias.
w = np.zeros(M + 2)

def policy(x, prevF):
    """Evaluate the recurrent trading policy for one time step.

    Reads the module-level weight vector ``w`` (layout: ``M`` feature
    weights, one recurrent weight, one bias).

    Args:
        x: Feature vector of length ``M``.
        prevF: Value fed back through the recurrent weight ``w[M]``.

    Returns:
        Tuple ``(f, F, z)``: the continuous output ``tanh(z)``, its sign
        (an exact zero maps to ``+1`` because of the ``1e-8`` offset) and
        the pre-activation ``z``.
    """
    feature_w, recurrent_w, bias = w[:M], w[M], w[M + 1]
    z = np.dot(feature_w, x) + recurrent_w * prevF + bias
    f = np.tanh(z)
    # The small offset breaks the tie at f == 0 in favour of +1.
    return f, np.sign(f + 1e-8), z

def train(X, r, epochs):
    """Fit the global policy weights ``w`` by batch gradient ascent.

    Each epoch sweeps the series once, accumulates a simplified gradient of
    a Sharpe-style objective, and then moves ``w`` by a step of length
    (approximately) ``lr`` along the normalised gradient.  ``w`` is updated
    in place.

    The gradient is simplified: the fed-back ``prevF`` is treated as a
    constant and the transaction-cost term is not differentiated.

    NOTE(review): the realised return uses the *previous* executed position
    while the gradient uses the *current* step's tanh derivative -- confirm
    this pairing is intended.
    NOTE(review): the moving estimates and the previous positions are not
    reset between epochs -- confirm this carry-over is intended.

    Args:
        X: Feature rows, one per time step (each of length ``M``).
        r: Per-step returns, indexed in step with ``X``.
        epochs: Number of passes over the series.

    Returns:
        The module-level weight vector ``w`` (the same array object).
    """
    global w
    # Exponential moving estimates of the return (A) and squared return (B).
    A = 0.0
    B = 1e-6
    prevF_exec = 0.0
    prevF_cont = 0.0

    for _ in range(epochs):
        dL = np.zeros_like(w)
        for t, x_t in enumerate(X):
            r_t = r[t]
            f_cont, F_exec, z = policy(x_t, prevF_cont)

            # Return earned by the position held coming into this step,
            # minus a cost proportional to the change in position.
            Rt = prevF_exec * r_t - delta * abs(F_exec - prevF_exec)
            A += eta_sr * (Rt - A)
            B += eta_sr * (Rt * Rt - B)

            # Simplified gradient of the objective w.r.t. Rt.
            inv_vol = 1 / np.sqrt(B)
            drag = (A * Rt) / (B ** 1.5 + 1e-12)
            dS_dRt = eta_sr * (inv_vol - drag)

            # dz/dw: the inputs that multiply each weight (features,
            # recurrent input, constant 1 for the bias).
            dz = np.empty_like(w)
            dz[:M] = x_t
            dz[M] = prevF_cont
            dz[M + 1] = 1.0

            df_dw = (1 - np.tanh(z) ** 2) * dz
            dRt_dw = r_t * df_dw
            dL += dS_dRt * dRt_dw

            prevF_cont, prevF_exec = f_cont, F_exec

        # Normalised step; the epsilon guards against a zero gradient.
        step = lr * dL / (np.linalg.norm(dL) + 1e-8)
        w += step
    return w

w = train(X, r, epochs)

# Backtest the trained policy on the same series, compounding per-step returns
# into an equity curve that starts at 1.0.
prevF_exec = 0.0
# The recurrent input must be the policy's own previous continuous output,
# exactly as in train(); feeding a constant would evaluate the learned
# recurrent weight on an input it was never trained with.
prevF_cont = 0.0
equity = [1.0]
for x_t, r_t in zip(X, r):
    f_cont, F_exec, _ = policy(x_t, prevF_cont)
    # Return of the position held coming into this step, minus a cost
    # proportional to the change in position.
    Rt = prevF_exec * r_t - delta * abs(F_exec - prevF_exec)
    equity.append(equity[-1] * (1 + Rt))
    prevF_cont = f_cont
    prevF_exec = F_exec

print("Equity final:", equity[-1])
