# Ritter (2017) — Hedging con RL y utilidad exponencial (sin redes)
Entorno de cobertura de una opción europea (call) sobre un subyacente GBM. La política es lineal en las features y se acota con tanh para obtener la delta en tiempo discreto. Objetivo: maximizar la utilidad exponencial esperada de la riqueza final (aversión al riesgo).


In [None]:

import numpy as np
import matplotlib.pyplot as plt
# Module-level random generator with a fixed seed (17) for reproducibility.
# NOTE(review): nothing visible in this file reads it -- gbm_paths builds its
# own generator from its `seed` argument; confirm whether later cells need it.
rng = np.random.default_rng(17)


In [None]:

def gbm_paths(T=50, N=2000, S0=100.0, mu=0.0, sigma=0.2, dt=1/252, seed=17):
    """Simulate geometric Brownian motion price paths.

    Each step multiplies the previous price by
    ``exp((mu - 0.5 * sigma**2) * dt + sigma * sqrt(dt) * z)`` where ``z`` is a
    fresh standard-normal draw per path.

    Args:
        T: Number of time steps; every path holds ``T + 1`` prices.
        N: Number of independent paths.
        S0: Initial price written into column 0 of every path.
        mu: Drift per unit of time.
        sigma: Volatility per unit of time.
        dt: Length of one time step.
        seed: Seed for the generator created inside this function.

    Returns:
        Array of shape ``(N, T + 1)``; row ``i`` is path ``i``.
    """
    gen = np.random.default_rng(seed)

    # Both terms of the log-return are constant across steps and paths.
    drift = (mu - 0.5 * sigma**2) * dt
    vol = sigma * np.sqrt(dt)

    paths = np.empty((N, T + 1))
    paths[:, 0] = S0
    for step in range(1, T + 1):
        # One draw of size N per step keeps the random stream order fixed.
        shocks = gen.normal(0, 1, size=N)
        paths[:, step] = paths[:, step - 1] * np.exp(drift + vol * shocks)
    return paths

def european_call_payoff(S_T, K):
    """Return the European call payoff ``max(S_T - K, 0)``, elementwise."""
    return np.maximum(S_T - K, 0.0)


def features(S_t, t, T, K):
    """Build the policy features ``[1, (S_t - K) / K, t / T]``.

    Args:
        S_t: Spot price; a scalar or an array of any shape.
        t: Current step index.
        T: Total number of steps; must be non-zero because ``t / T`` is taken.
        K: Scalar strike. The divisor is floored at ``1e-9`` so a zero or
            negative strike cannot cause a division by zero.

    Returns:
        Float array of shape ``S_t.shape + (3,)``: shape ``(3,)`` for a scalar
        spot and ``(N, 3)`` for a vector of ``N`` spots.
    """
    moneyness = (np.asarray(S_t, dtype=float) - K) / max(1e-9, K)
    bias = np.ones_like(moneyness)
    elapsed = np.full_like(moneyness, t / T)
    # Stacking on the last axis keeps one feature row per spot price.
    return np.stack([bias, moneyness, elapsed], axis=-1)


def simulate_hedge(W, S, K=100.0, rf=0.0, dt=1/252, lam=3.0, cost=0.0002):
    """Simulate hedging a short European call along every price path.

    At each step the target holding is ``tanh(features @ W)``, so it lies in
    ``[-1, 1]``. The position is rebalanced to that target at the current
    price, paying a proportional cost on the traded notional, and the cash
    account then accrues interest for one step.

    Cash starts at zero (no option premium is credited) and the terminal
    position is marked at the last price without a transaction cost.

    Args:
        W: Weight vector of length 3, one weight per feature.
        S: Price paths of shape ``(N, T + 1)``.
        K: Strike of the option being hedged.
        rf: Continuously compounded rate applied to cash.
        dt: Length of one time step.
        lam: Risk-aversion coefficient of the exponential utility.
        cost: Proportional transaction cost per unit of traded notional.

    Returns:
        Tuple ``(wealth, util)`` of arrays with shape ``(N,)``, where
        ``util = -exp(-lam * wealth)``.
    """
    N, T = S.shape[0], S.shape[1] - 1
    pos = np.zeros(N)   # units of the underlying held on each path
    cash = np.zeros(N)
    growth = np.exp(rf * dt)  # per-step cash accrual factor, loop-invariant
    for t in range(T):
        spot = S[:, t]
        # One (N, 3) feature matrix per step instead of N scalar calls.
        delta = np.tanh(features(spot, t, T, K) @ W)
        dpos = delta - pos
        # Proportional rebalancing cost on the traded notional.
        cash -= cost * np.abs(dpos) * spot
        # Pay for (or receive proceeds from) the trade itself.
        cash -= dpos * spot
        pos = delta
        cash *= growth
    # Liquidate the hedge and settle the short option.
    payoff = european_call_payoff(S[:, -1], K)
    wealth = cash + pos * S[:, -1] - payoff
    util = -np.exp(-lam * wealth)
    return wealth, util

# Train the hedging weights by plain gradient ascent on the sample mean of the
# exponential utility, re-using the same fixed set of simulated paths.
S = gbm_paths(T=60, N=1000, sigma=0.25)
K, lam = 100.0, 3.0
W = np.zeros(3)
lr = 0.1

for it in range(80):
    wealth, util = simulate_hedge(W, S, K=K, lam=lam)
    J = util.mean()
    # Gradient estimate by forward finite differences: bump one weight at a
    # time by eps and re-simulate on the same paths. This is not a
    # score-function (REINFORCE) estimator.
    # NOTE(review): the utility is -exp(-lam * wealth), so J and g can be very
    # large when some paths end with strongly negative wealth -- confirm that
    # lr = 0.1 keeps the updates well-scaled.
    eps = 1e-3
    g = np.zeros_like(W)
    for k, bump in enumerate(eps * np.eye(len(W))):
        bumped_util = simulate_hedge(W + bump, S, K=K, lam=lam)[1]
        g[k] = (bumped_util.mean() - J) / eps
    W += lr * g
    if it % 10 == 0:
        print(f"iter {it}, E[U]: {J:.6f}")

# Evaluate the trained weights on the training paths and plot the outcome.
wealth, util = simulate_hedge(W, S, K=K, lam=lam)
print("E[wealth]:", wealth.mean(), "Var:", wealth.var())
plt.figure()
plt.hist(wealth, bins=50)
plt.title('Distribución de riqueza final')
plt.show()
