# Framework RL con Indicadores Técnicos (discreto y continuo)

In [None]:

import numpy as np
import pandas as pd

def sma(x, w):
    """Return the simple moving average of ``x`` over a trailing window.

    Args:
        x: Object exposing pandas' ``rolling`` API (e.g. a ``pd.Series``).
        w: Window length passed straight to ``rolling``.

    Returns:
        The rolling mean of ``x``; with pandas' default settings the first
        ``w - 1`` entries are NaN because the window is not yet full.
    """
    window = x.rolling(w)
    return window.mean()
def rsi(x, w=14):
    """Compute a Relative Strength Index of ``x`` on a 0-100 scale.

    Average gain and average loss are plain rolling means over ``w`` samples
    of the one-step differences (not an exponentially smoothed variant).

    Args:
        x: Object exposing pandas' ``diff``/``clip``/``rolling`` API
            (e.g. a ``pd.Series`` of prices).
        w: Rolling window length. Defaults to 14.

    Returns:
        ``100 - 100 / (1 + avg_gain / avg_loss)``, element-wise. Entries whose
        window is not yet full are NaN.
    """
    delta = x.diff()
    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0)
    avg_gain = gains.rolling(w).mean()
    # Losses are non-positive, so negate the mean to get a magnitude.
    avg_loss = -(losses.rolling(w).mean())
    # The small epsilon keeps the ratio finite when there were no losses.
    strength = avg_gain / (avg_loss + 1e-9)
    return 100 - 100 / (1 + strength)

# --- Synthetic market data --------------------------------------------------
# Number of simulated bars and a seeded generator so every run is reproducible.
n=3000
rng=np.random.default_rng(123)

# Close follows a geometric random walk: exp of cumulative N(0, 0.002) steps,
# scaled by 50.
close = 50*np.exp(np.cumsum(rng.normal(0, 0.002, size=n)))

# High/low are the close pushed up/down by a small relative offset. The offset
# magnitude is taken with np.abs so that high >= close >= low always holds; a
# signed offset would regularly put the high below (or the low above) the close.
# The draw order (close, high, low, volume) is kept, so the other series are
# unaffected by this.
high = close*(1+np.abs(rng.normal(0.0005,0.001,size=n)))
low  = close*(1-np.abs(rng.normal(0.0005,0.001,size=n)))

# Volume: 1e5 times a noisy multiplier that is floored at 1e-3 before scaling.
vol  = 1e5*(1 + rng.normal(0,0.05,size=n)).clip(1e-3)

df = pd.DataFrame(dict(close=close, high=high, low=low, volume=vol))

# Features: one-step simple return (first row has no predecessor, set to 0)
# plus 20/50-bar moving averages and a 14-bar RSI of the close.
df["ret"]=df["close"].pct_change().fillna(0.0)
df["sma20"]=sma(df["close"],20)
df["sma50"]=sma(df["close"],50)
df["rsi14"]=rsi(df["close"],14)

# Drop the warm-up rows where the rolling indicators are still NaN and
# renumber the remaining rows from 0.
df = df.dropna().reset_index(drop=True)

class TradingEnv:
    """Minimal long-only, all-in/all-out trading environment.

    The environment walks row by row through ``data``. Actions understood by
    :meth:`step`:

    * ``1`` -- buy with all available cash (ignored unless currently flat);
    * ``2`` -- sell the whole position (ignored unless currently long);
    * any other value -- hold.

    Rows are always addressed by *position*, so ``data`` may carry any index
    (a date index, or the gaps left by ``dropna()``), not only a 0-based one.

    Attributes:
        d: The DataFrame supplied at construction. It must contain a
            ``close`` column and the columns listed in ``OBS_COLUMNS``.
        fee: Proportional fee charged on every buy and every sell.
        t: Position of the current row.
        pos: Units of the asset currently held.
        cash: Uninvested cash; every episode starts with ``1.0``.
    """

    # Columns exposed to the agent, in observation order.
    OBS_COLUMNS = ("ret", "sma20", "sma50", "rsi14")

    def __init__(self, data, fee=0.0005):
        """Store the data and fee and start flat with one unit of cash."""
        self.d = data
        self.fee = fee
        self.t = 0
        self.pos = 0.0
        self.cash = 1.0

    def _close_at(self, t):
        """Return the close price of the row at position ``t``."""
        return self.d["close"].iloc[t]

    def reset(self):
        """Rewind to the first row, flat with ``cash == 1.0``.

        Returns:
            The observation for the first row.
        """
        self.t = 0
        self.pos = 0.0
        self.cash = 1.0
        return self.obs()

    def obs(self):
        """Return the current row's features as a ``float32`` array.

        The array holds ``OBS_COLUMNS`` in order, with NaN replaced by 0.
        """
        row = self.d.iloc[self.t]
        features = row[list(self.OBS_COLUMNS)].fillna(0)
        return features.values.astype(np.float32)

    def step(self, a):
        """Apply action ``a`` at the current close, then advance one row.

        Args:
            a: ``1`` to buy, ``2`` to sell, anything else to hold.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is the
            observation of the new row, ``reward`` is ``log(nav)`` after the
            move, ``done`` is True once the last row has been reached, and
            ``info`` is ``{"nav": nav}``. Because every episode starts with
            ``cash == 1.0``, ``log(nav)`` is the log-return accumulated since
            the reset, not the return of this single step.
        """
        done = False
        price = self._close_at(self.t)

        if a == 1 and self.pos == 0.0:
            # Go all-in: the fee is taken out of the cash before converting.
            self.pos = self.cash * (1 - self.fee) / price
            self.cash = 0.0
        elif a == 2 and self.pos > 0.0:
            # Liquidate everything: the fee is taken out of the proceeds.
            self.cash = self.pos * price * (1 - self.fee)
            self.pos = 0.0

        self.t += 1
        if self.t >= len(self.d) - 1:
            # Last row reached: force-close any open position (fee applies)
            # so the final NAV is entirely cash.
            if self.pos > 0.0:
                self.cash = self.pos * self._close_at(self.t) * (1 - self.fee)
                self.pos = 0.0
            done = True

        # Mark-to-market net asset value at the new row's close.
        nav = self.cash + self.pos * self._close_at(self.t)
        # Epsilon guards against log(0) if the NAV ever collapses to zero.
        reward = np.log(nav + 1e-12)
        return self.obs(), float(reward), done, {"nav": nav}

# Smoke test: build the environment on the prepared frame and reset it once
# (the initial observation is discarded).
env = TradingEnv(data=df)
_ = env.reset()
print("Framework listo. Reemplaza con tu agente preferido (DQN/PPO).")
