In [None]:
# Bootstrap a conda installation inside the Colab runtime so that the
# TA-Lib package can be installed from conda-forge below.
!pip install -q condacolab
import condacolab
# NOTE(review): condacolab.install() presumably restarts the kernel — confirm,
# and continue from the next cell once it has finished.
condacolab.install()

# Install TA-Lib from the conda-forge channel (-y answers the prompt).
!conda install -c conda-forge ta-lib -y

# Gpu

In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[]

# Set_Up

In [None]:
# Run-level configuration.
# NOTE(review): `process`, `direction` and `rolling_window` are not referenced
# anywhere else in the visible part of the notebook — confirm they are needed.
process = 'Train'
symbol = 'XAUUSD'
direction = 'L'  # presumably "long" — TODO confirm
# Root folder (on the mounted Google Drive) holding the data for `symbol`.
root_data = f'/content/drive/MyDrive/Course Folder/Forex/{symbol}/'
print(root_data)

rolling_window = 100

/content/drive/MyDrive/Course Folder/Forex/XAUUSD/


# Libraries

In [None]:
!conda list ta-lib
import talib as ta
print(ta.__version__)

In [None]:
!pip install -q stable_baselines3[extra]

In [None]:
import numpy as np
import pandas as pd
import os
import importlib
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

import tensorflow as tf

import gymnasium as gym
from gymnasium import spaces

from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement

from stable_baselines3 import A2C, PPO, DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor

from stable_baselines3.common.evaluation import evaluate_policy as sb3_eval
from gym.utils import seeding

import sys
sys.path.append("..")

import warnings
warnings.filterwarnings('ignore')


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("seaborn-v0_8-darkgrid")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Functions

In [None]:
def kalman_line(source, kalman_length: int, smooth: int):
    """Kalman-style recursive filter of `source`, followed by an EMA.

    For every bar after the first:
        delta    = source[i] - previous_estimate
        velocity = previous_velocity + (kalman_length / 10000) * delta
        estimate = previous_estimate + sqrt(kalman_length / 10000 * 2) * delta + velocity
    The estimates are then smoothed with ``ewm(span=smooth, adjust=False)``.

    Parameters
    ----------
    source : pd.Series
        Input values; the result is indexed like `source`.
    kalman_length : int
        Filter length; it is scaled by 1/10000 to obtain both gains.
    smooth : int
        Span of the final exponential moving average.

    Returns
    -------
    pd.Series
        Smoothed line with the same index as `source` (empty for empty input).
    """
    # Work on a plain float array: avoids per-element `.iloc` overhead in the loop.
    values = source.to_numpy(dtype=float)
    n = len(values)
    if n == 0:
        # Nothing to filter; the original code raised IndexError here.
        return pd.Series(np.empty(0, dtype=float), index=source.index)

    sqrt_term = np.sqrt(kalman_length / 10000.0 * 2.0)
    length_term = kalman_length / 10000.0

    estimate = np.empty(n)   # filter core
    velocity = np.zeros(n)   # velocity component
    estimate[0] = values[0]  # the first bar seeds the filter with the source value

    for i in range(1, n):
        # Fall back to the current source value when the previous estimate is NaN.
        prev = estimate[i - 1] if not np.isnan(estimate[i - 1]) else values[i]
        delta = values[i] - prev
        velocity[i] = velocity[i - 1] + length_term * delta
        estimate[i] = prev + delta * sqrt_term + velocity[i]

    # Final EMA; the result gets its own name so the function name is not shadowed.
    smoothed = pd.Series(estimate, index=source.index).ewm(span=smooth, adjust=False).mean()
    return smoothed

In [None]:
def slope(src: pd.Series,
          length_kal: int,
          smooth_kal: int,
          slopeLen: int,
          offset: int) -> pd.DataFrame:
    """Slope features of a Kalman-smoothed version of `src`.

    Steps:
      1. Kalman-style recursion over `src` (gains derived from `length_kal`).
      2. EMA smoothing with span `smooth_kal`.
      3. Per-bar slope over `slopeLen` bars plus a "rising" flag.
      4. Angle in degrees of the raw change over `slopeLen` bars plus a flag.
      5. Rolling linear-regression value plus a flag.

    Parameters
    ----------
    src : pd.Series
        Input values.
    length_kal, smooth_kal : int
        Kalman length and EMA span.
    slopeLen : int
        Look-back length; values below 1 are treated as 1.
    offset : int
        Regression offset. The fitted line is evaluated at position
        ``window_length - 1 - offset`` of each window, so only past data is
        used. (The previous implementation shifted the series by ``-offset``,
        which copies future bars into the current row.)

    Returns
    -------
    pd.DataFrame
        Columns: slope_div, slope_signal, slope_angle, slope_angle_signal,
        slope_lin_reg, slope_lin_reg_signal — indexed like `src`.
    """
    values = src.to_numpy(dtype=float)
    n = len(values)
    kf_state = np.full(n, np.nan)
    kf_velo = np.zeros(n)
    sqrt_factor = np.sqrt(length_kal / 10000.0 * 2.0)
    vel_factor = length_kal / 10000.0

    # 1) Kalman-style recursion ------------------------------------------
    for i in range(n):
        if i == 0:
            prev_state, prev_velo = values[0], 0.0
        else:
            # Fall back to the current value when the previous state is NaN.
            prev_state = kf_state[i - 1] if not np.isnan(kf_state[i - 1]) else values[i]
            prev_velo = kf_velo[i - 1]

        dk = values[i] - prev_state
        kf_velo[i] = prev_velo + vel_factor * dk
        kf_state[i] = prev_state + dk * sqrt_factor + kf_velo[i]

    # 2) EMA smoothing ----------------------------------------------------
    kal = pd.Series(kf_state, index=src.index).ewm(span=smooth_kal, adjust=False).mean()

    # 3) Slope / divergence -----------------------------------------------
    validLen = max(slopeLen, 1)
    slope_div = kal.diff(validLen) / validLen
    slope_signal = (slope_div > slope_div.shift(1)).astype(int)

    # 4) Angle in degrees (arctan of the raw change over validLen bars) ---
    price_change = kal - kal.shift(validLen)
    slope_angle = np.degrees(np.arctan(price_change))
    slope_angle_signal = (slope_angle > slope_angle.shift(1)).astype(int)

    # 5) Linear-regression value ------------------------------------------
    def _linreg(window):
        # A single point has no slope; its regression value is the point itself.
        if len(window) < 2:
            return window[-1]
        x = np.arange(len(window))
        m, b = np.polyfit(x, window, 1)
        # Evaluate the fitted line `offset` bars before the end of the window.
        return b + m * (len(window) - 1 - offset)

    # Use the clamped length so slopeLen < 1 cannot produce an empty window.
    slope_lin_reg = kal.rolling(window=validLen).apply(_linreg, raw=True)
    slope_lin_reg_signal = (slope_lin_reg > slope_lin_reg.shift(1)).astype(int)

    # 6) Pack results -----------------------------------------------------
    return pd.DataFrame({
        'slope_div':            slope_div,
        'slope_signal':         slope_signal,
        'slope_angle':          slope_angle,
        'slope_angle_signal':   slope_angle_signal,
        'slope_lin_reg':        slope_lin_reg,
        'slope_lin_reg_signal': slope_lin_reg_signal
    })

# Data



In [None]:
# Load the labelled M5 bars; the first CSV column is read as the index.
# NOTE(review): the file name hard-codes 'XAUUSD' instead of using `symbol`.
df_o = pd.read_csv(root_data + 'Results/XAUUSD_M5_Labels.csv', index_col=0)

#df['atr_mult_low'].tail(5)
print('Shape   : ',df_o.shape)
print('Columns : ',df_o.columns)
# Replace the stored index with a fresh 0..n-1 range.
df_o = df_o.reset_index(drop=True)


Shape   :  (100000, 51)
Columns :  Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Spread', 'ATR',
       'hl2', 'lag_1', 'it_1', 'lag_2', 'it_2', 'lag_3', 'it_3', 'Open_Trade',
       'st_Close_Trade', 'Entry_Date', 'Type', 'Trade_Number', 'st_Exit_Date',
       'trade_type', 'trade type', 'st_Duration', 'st_row_PnL_close',
       'st_row_PnL_high', 'st_row_PnL_Low', 'st_row_PnL_low', 'st_Max',
       'st_Min', 'st_PnL', 'dyn_stoploss', 'SL_PnL', 'SL_Exit_Date',
       'SL_Duration', 'SL_PnL_-100_100_50', 'atr_mult_low', 'atr_mult_close',
       'atr_mult_high', 'atr_dyn', 'atr_PnL', 'atr_Exit_Date', 'atr_Duration',
       'atr_PnL_dollar', 'atr_PnL_dollar_-1.0_0.5_2.0', 'atr_H_dyn',
       'atr_H_PnL', 'atr_H_Exit_Date', 'atr_H_Duration', 'atr_H_PnL_dollar',
       'atr_H_PnL_dollar_-3.0_0.1'],
      dtype='object')


In [None]:
# A trade starts on every bar that carries both an entry date and a trade type.
start_mask = df_o["Entry_Date"].notna() & df_o["Type"].notna()

# Running count of trade starts = trade id of each bar. Bars before the first
# trade have a count of 0 and are set to NaN. Using `where` avoids label
# arithmetic on the index and also yields NaN when there are no trades at all.
trade_counter = start_mask.cumsum()
df_o["Trade_Number"] = trade_counter.where(trade_counter > 0)

df_o.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Spread', 'ATR',
       'hl2', 'lag_1', 'it_1', 'lag_2', 'it_2', 'lag_3', 'it_3', 'Open_Trade',
       'st_Close_Trade', 'Entry_Date', 'Type', 'Trade_Number', 'st_Exit_Date',
       'trade_type', 'trade type', 'st_Duration', 'st_row_PnL_close',
       'st_row_PnL_high', 'st_row_PnL_Low', 'st_row_PnL_low', 'st_Max',
       'st_Min', 'st_PnL', 'dyn_stoploss', 'SL_PnL', 'SL_Exit_Date',
       'SL_Duration', 'SL_PnL_-100_100_50', 'atr_mult_low', 'atr_mult_close',
       'atr_mult_high', 'atr_dyn', 'atr_PnL', 'atr_Exit_Date', 'atr_Duration',
       'atr_PnL_dollar', 'atr_PnL_dollar_-1.0_0.5_2.0', 'atr_H_dyn',
       'atr_H_PnL', 'atr_H_Exit_Date', 'atr_H_Duration', 'atr_H_PnL_dollar',
       'atr_H_PnL_dollar_-3.0_0.1'],
      dtype='object')

In [None]:
# Rows on which a trade is opened.
# NOTE: only the last expression of a cell is displayed, so this first
# selection is computed and then discarded.
df_o.loc[df_o['Entry_Date'].notna(),['Date', 'Open_Trade','Entry_Date', 'st_Exit_Date', 'trade_type','Type', 'Trade_Number']]
# Sample of rows with index labels 60..73 around the start of one trade.
df_o.loc[60:73,['Date', 'Open_Trade','Entry_Date', 'st_Exit_Date', 'trade_type','Type', 'Trade_Number']]

Unnamed: 0,Date,Open_Trade,Entry_Date,st_Exit_Date,trade_type,Type,Trade_Number
60,2024-02-07 10:10:00,,,,,,3.0
61,2024-02-07 10:15:00,-1.0,2024-02-07 10:15:00,2024-02-07 11:35:00,,Short,4.0
62,2024-02-07 10:20:00,,,,,,4.0
63,2024-02-07 10:25:00,,,,,,4.0
64,2024-02-07 10:30:00,,,,,,4.0
65,2024-02-07 10:35:00,,,,,,4.0
66,2024-02-07 10:40:00,,,,,,4.0
67,2024-02-07 10:45:00,,,,,,4.0
68,2024-02-07 10:50:00,,,,,,4.0
69,2024-02-07 10:55:00,,,,,,4.0


In [None]:
# NOTE(review): `columns_to_work` is not used in the visible part of the notebook.
columns_to_work = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Spread', 'ATR', 'hl2', 'lag_1', 'it_1', 'lag_2', 'it_2',
                   'lag_3', 'it_3', 'Open_Trade', 'st_Close_Trade', 'Entry_Date', 'Type', 'Trade_Number', 'st_Exit_Date',
                    'trade type', 'st_Duration','st_row_PnL_close']

# Kalman lines of the close for three filter lengths (EMA span 5).
for _length in (300, 600, 900):
    df_o[f'kal_{_length}'] = kalman_line(df_o['Close'], _length, 5)

# Bar-to-bar absolute and relative change of each Kalman line.
for _length in (300, 600, 900):
    _line = df_o[f'kal_{_length}']
    df_o[f'kal_{_length}_diff'] = _line.diff()
    df_o[f'kal_{_length}_pct'] = _line.pct_change()

# Spread between each lag_k / it_k pair.
for _k in (1, 2, 3):
    df_o[f'lag_it_{_k}'] = df_o[f'lag_{_k}'] - df_o[f'it_{_k}']

# Flip the sign of every directional feature on rows whose 'trade type' is 0.
# NOTE(review): presumably 0 marks short trades — TODO confirm.
_flip = df_o['trade type'] == 0
_directional = (
    [f'lag_it_{_k}' for _k in (1, 2, 3)]
    + [f'kal_{_length}_diff' for _length in (300, 600, 900)]
    + [f'kal_{_length}_pct' for _length in (300, 600, 900)]
)
for _col in _directional:
    df_o[_col] = np.where(_flip, df_o[_col] * -1, df_o[_col])


In [None]:
# Columns handed to the RL pipeline.
df = df_o.loc[:,['Date','trade type','Close','Trade_Number', 'atr_mult_close', 'atr_mult_high',
                 'atr_mult_low', 'kal_300_diff', 'kal_600_diff', 'kal_900_diff', 'kal_300_pct',
                 'kal_600_pct', 'kal_900_pct']]

# Keep the first 10,000 bars. Take an explicit copy so the assignment below
# writes to an independent frame instead of a slice of `df_o`.
df = df.iloc[0:10000,:].copy()
df['Date'] = pd.to_datetime(df['Date'])

print('Columns :', df.columns)
print('Shape :', df.shape)
print('Days :', (df['Date'].max() - df['Date'].min()).days)
print(df.isnull().sum())


Columns : Index(['Date', 'trade type', 'Close', 'Trade_Number', 'atr_mult_close',
       'atr_mult_high', 'atr_mult_low', 'kal_300_diff', 'kal_600_diff',
       'kal_900_diff', 'kal_300_pct', 'kal_600_pct', 'kal_900_pct'],
      dtype='object')
Shape : (10000, 13)
Days : 50
Date               0
trade type        13
Close              0
Trade_Number      13
atr_mult_close    13
atr_mult_high     13
atr_mult_low      13
kal_300_diff       1
kal_600_diff       1
kal_900_diff       1
kal_300_pct        1
kal_600_pct        1
kal_900_pct        1
dtype: int64


In [None]:
# NOTE(review): `df` was cut to 10,000 rows in the previous cell, so asking for
# the last 30,000 rows returns the whole frame (the printed shape is
# (10000, 13)). `df_f` is therefore the same data as `df` here.
df_f = df.iloc[-30000:,:]
df_f['Date'] = pd.to_datetime(df_f['Date'])

print('Columns :', df_f.columns)
print('Shape :', df_f.shape)
print('Days :', (df_f['Date'].max() - df_f['Date'].min()).days)
df_f.tail(20)

Columns : Index(['Date', 'trade type', 'Close', 'Trade_Number', 'atr_mult_close',
       'atr_mult_high', 'atr_mult_low', 'kal_300_diff', 'kal_600_diff',
       'kal_900_diff', 'kal_300_pct', 'kal_600_pct', 'kal_900_pct'],
      dtype='object')
Shape : (10000, 13)
Days : 50


Unnamed: 0,Date,trade type,Close,Trade_Number,atr_mult_close,atr_mult_high,atr_mult_low,kal_300_diff,kal_600_diff,kal_900_diff,kal_300_pct,kal_600_pct,kal_900_pct
9980,2024-03-28 11:30:00,0.0,2194.37,953.0,0.6975,0.0476,0.6975,-0.166476,-0.333375,-0.379228,-7.6e-05,-0.000152,-0.000173
9981,2024-03-28 11:35:00,0.0,2196.07,953.0,0.325,-1.4901,0.325,-0.353839,-0.549492,-0.616356,-0.000161,-0.000251,-0.000281
9982,2024-03-28 11:40:00,1.0,2196.28,954.0,-0.2095,0.8031,-0.2095,0.461181,0.633878,0.676932,0.00021,0.000289,0.000309
9983,2024-03-28 11:45:00,1.0,2195.84,954.0,-0.3422,0.5098,-0.3422,0.455246,0.551213,0.532111,0.000208,0.000251,0.000242
9984,2024-03-28 11:50:00,1.0,2196.02,954.0,-0.5377,0.014,-0.5377,0.440765,0.472778,0.414741,0.000201,0.000215,0.000189
9985,2024-03-28 11:55:00,1.0,2194.83,954.0,-1.0126,0.0489,-1.0126,0.295286,0.216931,0.089659,0.000135,9.9e-05,4.1e-05
9986,2024-03-28 12:00:00,1.0,2193.54,954.0,-2.3815,-0.9987,-2.3815,0.074638,-0.118128,-0.305201,3.4e-05,-5.4e-05,-0.000139
9987,2024-03-28 12:05:00,0.0,2192.9,955.0,0.3072,-0.4215,0.3072,0.115493,0.370992,0.565431,5.3e-05,0.000169,0.000258
9988,2024-03-28 12:10:00,0.0,2194.32,955.0,0.1072,-1.2715,0.1072,0.087384,0.275041,0.37068,4e-05,0.000125,0.000169
9989,2024-03-28 12:15:00,0.0,2195.35,955.0,-1.0001,-1.8573,-1.0001,-0.027055,0.060951,0.053657,-1.2e-05,2.8e-05,2.4e-05


In [None]:
# Persist the feature frame next to the label file (the index is written too).
df.to_csv(root_data+'Results/'+symbol+'_RL_Features.csv')
#df.tail(5)

# Code

In [None]:
#### Closing Function

def atr_H_dyn(
    data: pd.DataFrame,
    initial_H_atr: float = -2.0,
    gap_H_atr: float = 1.0
) -> pd.DataFrame:
    """
    Compute a trailing stop expressed in ATR multiples and store it in 'atr_H_dyn'.

    • The entry bar of a trade (non-null 'Entry_Date') sets the stop to `initial_H_atr`.
    • On each following bar of the trade:
          stop = max(previous_stop, best_multiple - gap_H_atr)
      where best_multiple is the largest non-NaN value among
      'atr_mult_high', 'atr_mult_low' and 'atr_mult_close' (0 if all are NaN).
    • If any of the three multiples is below the previous stop, the stop is
      considered broken and the column stays NaN until the trade closes.
    • The trade closes on the bar whose 'Date' equals the exit date. The exit
      date is taken from the entry bar's 'st_Exit_Date' (and, as before, from
      the current bar's own 'st_Exit_Date' when that is filled). The entry bar
      itself is also checked, so a trade that exits on its entry bar is closed.

    Parameters
    ----------
    data : pd.DataFrame
        Needs the columns 'Date', 'Entry_Date', 'st_Exit_Date',
        'atr_mult_high', 'atr_mult_low', 'atr_mult_close'. It is not modified.
    initial_H_atr : float
        Stop level written on the entry bar.
    gap_H_atr : float
        Distance kept between the best multiple and the stop.

    Returns
    -------
    pd.DataFrame
        Copy of `data` with the column 'atr_H_dyn' (re)computed.
    """
    df = data.copy()
    col_name = "atr_H_dyn"
    df[col_name] = np.nan  # bars outside a live stop keep this NaN

    in_trade = False       # between an entry bar and its exit bar
    stop_live = False      # stop has not been broken yet
    stop = initial_H_atr
    exit_date = None       # exit date announced on the entry bar

    for idx, row in df.iterrows():

        if pd.notna(row["Entry_Date"]):
            # New trade: (re)initialise the state and write the first stop value.
            in_trade = True
            stop_live = True
            stop = initial_H_atr
            exit_date = row["st_Exit_Date"]
            df.at[idx, col_name] = stop
        elif not in_trade:
            continue
        elif stop_live:
            multiples = [
                row["atr_mult_high"],
                row["atr_mult_low"],
                row["atr_mult_close"],
            ]
            if any(m < stop for m in multiples):
                # Price pierced the previous stop: broken until the trade closes.
                stop_live = False
            else:
                finite = [m for m in multiples if pd.notna(m)]
                best_pnl = max(finite) if finite else 0
                # Ratchet: the stop never moves backwards.
                stop = max(stop, best_pnl - gap_H_atr)
                df.at[idx, col_name] = stop

        # Close the trade on its exit bar (after the bar's value was written).
        row_exit = row["st_Exit_Date"]
        closes_here = (
            (pd.notna(row_exit) and row["Date"] == row_exit)
            or (pd.notna(exit_date) and row["Date"] == exit_date)
        )
        if closes_here:
            in_trade = False
            stop_live = False
            exit_date = None

    return df

In [None]:
def split_by_trade(df, test=0.2, val=0.1):
    """Split `df` into train / validation / test frames by whole trades.

    Trade ids are taken from the non-null values of 'Trade_Number', shuffled
    with the module-level SEED and split twice with `train_test_split`: first
    train+val vs. test, then train vs. val (so `val` is a share of the
    remaining train+val ids). Both shares are capped so that at least one
    trade's worth of share is left for the other parts.

    Raises
    ------
    ValueError
        If fewer than three distinct trades are present.

    Returns
    -------
    tuple of pd.DataFrame
        (train, validation, test), each with a fresh 0..n-1 index.
    """
    trade_ids = df["Trade_Number"].dropna().unique()
    n_trades = len(trade_ids)

    # Guard: three parts need at least three trades.
    if n_trades < 3:
        raise ValueError("El DataFrame necesita ≥3 trades únicos.")

    # Cap the requested shares when there are only a few trades.
    one_trade_share = 1.0 / n_trades
    test_size = min(test, 1.0 - one_trade_share)
    val_size = min(val, 1.0 - test_size - one_trade_share)

    train_val_ids, test_ids = train_test_split(
        trade_ids, test_size=test_size, random_state=SEED, shuffle=True
    )
    train_ids, val_ids = train_test_split(
        train_val_ids, test_size=val_size, random_state=SEED, shuffle=True
    )

    def _rows_of(ids):
        selected = df["Trade_Number"].isin(ids)
        return df[selected].reset_index(drop=True)

    return _rows_of(train_ids), _rows_of(val_ids), _rows_of(test_ids)

In [None]:
# ───────── parameters of the dynamic stop ─────────
# Feature columns fed to the agent. A second assignment that replaced this
# list with "Encoded_0".."Encoded_7" was removed: no such columns are created
# anywhere in the notebook, and the later SET_UP cells use this same list.
FEATURE_COLS = ["atr_mult_close", "atr_mult_high", "atr_mult_low", "kal_300_diff",   "kal_600_diff",  "kal_900_diff",
    "kal_300_pct",    "kal_600_pct",   "kal_900_pct"]

# Folder for RL results on the mounted Drive.
root_results = '/content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL/'

SEED = 42

# Bounds of the gap (in ATR multiples) the agent may choose.
MIN_GAP_H_ATR = 0.10
MAX_GAP_H_ATR = 5.00

# Stop level at the start of every episode, and the training budget.
INITIAL_H_ATR = -2.0
TOTAL_TIMESTEPS = 50_000

In [None]:
class StopLossEnv(gym.Env):
    """
    RL environment in which the agent chooses the trailing gap at every step.

    The stop level is ratcheted with ``sl = max(sl_prev, best_pnl - gap)``,
    where best_pnl is the largest of the bar's three ATR multiples. The episode
    terminates when ``atr_mult_low <= sl`` (stop hit) or on the last row of the
    data. The reward is the stop level on the terminating step and 0 otherwise.

    Observation: the FEATURE_COLS of the current row followed by
    (stop level, gap, step index). Action: one float in
    [MIN_GAP_H_ATR, MAX_GAP_H_ATR].
    """
    metadata = {"render.modes": ["none"]}

    def __init__(self, df_trade):
        """Store the rows to replay and declare the spaces.

        Raises ValueError when `df_trade` has no rows.
        """
        super().__init__()
        self.df      = df_trade.reset_index(drop=True)
        if self.df.empty:
            raise ValueError("StopLossEnv needs at least one row.")
        # step() compares against this; it was never assigned before.
        self.n_steps = len(self.df)
        self.trades  = self.df['Trade_Number'].unique()
        self.current_trade_idx = 0

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(FEATURE_COLS) + 3,), dtype=np.float32
        )
        self.action_space = spaces.Box(
            low=np.array([MIN_GAP_H_ATR], dtype=np.float32),
            high=np.array([MAX_GAP_H_ATR], dtype=np.float32),
            dtype=np.float32
        )

        # This class defines no seed() method, so the former `self.seed(SEED)`
        # call is dropped; seeding is done through reset(seed=...).
        # Episode state; reset() re-initialises it.
        self.step_idx = 0
        self.gap      = 1.0
        self.sl_level = INITIAL_H_ATR

    def reset(self, *, seed=None, options=None):
        """Start a new episode at the first row; returns (observation, info)."""
        super().reset(seed=seed)
        self.step_idx = 0
        self.gap      = 1.0                          # initial gap
        self.sl_level = INITIAL_H_ATR                # stop starts at INITIAL_H_ATR
        return self._get_obs(), {}

    def step(self, action):
        """Apply the chosen gap to the current row and advance one row."""
        self.gap = float(np.clip(action[0], MIN_GAP_H_ATR, MAX_GAP_H_ATR))

        row = self.df.loc[self.step_idx]
        best_pnl = np.nanmax([row["atr_mult_high"],
                              row["atr_mult_low"],
                              row["atr_mult_close"]])
        best_pnl = 0.0 if np.isnan(best_pnl) else best_pnl
        candidate_sl = best_pnl - self.gap
        self.sl_level = max(self.sl_level, candidate_sl)

        # Plain Python bool rather than a numpy bool.
        terminated = bool(
            row["atr_mult_low"] <= self.sl_level or self.step_idx >= self.n_steps - 1
        )
        reward = self.sl_level if terminated else 0.0

        self.step_idx += 1
        truncated = False
        return self._get_obs(), reward, terminated, truncated, {}

    def _get_obs(self):
        """Observation for the current row (clamped to the last row)."""
        # After the final step step_idx equals n_steps; clamp so the terminal
        # observation does not index past the end of the frame.
        row_idx   = min(self.step_idx, self.n_steps - 1)
        row_feats = self.df.loc[row_idx, FEATURE_COLS].to_numpy(dtype=np.float32)
        extra     = np.array([self.sl_level, self.gap, self.step_idx], dtype=np.float32)
        return np.concatenate([row_feats, extra])

    def render(self):
        pass  # no visualisation needed


In [None]:
def make_vector_env(df_subset: pd.DataFrame, n_envs: int = 8):
    """Build a monitored DummyVecEnv with one StopLossEnv per trade.

    The rows of every distinct 'Trade_Number' in `df_subset` are selected up
    front and handed to their own sub-environment.

    NOTE(review): `n_envs` is accepted but not used; the number of
    sub-environments equals the number of distinct trades.
    """
    from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor

    builders = []
    for trade_id in df_subset["Trade_Number"].unique():
        trade_rows = df_subset[df_subset["Trade_Number"] == trade_id]
        # Bind the rows as a default argument so each builder keeps its own trade.
        builders.append(lambda rows=trade_rows: StopLossEnv(rows))

    return VecMonitor(DummyVecEnv(builders))


In [None]:
def evaluate(df_test: pd.DataFrame):
    """Evaluate every saved model listed in ALGORITHMS on `df_test`.

    For each algorithm name the model stored at
    ``MODEL_DIR / <name> / "best_model.zip"`` is loaded and run
    deterministically on one StopLossEnv per trade. Only the first episode of
    each sub-environment is counted.

    Returns
    -------
    dict
        algorithm name -> mean first-episode reward per trade. Algorithms
        without a saved model are reported and skipped.
    """
    from stable_baselines3.common.vec_env import DummyVecEnv

    results = {}
    trade_ids = df_test["Trade_Number"].dropna().unique()
    if len(trade_ids) == 0:
        return results

    for alg_name, spec in ALGORITHMS.items():
        model_path = MODEL_DIR / alg_name / "best_model.zip"
        if not model_path.exists():
            print(f"❌ No model {alg_name} entrenado.")
            continue

        # NOTE(review): assumes ALGORITHMS maps a name to an SB3 algorithm class
        # or to a (class, kwargs) pair — TODO confirm against its definition.
        algo_cls = spec[0] if isinstance(spec, (tuple, list)) else spec
        model = algo_cls.load(model_path)

        env = DummyVecEnv([
            lambda tid=tid: StopLossEnv(df_test[df_test["Trade_Number"] == tid])
            for tid in trade_ids
        ])
        try:
            # SB3 VecEnv API: reset() returns observations only and step()
            # returns (obs, rewards, dones, infos).
            obs = env.reset()
            finished = np.zeros(env.num_envs, dtype=bool)
            total_reward = 0.0
            while not finished.all():
                # Actions come from the model; a VecEnv has no predict().
                action, _ = model.predict(obs, deterministic=True)
                obs, rewards, dones, _ = env.step(action)
                # Finished sub-envs are auto-reset and keep stepping, so only
                # rewards of sub-envs still in their first episode are counted.
                total_reward += float(np.asarray(rewards)[~finished].sum())
                finished |= np.asarray(dones, dtype=bool)
            results[alg_name] = total_reward / env.num_envs
        finally:
            env.close()
    return results


def baseline_fixed_sl(df_test: pd.DataFrame, fixed_sl: float = 1.0):
    """Simple reference: a stop that never moves.

    For every trade (group of 'Trade_Number') the reward is `fixed_sl` when any
    bar has ``atr_mult_low <= fixed_sl`` and 0.0 otherwise; the mean over all
    trades is returned.
    """
    stop_was_hit = [
        (trade_rows["atr_mult_low"] <= fixed_sl).any()
        for _, trade_rows in df_test.groupby("Trade_Number")
    ]
    per_trade_reward = [fixed_sl if hit else 0.0 for hit in stop_was_hit]
    return np.mean(per_trade_reward)


In [None]:
def split_by_trade(df, test=0.2, val=0.1):
    """Split `df` into (train, validation, test) frames by whole trades.

    This definition replaces the earlier `split_by_trade` when the notebook is
    run top to bottom, so it applies the same safeguards: NaN trade ids are
    ignored, at least three distinct trades are required, and the requested
    shares are capped when there are only a few trades.

    `val` is a share of the ids left after the test split. Splits are shuffled
    with the module-level SEED.

    Raises
    ------
    ValueError
        If fewer than three distinct trades are present.
    """
    trades = df['Trade_Number'].dropna().unique()
    if len(trades) < 3:
        raise ValueError("El DataFrame necesita ≥3 trades únicos.")

    # Cap the shares so the other parts keep at least one trade's worth.
    one_trade_share = 1.0 / len(trades)
    test_size = min(test, 1.0 - one_trade_share)
    val_size  = min(val, 1.0 - test_size - one_trade_share)

    tr_ids, te_ids = train_test_split(trades, test_size=test_size, random_state=SEED)
    tr_ids, va_ids = train_test_split(tr_ids, test_size=val_size,  random_state=SEED)

    def _sub(ids):
        return df[df['Trade_Number'].isin(ids)].reset_index(drop=True)

    return _sub(tr_ids), _sub(va_ids), _sub(te_ids)

class StopLossEnv(gym.Env):
    """
    Trailing-stop environment that replays the trades of `df`, one trade per episode.

    At every step the agent chooses a gap; the stop is ratcheted with
    ``sl = max(sl, best_pnl - gap)`` (best_pnl = largest of the bar's three ATR
    multiples). The episode terminates when ``atr_mult_low <= sl`` or on the
    trade's last bar; the reward is the stop level on that step and 0 otherwise.
    When an episode terminates the environment moves on to the next trade
    (wrapping around), which is loaded by the following reset().

    Observation: FEATURE_COLS of the current bar followed by
    (stop level, gap, step index).

    NOTE(review): the stop is updated with the current bar's best multiple and
    then compared with the same bar's low — confirm this ordering is intended.
    """
    metadata = {"render.modes": []}

    def __init__(self, df):
        """Store the rows, collect the trade ids and declare the spaces.

        Raises ValueError when `df` contains no row with a 'Trade_Number'.
        """
        super().__init__()
        self.df = df.reset_index(drop=True)
        # Ignore NaN ids: `Trade_Number == NaN` never matches a row, so such an
        # id would produce an empty episode.
        self.trades = self.df['Trade_Number'].dropna().unique()
        if len(self.trades) == 0:
            raise ValueError("StopLossEnv needs at least one row with a Trade_Number.")
        self.current_trade_idx = 0

        self.observation_space = spaces.Box(-np.inf, np.inf,
                                            shape=(len(FEATURE_COLS)+3,), dtype=np.float32)
        self.action_space      = spaces.Box(np.array([MIN_GAP_H_ATR],np.float32),
                                            np.array([MAX_GAP_H_ATR],np.float32), dtype=np.float32)
        self.seed(SEED)

    def seed(self, seed: int | None = None):
        """Seed the environment RNG and numpy's global RNG; returns [seed]."""
        self.np_random, seed = seeding.np_random(seed)
        np.random.seed(seed)
        return [seed]

    def reset(self, *, seed=None, options=None):
        """Load the current trade and start at its first bar."""
        super().reset(seed=seed)
        trade_id = self.trades[self.current_trade_idx]
        self.current_trade = self.df[self.df['Trade_Number'] == trade_id].reset_index(drop=True)
        self.n = len(self.current_trade)
        self.step_idx, self.gap, self.sl = 0, 1.0, INITIAL_H_ATR
        return self._obs(), {}

    def step(self, action):
        """Apply the chosen gap to the current bar and advance one bar."""
        self.gap = float(np.clip(action[0], MIN_GAP_H_ATR, MAX_GAP_H_ATR))
        row      = self.current_trade.loc[self.step_idx]
        best_pnl = np.nanmax(row[["atr_mult_high","atr_mult_low","atr_mult_close"]].values)
        best_pnl = 0.0 if np.isnan(best_pnl) else best_pnl
        self.sl  = max(self.sl, best_pnl - self.gap)

        terminated = bool(row["atr_mult_low"] <= self.sl or self.step_idx == self.n-1)
        reward = self.sl if terminated else 0.0

        self.step_idx += 1
        truncated = False

        if terminated:
            # Queue the next trade (loaded by the next reset) and point the
            # terminal observation at a valid row.
            self.current_trade_idx = (self.current_trade_idx + 1) % len(self.trades)
            self.step_idx = 0

        return self._obs(), reward, terminated, truncated, {}

    def _obs(self):
        """Observation for the current bar; does not modify any state."""
        # step() resets step_idx on termination and every trade id has at least
        # one row, so step_idx is always a valid row here.
        feats = self.current_trade.loc[self.step_idx, FEATURE_COLS].astype(np.float32).values
        extra = np.array([self.sl, self.gap, self.step_idx], np.float32)
        return np.concatenate([feats, extra])


# --- Wrapper para usar DQN (discretizamos la acción) -------------------
class DiscreteGapWrapper(gym.ActionWrapper):
    """Expose the continuous gap action as `n_bins` discrete choices (for DQN).

    The bins are evenly spaced between MIN_GAP_H_ATR and MAX_GAP_H_ATR.
    """
    def __init__(self, env, n_bins=None):
        super().__init__(env)
        # Resolve the default when the wrapper is created, not when the class
        # is defined: N_BINS_DQN is assigned in a later cell, so using it as a
        # default argument value raised NameError on a fresh top-to-bottom run.
        if n_bins is None:
            n_bins = N_BINS_DQN
        self.bins = np.linspace(MIN_GAP_H_ATR, MAX_GAP_H_ATR, n_bins)
        self.action_space = spaces.Discrete(n_bins)

    def action(self, a):
        """Map a bin index to the one-element gap array the wrapped env expects."""
        return np.array([self.bins[int(a)]], dtype=np.float32)

In [None]:
def evaluate(model, df_slice, wrap=False):
    """
    Evaluate an SB3 model on `df_slice`, one trade per sub-environment.

    Handles both
        • continuous actions (A2C, PPO, ...): array of shape (n_env, 1) or (n_env,)
        • discrete actions through DiscreteGapWrapper (DQN): bin indices

    Only the first episode of every trade is recorded.

    Parameters
    ----------
    model : object with ``predict(obs, deterministic=True)``
    df_slice : pd.DataFrame with a 'Trade_Number' column plus the env columns
    wrap : bool
        Wrap each environment in DiscreteGapWrapper (discrete-action models).

    Returns
    -------
    (mean_reward, actions_by_trade, sl_levels_by_trade, pnl_by_trade)
        For an input without trades: (0.0, {}, {}, {}).
    """
    # Rows without a trade id cannot form an episode.
    trade_ids = df_slice["Trade_Number"].dropna().unique()
    if len(trade_ids) == 0:
        return 0.0, {}, {}, {}

    def _make_env(tid):
        base = StopLossEnv(df_slice[df_slice["Trade_Number"] == tid])
        return DiscreteGapWrapper(base) if wrap else base

    # One sub-environment per trade.
    env = DummyVecEnv([lambda tid=tid: _make_env(tid) for tid in trade_ids])

    actions_dict   = {tid: [] for tid in trade_ids}
    sl_levels_dict = {tid: [] for tid in trade_ids}
    pnl_dict       = {}
    idx2tid        = {i: tid for i, tid in enumerate(trade_ids)}

    # Gap value of every discrete bin (DQN only).
    gap_bins = np.linspace(MIN_GAP_H_ATR, MAX_GAP_H_ATR, N_BINS_DQN) if wrap else None

    try:
        obs = env.reset()
        # A vectorised env resets a finished sub-env automatically and keeps
        # stepping it, so the per-step done vector is almost never all-True.
        # Remember which trades have already finished once.
        finished = np.zeros(env.num_envs, dtype=bool)

        while not finished.all():
            raw_act, _ = model.predict(obs, deterministic=True)

            # Convert the raw action into the numeric gap.
            if wrap:
                gaps = gap_bins[np.asarray(raw_act).reshape(-1).astype(int)]
            else:
                gaps = np.asarray(raw_act).reshape(-1)

            for i, tid in idx2tid.items():
                if not finished[i]:
                    actions_dict[tid].append(float(gaps[i]))
                    sl_levels_dict[tid].append(float(obs[i, -3]))  # stop level

            # Accept both the 4-tuple and the 5-tuple step result.
            step = env.step(raw_act)
            if len(step) == 4:
                obs, rewards, step_done, _ = step
            else:
                obs, rewards, terminated, truncated, _ = step
                step_done = np.logical_or(terminated, truncated)
            step_done = np.asarray(step_done, dtype=bool)

            # The reward on the first terminating step is the trade's final PnL.
            for i, tid in idx2tid.items():
                if step_done[i] and not finished[i]:
                    pnl_dict[tid] = float(rewards[i])

            finished |= step_done
    finally:
        env.close()

    mean_r = np.mean(list(pnl_dict.values())) if pnl_dict else 0.0
    return mean_r, actions_dict, sl_levels_dict, pnl_dict


In [None]:
# 1) ──────────────────────────────────────────────────────────────
# Renómbrala para que no choque con `stable_baselines3.common.evaluation.evaluate_policy`
def evaluate_trades(model, df_slice, wrap=False):
    """
    Evaluate an SB3 model on a subset of the data (one trade = one sub-environment).

    Only the first episode of every trade is recorded.

    Parameters
    ----------
    model : object with ``predict(obs, deterministic=True)``
    df_slice : pd.DataFrame with a 'Trade_Number' column plus the env columns
    wrap : bool
        Wrap each environment in DiscreteGapWrapper (discrete-action models).

    Returns
    -------
    (mean_reward, actions_by_trade, sl_levels_by_trade, pnl_by_trade)
        For an input without trades: (0.0, {}, {}, {}).
    """
    # Rows without a trade id cannot form an episode.
    trade_ids = df_slice["Trade_Number"].dropna().unique()
    if len(trade_ids) == 0:
        return 0.0, {}, {}, {}

    def _build(tid):
        core = StopLossEnv(df_slice[df_slice["Trade_Number"] == tid])
        return DiscreteGapWrapper(core) if wrap else core

    # --- one env per trade -------------------------------------------------
    env = DummyVecEnv([lambda tid=tid: _build(tid) for tid in trade_ids])

    actions_dict   = {tid: [] for tid in trade_ids}
    sl_levels_dict = {tid: [] for tid in trade_ids}
    pnl_dict       = {}
    idx2tid        = {i: tid for i, tid in enumerate(trade_ids)}

    gap_bins = np.linspace(MIN_GAP_H_ATR, MAX_GAP_H_ATR, N_BINS_DQN) if wrap else None

    try:
        obs = env.reset()
        # Finished sub-envs are reset automatically and keep stepping, so track
        # which trades have completed their first episode instead of relying
        # on the done vector of a single step.
        finished = np.zeros(env.num_envs, dtype=bool)

        while not finished.all():
            raw_act, _ = model.predict(obs, deterministic=True)

            # Translate the raw action into the actual gap.
            if wrap:          # discrete bins
                gaps = gap_bins[np.asarray(raw_act, dtype=int).ravel()]
            else:             # continuous
                gaps = np.asarray(raw_act).ravel()

            for i, tid in idx2tid.items():
                if not finished[i]:
                    actions_dict[tid].append(float(gaps[i]))
                    sl_levels_dict[tid].append(float(obs[i, -3]))    # current stop

            # Accept both the 4-tuple and the 5-tuple step result.
            step = env.step(raw_act)
            if len(step) == 4:
                obs, rewards, step_done, _ = step
            else:
                obs, rewards, terminated, truncated, _ = step
                step_done = np.logical_or(terminated, truncated)
            step_done = np.asarray(step_done, dtype=bool)

            # The reward on the first terminating step is the trade's final PnL.
            for i, tid in idx2tid.items():
                if step_done[i] and not finished[i]:
                    pnl_dict[tid] = float(rewards[i])

            finished |= step_done
    finally:
        env.close()

    mean_r = np.mean(list(pnl_dict.values())) if pnl_dict else 0.0
    return mean_r, actions_dict, sl_levels_dict, pnl_dict


# Implementation

In [None]:

# Root folder of the RL experiment on the mounted Drive.
# NOTE(review): the path hard-codes the symbol instead of reusing `root_data`.
ROOT = Path('/content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL')
# Sub-folders for processed data, saved models and training logs.
DATA_DIR, MODEL_DIR, LOG_DIR = (ROOT/p for p in ('data', 'models', 'logs'))
# Create them (including missing parents); existing folders are left alone.
for p in (DATA_DIR, MODEL_DIR, LOG_DIR): p.mkdir(parents=True, exist_ok=True)
print('✅ Carpeta raíz:', ROOT)


✅ Carpeta raíz: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL


In [None]:

# Snapshot the feature frame as a pickle so the training cell can reload it.
df = df.copy()
PROCESSED = DATA_DIR / 'processed.pkl'
df.to_pickle(PROCESSED);  print('✅ Datos →', PROCESSED)


✅ Datos → /content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL/data/processed.pkl


In [None]:
#### SET_UP

# Seed used for the trade split, the environments and the model.
SEED              = 42

# Columns of each bar that are fed to the agent as observation features.
FEATURE_COLS      = ["atr_mult_close","atr_mult_high","atr_mult_low", "kal_300_diff","kal_600_diff","kal_900_diff",
                     "kal_300_pct","kal_600_pct","kal_900_pct"]

INITIAL_H_ATR     = -2.0      # stop level at the start of every episode
MIN_GAP_H_ATR     = 0.10      # lower bound of the gap action
MAX_GAP_H_ATR     = 5.00      # upper bound of the gap action
N_BINS_DQN        = 11        # number of discrete gap values used by DiscreteGapWrapper
TOTAL_TIMESTEPS   = 50_000    # training budget passed to model.learn


In [None]:

# Reload the snapshot written by the earlier cell.
df = pd.read_pickle(PROCESSED)
# Columns that must be non-null for a row to be usable by the environment.
# (The three atr_mult_* names are already part of FEATURE_COLS; the repetition is harmless.)
NEEDED = FEATURE_COLS + [
    "Trade_Number",
    "atr_mult_high", "atr_mult_low", "atr_mult_close"]
df = df.dropna(subset=NEEDED).copy()

print(f"✅ Shape limpio : {df.shape}")
print(f"✅ Trades únicos: {df['Trade_Number'].nunique()}")

# Split into train / validation / test by whole trades.
df_train, df_val, df_test = split_by_trade(df)

def make_vec(df_subset, n_envs=1):
    """Build a monitored VecEnv with one trade per sub-environment (no wrapper).

    NOTE(review): `n_envs` is accepted but not used; one sub-environment is
    created for every distinct 'Trade_Number' in `df_subset`.
    """
    def _builder_for(trade_id):
        # The rows are selected when the builder is called, not here.
        def _build():
            return StopLossEnv(df_subset[df_subset["Trade_Number"] == trade_id])
        return _build

    builders = [_builder_for(trade_id) for trade_id in df_subset["Trade_Number"].unique()]
    return VecMonitor(DummyVecEnv(builders))

# Only A2C is trained.
ALGOS = {
    "a2c": (A2C, {"n_steps": 512, "learning_rate": 7e-4})}

# Take the single (name, (class, kwargs)) entry and build the training env.
name, (cls, kwargs) = next(iter(ALGOS.items()))
env_tr  = make_vec(df_train)

model = cls("MlpPolicy", env_tr, verbose=0, seed=SEED,
            tensorboard_log=str(LOG_DIR / name), **kwargs)
model.learn(total_timesteps=TOTAL_TIMESTEPS)
model.save(MODEL_DIR / f"{name}_final")
env_tr.close()
print("✅ A2C entrenado")

# Reload the saved model and evaluate it on the held-out test trades.
# NOTE(review): `df_val` from the split is not used in this cell.
mdl = cls.load(MODEL_DIR / f"{name}_final")
mean_r, actions, sl_levels, _ = evaluate(mdl, df_test, wrap=False)
print(f"A2C | mean reward: {mean_r:6.3f} ATR")

# Create the destination columns if they do not exist yet.
for col in ("A2C_Action", "A2C_SL_Level"):
    if col not in df.columns:
        df[col] = np.nan

# ---------------------------------------------------------------
# Write the recorded actions and stop levels back onto the rows of each trade.
for tid, lvl_list in sl_levels.items():
    act_list = actions[tid]

    idx_trade = df.index[df["Trade_Number"] == tid].tolist()
    n_rows    = len(idx_trade)           # total rows in df for this trade
    n_vals    = min(len(lvl_list), n_rows)

    if n_vals == 0:
        continue                         # nothing to write

    idx_target = idx_trade[:n_vals]

    # Trim the lists to the same length as idx_target.
    df.loc[idx_target, "A2C_SL_Level"] = pd.Series(lvl_list[:n_vals], index=idx_target)
    df.loc[idx_target, "A2C_Action"]   = pd.Series(act_list[:n_vals], index=idx_target)
# ---------------------------------------------------------------

print("✅ Columnas escritas sin error")
df.head(15)

✅ Shape limpio : (9987, 12)
✅ Trades únicos: 956
✅ A2C entrenado


# No_Seen_Data

In [None]:

# Same folder setup as in the "Implementation" section, repeated so this
# section can be run on its own.
ROOT = Path('/content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL')
DATA_DIR, MODEL_DIR, LOG_DIR = (ROOT/p for p in ('data', 'models', 'logs'))
# Create the folders (including missing parents); existing ones are left alone.
for p in (DATA_DIR, MODEL_DIR, LOG_DIR): p.mkdir(parents=True, exist_ok=True)
print('✅ Carpeta raíz:', ROOT)


✅ Carpeta raíz: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/RL


In [None]:
# Same constants as in the earlier SET_UP cell, repeated so this section can
# be run on its own. Keep both cells in sync when changing a value.
SEED              = 42

# Columns of each bar that are fed to the agent as observation features.
FEATURE_COLS      = ["atr_mult_close","atr_mult_high","atr_mult_low", "kal_300_diff","kal_600_diff","kal_900_diff",
                     "kal_300_pct","kal_600_pct","kal_900_pct"]

INITIAL_H_ATR     = -2.0      # stop level at the start of every episode
MIN_GAP_H_ATR     = 0.10      # lower bound of the gap action
MAX_GAP_H_ATR     = 5.00      # upper bound of the gap action
N_BINS_DQN        = 11        # number of discrete gap values used by DiscreteGapWrapper
TOTAL_TIMESTEPS   = 50_000    # training budget passed to model.learn

In [None]:
# Hold-out slice: the most recent rows of `df`.
N_ROWS_UNSEEN = 1000                       # alternative previously tried: 30_000
PRED_COLS     = ["A2C_Action", "A2C_SL_Level"]

# Work on an independent copy (no chained assignment on a slice of `df`) and
# leave out prediction columns that may already exist on `df`: they are NaN
# wherever no level was written, so the dropna() below would otherwise
# discard those rows.
df_f = (
    df.iloc[-N_ROWS_UNSEEN:]
      .drop(columns=PRED_COLS, errors="ignore")
      .copy()
)
df_f['Date'] = pd.to_datetime(df_f['Date'])
df_f = df_f.dropna()

print('Columns :', df_f.columns)
print('Shape :', df_f.shape)
print('Days :', (df_f['Date'].max() - df_f['Date'].min()).days)

Columns : Index(['Date', 'trade type', 'Close', 'Trade_Number', 'atr_mult_close',
       'atr_mult_high', 'atr_mult_low', 'kal_300_diff', 'kal_600_diff',
       'kal_900_diff', 'kal_300_pct', 'kal_600_pct', 'kal_900_pct'],
      dtype='object')
Shape : (1000, 13)
Days : 5


In [None]:
# ── 1. Load the trained model and evaluate it on df_f ─────────────────
mdl = A2C.load(MODEL_DIR / "a2c_final")
mean_r_f, actions_f, sl_levels_f, _ = evaluate(mdl, df_f, wrap=False)
print(f"A2C (df_f) | mean reward: {mean_r_f:6.3f} ATR")

# ── 2. Start from empty target columns ────────────────────────────────
# Reset unconditionally: values inherited from `df` or left by an earlier run
# would otherwise survive in rows the current evaluation does not cover.
for col in ("A2C_Action", "A2C_SL_Level"):
    df_f[col] = np.nan

# ── 3. Write action and stop-loss level trade by trade ────────────────
for tid, lvl_list in sl_levels_f.items():
    act_list  = actions_f.get(tid, [])
    idx_trade = df_f.index[df_f["Trade_Number"] == tid]        # rows of this trade

    # Clip to the shortest of rows / levels / actions so lengths always match.
    n_vals = min(len(idx_trade), len(lvl_list), len(act_list))
    if n_vals == 0:
        continue

    idx_target = idx_trade[:n_vals]
    df_f.loc[idx_target, "A2C_SL_Level"] = lvl_list[:n_vals]
    df_f.loc[idx_target, "A2C_Action"]   = act_list[:n_vals]

print("✅ Columnas escritas en df_f sin error")

A2C (df_f) | mean reward:  0.673 ATR
✅ Columnas escritas en df_f sin error


In [None]:
# Implementation
# Evaluates the same model on df_f again, this time through evaluate_trades()
# (defined elsewhere in the notebook).
# NOTE(review): this rebinds mean_r_f / actions_f / sl_levels_f from the earlier
# evaluate() call; nothing visible in this section writes the new values back
# to df_f — confirm that is intended.
mean_r_f, actions_f, sl_levels_f, _ = evaluate_trades(mdl, df_f,   wrap=False)

In [None]:
# Number the rows of each trade 0, 1, 2, ... in their current order.
step_in_trade = df_f.groupby("Trade_Number").cumcount()
df_f["step"] = step_in_trade

# Show the resulting column set.
df_f.columns

Index(['Date', 'trade type', 'Close', 'Trade_Number', 'atr_mult_close',
       'atr_mult_high', 'atr_mult_low', 'kal_300_diff', 'kal_600_diff',
       'kal_900_diff', 'kal_300_pct', 'kal_600_pct', 'kal_900_pct',
       'A2C_Action', 'A2C_SL_Level'],
      dtype='object')

In [None]:
# Columns displayed when inspecting the stop-loss output.
def_columns = [
    'Date',
    'trade type',
    'Trade_Number',
    'atr_mult_close',
    'atr_mult_high',
    'atr_mult_low',
    'A2C_SL_Level',
    'step',
]

# Last ten rows that actually received a stop-loss level.
has_sl_level = df_f["A2C_SL_Level"].notna()
df_f.loc[has_sl_level, def_columns].tail(10)

Unnamed: 0,Date,trade type,Trade_Number,atr_mult_close,atr_mult_high,atr_mult_low,A2C_SL_Level,A2C_Action,step
9978,2024-03-28 11:20:00,0.0,953.0,1.2364,-0.0555,1.2364,1.3425,0.1,12
9979,2024-03-28 11:25:00,0.0,953.0,0.6341,-0.5231,0.6341,0.1298,0.1,13
9980,2024-03-28 11:30:00,0.0,953.0,0.6975,0.0476,0.6975,0.4469,0.1,14
9981,2024-03-28 11:35:00,0.0,953.0,0.325,-1.4901,0.325,1.3425,0.1,15
9982,2024-03-28 11:40:00,1.0,954.0,-0.2095,0.8031,-0.2095,-2.0,0.1,0
9987,2024-03-28 12:05:00,0.0,955.0,0.3072,-0.4215,0.3072,-2.0,0.1,0
9988,2024-03-28 12:10:00,0.0,955.0,0.1072,-1.2715,0.1072,0.2072,0.1,1
9989,2024-03-28 12:15:00,0.0,955.0,-1.0001,-1.8573,-1.0001,0.2072,0.1,2
9990,2024-03-28 12:20:00,0.0,955.0,-1.493,-2.4002,-1.493,0.2072,0.1,3
9991,2024-03-28 12:25:00,1.0,956.0,-0.3098,0.1697,-0.3098,-2.0,0.1,0


In [None]:
# Show every row of a single trade (Trade_Number 865) with the inspection columns.
is_trade_865 = df_f['Trade_Number'] == 865
df_f.loc[is_trade_865, def_columns]

Unnamed: 0,Date,trade type,Trade_Number,atr_mult_close,atr_mult_high,atr_mult_low,A2C_SL_Level,A2C_Action,step
9011,2024-03-22 22:45:00,1.0,865.0,-0.457,0.329,-0.457,-2.0,0.1,0
9012,2024-03-22 22:50:00,1.0,865.0,-0.8957,0.4661,-0.8957,,,1
9013,2024-03-22 22:55:00,1.0,865.0,-1.1242,-0.585,-1.1242,,,2
9014,2024-03-22 23:00:00,1.0,865.0,-0.9049,-0.2194,-0.9049,,,3
9015,2024-03-22 23:05:00,1.0,865.0,-0.8774,0.0914,-0.8774,,,4
9016,2024-03-22 23:10:00,1.0,865.0,-0.064,1.3984,-0.064,,,5
9017,2024-03-22 23:15:00,1.0,865.0,0.8043,2.349,0.8043,,,6
9018,2024-03-22 23:20:00,1.0,865.0,0.786,1.8097,0.786,,,7
9019,2024-03-22 23:25:00,1.0,865.0,1.1699,1.3893,1.1699,,,8
9020,2024-03-22 23:30:00,1.0,865.0,0.3565,1.3436,0.3565,,,9
