# Data Prep

In [1]:
# Standard library
import datetime
import json
import os
import sys
import warnings

# Silence every warning category before the third-party imports below run.
warnings.filterwarnings("ignore")

# Third-party
import numpy as np
import pandas as pd

# Put the parent of the current working directory on the import path
# (presumably the repository root that holds the `finHRL` and `dev` packages -- confirm).
sys.path.append(os.path.abspath(".."))


In [2]:
from finHRL.preprocess.preprocessor import YahooDownloader

# Train/test date boundaries; the test window starts on the day training ends.
TRAIN_START_DATE = '2017-01-01'
TRAIN_END_DATE = '2022-01-01'
TEST_START_DATE = '2022-01-01'
TEST_END_DATE = '2023-01-01'

# Ticker universes are kept in a JSON file next to the preprocessing code.
with open("../finHRL/preprocess/tickers/ticker_lists.json", "r") as f:
    data = json.load(f)

dow_30, cryptos = data["DOW_30"], data["CRYPTO_7"]

# Begin the download 30 days ahead of the training window
# (presumably warm-up history for the 30-period indicators -- confirm).
download_start = pd.to_datetime(TRAIN_START_DATE) - datetime.timedelta(days=30)

df = YahooDownloader(
    start_date=download_start,
    end_date=TEST_END_DATE,
    ticker_list=dow_30,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (45900, 8)


In [3]:
from finHRL.preprocess.preprocessor import FeatureEngineer

# Technical indicators computed for every ticker. Each one contributes
# `stock_dimension` entries to the environment state size used further down.
INDICATORS = ['macd',
              'rsi_30',
              'cci_30']

fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=INDICATORS,
                     use_turbulence=False,
                     user_defined_feature=False)

# Neutralise missing and non-finite values. Both +inf and -inf are mapped to 0:
# replacing only np.inf would leave negative infinities in the frame.
# fillna/replace already return new frames, so no explicit copy is needed.
processed = (
    fe.preprocess_data(df)
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
)

# Discard the rows downloaded before the training window and renumber the index.
processed = processed[processed.date >= TRAIN_START_DATE].reset_index(drop=True)

Successfully added technical indicators


In [4]:
# Number of distinct tickers in the processed frame; the state/action space
# sizes of the trading environments are derived from it.
stock_dimension = len(pd.unique(processed["tic"]))
print(stock_dimension)

30


In [5]:
# Split on the date column: train is everything before TEST_START_DATE,
# test is everything from TEST_START_DATE onward.
# `.copy()` gives each split its own frame, so the column added below is not
# written onto a filtered slice of `processed` (the chained-assignment pattern
# behind pandas' SettingWithCopyWarning, which the global warnings filter hides).
df_train = processed[processed.date < TEST_START_DATE].copy()
df_test = processed[processed.date >= TEST_START_DATE].copy()

# Per-split day counter: rows sharing a date get the same integer code, and
# codes follow the sorted order of the distinct dates within that split.
df_train["dayorder"] = df_train["date"].astype("category").cat.codes
df_test["dayorder"] = df_test["date"].astype("category").cat.codes

# Base RL

In [6]:
from finHRL.env_stocktrading.trading_env_RL import StockTradingEnv

# Flat (non-hierarchical) state, sized by the formula below:
#   [balance, close price_i, shares held_i, MACD_i, rsi30_i, cci30_i]
# Turbulence is not counted in the size.
episode_len = df_train["dayorder"].nunique()
state_space = 1 + (2 + len(INDICATORS)) * stock_dimension

# One cost list object is shared by the buy and the sell side.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = buy_cost_list
num_stock_shares = [0] * stock_dimension

# Training environment over the train split, starting with no holdings.
train_env_kwargs = dict(
    df=df_train,
    stock_dim=stock_dimension,
    hmax=100,
    initial_amount=1_000_000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    action_space=stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=False,
    print_verbosity=1,
)
tr_env = StockTradingEnv(**train_env_kwargs)



In [7]:
# TRAINING

from finHRL.agent.models import baseRLAgent

agent = baseRLAgent(env=tr_env)

# A2C hyperparameters for this run.
n_episodes = 70
learning_rate = 0.0001
gamma = 0.99
max_grad_norm = 0.5
n_steps = 256
ent_coef = 0.0001

a2c_kwargs = {
    "learning_rate": learning_rate,
    "gamma": gamma,
    "max_grad_norm": max_grad_norm,
    "n_steps": n_steps,
    "ent_coef": ent_coef,
    "verbose": 1,
}
model = agent.get_model("a2c", **a2c_kwargs)

# Timestep budget: n_episodes passes of episode_len steps each.
training_timesteps = n_episodes * episode_len

trained_model = agent.train_model(
    model,
    tb_log_name="a2c_test1",
    total_timesteps=training_timesteps,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
day: 1258, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 2139391.32
total_reward: 1139391.32
total_cost: 236364.09
total_trades: 35236
Sharpe: 0.877
day: 1258, episode: 2
begin_total_asset: 1000000.00
end_total_asset: 1929656.16
total_reward: 929656.16
total_cost: 230435.41
total_trades: 35045
Sharpe: 0.756
day: 1258, episode: 3
begin_total_asset: 1000000.00
end_total_asset: 2173411.25
total_reward: 1173411.25
total_cost: 230242.03
total_trades: 35043
Sharpe: 0.863
day: 1258, episode: 4
begin_total_asset: 1000000.00
end_total_asset: 2242601.33
total_reward: 1242601.33
total_cost: 228796.98
total_trades: 34984
Sharpe: 0.865
day: 1258, episode: 5
begin_total_asset: 1000000.00
end_total_asset: 1905364.98
total_reward: 905364.98
total_cost: 233622.92
total_trades: 35260
Sharpe: 0.747
day: 1258, episode: 6
begin_total_asset: 1000000.00
end_total_asset: 1999131.77
total_reward: 999131.77

In [8]:
# TESTING
# Evaluation environment: same settings as the training env, fed with the test split.
test_env_kwargs = dict(
    df=df_test,
    stock_dim=stock_dimension,
    hmax=100,
    initial_amount=1_000_000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    action_space=stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=False,
    print_verbosity=1,
)
test_env = StockTradingEnv(**test_env_kwargs)

# Run the trained model through the test env; the two results are kept as
# the account-value frame and the actions frame.
df_account_value_a2c, df_actions_a2c = baseRLAgent.predict_RL(
    model=trained_model,
    environment=test_env,
)

Starting prediction...
day: 250, episode: 2
begin_total_asset: 1000000.00
end_total_asset: 1037854.17
total_reward: 37854.17
total_cost: 1383.70
total_trades: 3489
Sharpe: 0.294
hit end!


In [6]:
from dev.hyperparameter_searching.base_RL_hs import hyperparams_opt_RL

# Search configuration: 80 trials, each training for 10 episodes.
search_settings = dict(
    df_train=df_train,
    df_test=df_test,
    indicators=INDICATORS,
    n_episodes_train=10,
    n_trials=80,
)
hs_opt = hyperparams_opt_RL(**search_settings)

# Launch the search (long-running).
hs_opt.run_opt()

[I 2025-11-23 15:07:07,849] A new study created in memory with name: no-name-04e9bb65-f0b1-4361-a627-d597b99f341b


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| time/                 |          |
|    fps                | 87       |
|    iterations         | 100      |
|    time_elapsed       | 9        |
|    total_timesteps    | 800      |
| train/                |          |
|    entropy_loss       | -42.7    |
|    explained_variance | 1.19e-07 |
|    learning_rate      | 0.00408  |
|    n_updates          | 99       |
|    policy_loss        | -4.86    |
|    std                | 1.01     |
|    value_loss         | 0.0136   |
------------------------------------
day: 1258, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 2039735.02
total_reward: 1039735.02
total_cost: 19155.59
total_trades: 23473
Sharpe: 0.923
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.26e+03 |
|    ep_rew_mean        | 0.711    |
| time/                 |          |
|    f

[W 2025-11-23 15:07:28,198] Trial 0 failed with parameters: {'gamma': 0.9825412090417985, 'max_grad_norm': 3.022986292922892, 'n_steps': 8, 'learning_rate': 0.004082650230163378, 'ent_coef': 5.716252431384161e-08} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/andoniiribarren/Desktop/TFM/repoTFM/HRLMAMLforST/hrlmamlenv/lib/python3.12/site-packages/optuna/study/_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/Users/andoniiribarren/Desktop/TFM/repoTFM/HRLMAMLforST/dev/hyperparameter_searching/base_RL_hs.py", line 117, in objective
    trained_model = agent.train_model(
                    ^^^^^^^^^^^^^^^^^^
  File "/Users/andoniiribarren/Desktop/TFM/repoTFM/HRLMAMLforST/finHRL/agent/models.py", line 52, in train_model
    model = model.learn(
            ^^^^^^^^^^^^
  File "/Users/andoniiribarren/Desktop/TFM/repoTFM/HRLMAMLforST/hrlmamlenv/lib/python3.12/site-packag

KeyboardInterrupt: 

# HRL

In [None]:
from finHRL.env_stocktrading.trading_env_HRL import StockTradingEnvHRL

# Planned state layouts (author's notes; `turbulences_i` appears in the notes
# but is not counted in any of the sizes computed below):
#   state_space_manager = [close prices_i, MACD_i, rsi30_i, cci30_i, turbulences_i]
#                         (maybe drop one of the indicators)
#   state_space_worker  = [balance, close_prices_i, stock_shares_i, manager_actions_i]
#                         (maybe add a risk indicator; to be studied and tested)
#   state_space_noHRL   = [balance, close prices_i, stock_shares_i, MACD_i, rsi30_i, cci30_i, turbulences_i]
#
# Candidate manager action spaces:
#   action_space_manager = {-1, 0, 1} * 30
#   action_space_manager = {0, 1} * 30  (to get the share count, multiply by hmax)

# `dayorder` only exists on the train/test splits, not on `processed`, so the
# episode length is taken from the train split. The training env below also
# uses the train split, which keeps the test period out of training and
# matches the Base RL section.
episode_len = df_train.dayorder.nunique()
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension

# Sizes for the hierarchical agents; not yet passed to the environment below.
state_space_manager = (len(INDICATORS) + 1)*stock_dimension
state_space_worker = (1 + 3*stock_dimension)

buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

tr_env = StockTradingEnvHRL(
    df = df_train,
    stock_dim=stock_dimension,
    hmax= 100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space= state_space,
    action_space= stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=True,
    print_verbosity=1
)

# Pendiente

- El learning rate (LR) está en 0.0003: probar a bajarlo a 5e-5 o 1e-5.
- Incluir el flujo completo train -> test, etc.