# Data Prep

In [None]:
import warnings
# Silences every warning category for the rest of the kernel session,
# including pandas warnings raised by later cells.
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import datetime
import json
import sys, os
# Put the parent directory on the import path; presumably this is what makes
# the `finHRL` and `dev` packages importable below — confirm against repo layout.
sys.path.append(os.path.abspath(".."))


In [None]:
from finHRL.preprocess.preprocessor import YahooDownloader

# Ticker universes are read from a JSON file shipped inside the package tree.
with open("../finHRL/preprocess/tickers/ticker_lists.json", "r") as ticker_file:
    data = json.load(ticker_file)

dow_30, cryptos = data["DOW_30"], data["CRYPTO_7"]

# Boundaries of the train / test windows, as ISO date strings.
TRAIN_START_DATE, TRAIN_END_DATE = '2017-01-01', '2022-01-01'
TEST_START_DATE, TEST_END_DATE = '2022-01-01', '2023-01-01'

# Ask for 30 extra calendar days ahead of the training window. Rows dated
# before TRAIN_START_DATE are filtered out again after feature engineering,
# so this is presumably warm-up history for the technical indicators.
download_start = pd.to_datetime(TRAIN_START_DATE) - datetime.timedelta(days=30)

downloader = YahooDownloader(
    start_date=download_start,
    end_date=TEST_END_DATE,
    ticker_list=dow_30,
)
df = downloader.fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (45900, 8)


In [None]:
# Technical indicators to compute per ticker; the same list is later handed
# to the trading environments.
INDICATORS = ['macd',
              'rsi_30',
              'cci_30']

from finHRL.preprocess.preprocessor import FeatureEngineer
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list = INDICATORS,
                     use_turbulence=False,
                     user_defined_feature = False)

processed = fe.preprocess_data(df)
processed = processed.copy()
# Neutralise missing and non-finite values so they never reach the agent.
processed = processed.fillna(0)
# Replace BOTH infinities: replacing only np.inf would let -inf values
# (e.g. from a division by zero with a negative numerator) slip through.
processed = processed.replace([np.inf, -np.inf], 0)

# Drop the extra history downloaded before the training window.
processed = processed[processed.date >= TRAIN_START_DATE].reset_index(drop=True)

Successfully added technical indicators


In [None]:
# Number of distinct tickers present in the processed frame.
stock_dimension = processed["tic"].nunique(dropna=False)
print(stock_dimension)

30


In [None]:
# Chronological split: everything before TEST_START_DATE is training data.
# Explicit .copy() makes each split an independent frame, so adding the
# `dayorder` column below is a plain assignment rather than a chained
# assignment on a slice of `processed`.
df_train = processed[processed.date < TEST_START_DATE].copy()
df_test = processed[processed.date >= TEST_START_DATE].copy()


# Integer code per distinct date within each split (categorical codes of
# the `date` column).
df_train["dayorder"] = df_train["date"].astype("category").cat.codes
df_test["dayorder"] = df_test["date"].astype("category").cat.codes

# Base RL

## Hyperparameter optimization

In [None]:
from dev.hyperparameter_searching.base_RL_hs import hyperparams_opt_RL

# Settings handed to the hyperparameter search for the baseline agent.
search_settings = dict(
    df_train=df_train,
    df_test=df_test,
    indicators=INDICATORS,
    n_episodes_train=10,
    n_trials=80,
)
hs_opt = hyperparams_opt_RL(**search_settings)

hs_opt.run_opt()

## Training

In [None]:
# TRAINING with BEST HPs
# Builds the training environment on df_train and trains an A2C model for
# n_episodes passes over the training period.
from finHRL.env_stocktrading.trading_env_RL import StockTradingEnv
from finHRL.agent.models import baseRLAgent

# state_space_noHRL = [balance, close prices_i, stock_shares_i, MACD_i, rsi30_i, cci30_i]
# (no turbulence term: the size formula below has none and feature
# engineering ran with use_turbulence=False)
episode_len = df_train.dayorder.nunique()
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension 

# Two independent lists: a chained `a = b = [...]` assignment would bind both
# names to the same list object, so a mutation of one would silently change
# the other.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

tr_env = StockTradingEnv(
    df = df_train,
    stock_dim=stock_dimension,
    hmax= 100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space= state_space,
    action_space= stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=True,
    print_verbosity=2
)

agent = baseRLAgent(env=tr_env)

n_episodes = 50

# Hard-coded hyperparameters; presumably copied from the result of the
# hyperparameter search cell above — confirm before reuse.
best_hiperparams = {'gamma': 0.9460266042874034,
                    'max_grad_norm': 0.5723516016378004,
                    'n_steps': 8,
                    'learning_rate': 4.3336870145809356e-05,
                    'ent_coef': 7.323986049778401e-08}

model = agent.get_model("a2c",
                        learning_rate = best_hiperparams['learning_rate'],
                        gamma = best_hiperparams['gamma'],
                        max_grad_norm = best_hiperparams['max_grad_norm'],
                        n_steps = best_hiperparams['n_steps'],
                        ent_coef = best_hiperparams['ent_coef'],
                        verbose=1)


# Total budget = number of episodes times the number of distinct training days.
trained_model = agent.train_model(
    model,
    tb_log_name="a2c_best_hp",
    total_timesteps= n_episodes*episode_len
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| time/                 |          |
|    fps                | 76       |
|    iterations         | 100      |
|    time_elapsed       | 10       |
|    total_timesteps    | 800      |
| train/                |          |
|    entropy_loss       | -42.6    |
|    explained_variance | -10.9    |
|    learning_rate      | 4.33e-05 |
|    n_updates          | 99       |
|    policy_loss        | 2.41     |
|    std                | 1        |
|    value_loss         | 0.0163   |
------------------------------------
day: 1258, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 1873833.71
total_reward: 873833.71
total_cost: 234241.71
total_trades: 35094
Sharpe: 0.737
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 1.26e+03 |
|    ep_rew_mean        | 0.628    |
| time/                 |          |
|    f

KeyboardInterrupt: 

In [8]:
# TESTING
# Same environment configuration as training, but fed with the held-out
# test frame, with plotting disabled and a lower print verbosity.
test_env_kwargs = dict(
    df=df_test,
    stock_dim=stock_dimension,
    hmax=100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    action_space=stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=False,
    print_verbosity=1,
)
test_env = StockTradingEnv(**test_env_kwargs)

# Run the trained model on the test environment; the two results are unpacked
# into the account-value and action frames.
df_account_value_a2c, df_actions_a2c = baseRLAgent.predict_RL(
    model=trained_model,
    environment=test_env,
)

Starting prediction...
day: 250, episode: 2
begin_total_asset: 1000000.00
end_total_asset: 1037854.17
total_reward: 37854.17
total_cost: 1383.70
total_trades: 3489
Sharpe: 0.294
hit end!


# HRL

In [None]:
from finHRL.env_stocktrading.trading_env_HRL import StockTradingEnvHRL

# state_space_manager = [close prices_i, MACD_i, rsi30_i, cci30_i]  (maybe drop one of the indicators)

# state_space_worker = [balance, close_prices_i, stock_shares_i, manager_actions_i]  # maybe add a risk indicator; to be studied and tested

# state_space_noHRL = [balance, close prices_i, stock_shares_i, MACD_i, rsi30_i, cci30_i]

# action_space_manager = {-1, 0, 1} * 30
# action_space_manager = {0,1} * 30 (multiply by hmax to get the number of shares)

# `processed` has no `dayorder` column (it is only added to df_train and
# df_test), so the episode length is taken from the distinct dates directly;
# this equals the number of distinct dayorder codes.
episode_len = processed["date"].nunique()
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension

state_space_manager = (len(INDICATORS) + 1)*stock_dimension
state_space_worker = (1 + 3*stock_dimension)

# Independent lists rather than one list object bound to two names.
buy_cost_list = [0.001] * stock_dimension
sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

# NOTE(review): the environment is built on the full `processed` frame, which
# spans both the training and the test period — confirm this is intended
# before training on it.
# NOTE(review): the flat `state_space` is passed here; state_space_manager and
# state_space_worker are computed above but not used in this cell.
tr_env = StockTradingEnvHRL(
    df = processed,
    stock_dim=stock_dimension,
    hmax= 100,
    initial_amount=1000000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space= state_space,
    action_space= stock_dimension,
    tech_indicator_list=INDICATORS,
    make_plots=True,
    print_verbosity=1
)

# Pendiente

- LR está en 0.0003: Probar a bajar a 5e-5 o 1e-5
- Incluir train -> Test etc