#### Importing libraries

In [None]:
!pip install git+https://github.com/asa-eve/Trading_Bot_RL.git

In [1]:
import sys
import os

# Path to directory above Trading_Bot_RL on 1 level
sys.path.insert(0, f'{os.path.dirname(os.getcwd())}')

from trading_bot_rl.agent import *
from trading_bot_rl.env import *

from trading_bot_rl.functions.general import *
from trading_bot_rl.functions.callbacks import *
from trading_bot_rl.functions.env_functions import *
from trading_bot_rl.functions.data_preprocessing import *

from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback

def env_kwargs_reinit():
    """Build a fresh keyword-argument dict for the trading environment.

    Every value is read from notebook-level globals at call time
    (e.g. ``state_space``, ``INDICATORS`` and ``seed_value`` are rebound inside
    the training loop), so the function must be called again whenever any of
    them changes.

    Returns:
        dict: a new dict on every call; the cost / share entries are passed
        through as the per-stock lists held in the globals, not as scalars.
    """
    return dict(
        hmax=kwarg_hmax,
        initial_amount=kwarg_initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_list,      # whole list, not buy_cost_list[0]
        sell_cost_pct=sell_cost_list,    # whole list, not sell_cost_list[0]
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=INDICATORS,
        action_space=stock_dimension,
        reward_scaling=kwarg_reward_scaling,
        make_plots=MAKE_PLOTS,
        print_verbosity=VERBOSITY_PRINT,
        discrete_action_space=discrete_action_space,
        seed=seed_value,
    )
    
def print_training_info():
    """Print a human-readable summary of the current training configuration.

    Reads everything from notebook-level globals (environment kwargs, training
    loop bounds, model name, seed flag, device, data-split settings and
    ``dict_args``) and writes the report to stdout. Returns nothing.
    """
    def _yes_no(flag):
        # Same string arithmetic as the flags are used with elsewhere: 'Yes' for 1/True, 'No' for 0/False
        return ('No' * (1 - flag)) + ('Yes' * flag)

    def _row(label, value):
        # 7-space indent, label padded to a fixed column, then "- value"
        return f"       {label:<27}- {value}"

    has_forecasts = df_name_forecasts is not None

    # "normal", "normal | forecasted" or " forecasted", depending on which data sets will be trained on
    mode = 'normal' * (1 - only_forecasts_data)
    mode += ' |' * (has_forecasts and (only_forecasts_data != True))
    mode += ' forecasted' * has_forecasts

    # Without a validation split the held-out share is used for trading only
    split_label = "Valid + Trade" if valid_split else "Trade"

    report = [
        '---',
        '    Training with next initial parameters:',
        '',
        f"          Mode - {mode}",
        '',
        _row('Number of Stocks to trade', kwarg_hmax),
        _row('Initial money amount', kwarg_initial_amount),
        _row('Commision', kwarg_buy_sell_cost),
        _row('Env reward scaling', kwarg_reward_scaling),
        _row('discrete action space', _yes_no(discrete_action_space)),
        '',
        f"   Training loop from {start_training_episode} to {end_training_episode - 1} (step = {step_training_episodes})",
        '',
        _row('RL model', model_name),
        _row('Fixed seed', _yes_no(fixed_seed)),
        _row('Device', algorithm_parameters['device']),
        '',
        "   Data processing info",
        '',
        _row(split_label, f"{int(test_and_valid_pct * 100)} %"),
        _row('Tech Indicators usage', _yes_no(dict_args['tech_indicators_usage'])),
        '',
        '---',
    ]
    # Three trailing blank lines separate the report from whatever is printed next
    report.extend([''] * 3)
    print('\n'.join(report))

  from .autonotebook import tqdm as notebook_tqdm


#### Parameters

In [10]:
# Configuration cell: every tunable used by the training loop and by print_training_info().

trained_models = {}                       # dictionary for 'saving' models
last_model_trained = None                 # 'for testing' - takes last trained model as one for test

# RL parameters -----------------
discrete_action_space = False             # For discrete 'action_space' in env [21 ~ 0.1 step, 11 ~ 0.2 step, etc..]
if discrete_action_space:
    discrete_actions = 11

VERBOSITY_PRINT = 1                       # in 'episodes'
MAKE_PLOTS = False

chosen_callback = 'tensorboard'           # 'tensorboard', 'eval', 'checkpoint', None
if chosen_callback == 'eval':
    eval_freq = 5

fixed_seed = True                         # only for replicating results or hyperparameters tuning
# Defined unconditionally so the name exists whichever way `fixed_seed` is set
# (previously switching fixed_seed to False left `seed_values` undefined on a fresh kernel).
# The values are only meaningful when fixed_seed is True.
seed_values = [1]

# {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO, "trpo": TRPO, "lstm_ppo": RecurrentPPO}
model_name = 'lstm_ppo'
algorithm_parameters = {"learning_rate": 1e-4,
                        "device": 'cuda',
                        }


# Threshold parameters -----------------
#quantile = None
#turbulence_threshold = quantile      # turbulence_threshold_define() to get turbulence
#risk_indicator_col = None            # 'vix' column

# Env parameters -----------------
kwarg_hmax = 100
kwarg_initial_amount = 1000000
kwarg_reward_scaling = 1e-4
kwarg_buy_sell_cost = 0.001

# Iterative Training parameters ------------
times_loop_training = 40
start_training_episode = 100                                             # start == episode until which to train 'first time'
step_training_episodes = start_training_episode                          # step == number of episodes to train afterwards
# +1 so that range(start, end, step) still includes start * times_loop_training as its last value
end_training_episode = start_training_episode * times_loop_training + 1

In [11]:
# Data configuration cell.
# Every column of the dataset except those listed in `unwanted_features` is used as an RL feature;
# to exclude anything else, drop it from the dataframe with pandas beforehand.
# The dataframe must contain columns named 'date', 'open', 'close', 'volume', 'high', 'low'
# (rename your columns with pandas if they differ).
# -----------------------

df_names = ['^GSPC_ta_my_features']                 # ['all_ta_features', 'filtered_features', '^GSPC_ta_my_features']
df_name_forecasts = '_with_forecasts_LSTM_1_120'    # suffix of the forecasts CSV, e.g. '_with_forecasts_LSTM_1_120'
only_forecasts_data = True
unwanted_features = ['date', 'tic']
encode_normalize_data = True

# Both folders live next to the notebook's working directory (one level up);
# separators are normalised to '/' whatever the platform returned.
_parent_dir = os.path.dirname(os.getcwd())
path_to_datasets = f"{_parent_dir}/datasets/".replace("\\", "/")
path_to_models = f"{_parent_dir}/trained_models/".replace("\\", "/")

test_and_valid_pct = 0.15
valid_split = False
BOOL_TO_INT = True

# Keyword arguments forwarded to data_read_preprocessing_singleTIC(...)
dict_args = dict(
    test_and_valid_pct=test_and_valid_pct,
    tic_name='SPY',
    valid_split=valid_split,
    BOOL_TO_INT=BOOL_TO_INT,
    tech_indicators_usage=False,
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False,
)

print_training_info()

---
    Training with next initial parameters:

          Mode -  forecasted

       Number of Stocks to trade  - 100
       Initial money amount       - 1000000
       Commision                  - 0.001
       Env reward scaling         - 0.0001
       discrete action space      - No

   Training loop from 100 to 4000 (step = 100)

       RL model                   - lstm_ppo
       Fixed seed                 - Yes
       Device                     - cuda

   Data processing info

       Trade                      - 15 %
       Tech Indicators usage      - No

---





#### Iterative Training Process
- works with 'data' alone and with the pair (data, data_with_forecasts)
- iterates over every dataframe in `df_names`
    - iterates over every seed in `seed_values`

In [None]:
# Iterative training over every dataset in `df_names`.
#
# For each dataset:
#   1. read + preprocess the main CSV (and the forecasts CSV when `df_name_forecasts` is set),
#   2. derive the feature lists and state-space sizes,
#   3. for each seed and for each data variant (plain / with forecasts) train in chunks of
#      `step_training_episodes` episodes, saving a model after every chunk and resuming from
#      the previous chunk's file; chunks whose file already exists are loaded instead of retrained,
#   4. after every chunk compute the Sharpe ratio on the validation (or trade) split.
#
# NOTE: `state_space`, `INDICATORS`, `seed_value`, `stock_dimension`, `buy_cost_list`, `sell_cost_list`
# and `num_stock_shares` are deliberately kept as notebook-level names: env_kwargs_reinit() reads them.
for df_name in df_names:

    # --- Data loading + preprocessing ---
    df_main_file = path_to_datasets + f"{df_name}.csv"
    df_forecasts_file = path_to_datasets + f"{df_name + df_name_forecasts}.csv" if (df_name_forecasts is not None) else None
    train_main, valid_main, trade_main, train_forecasts, valid_forecasts, trade_forecasts = data_read_preprocessing_singleTIC(df_main_file, df_forecasts_file, **dict_args)

    # --- Features that make up the state space (everything except `unwanted_features`) ---
    INDICATORS_MAIN = [col for col in train_main.columns.tolist() if col not in unwanted_features]
    if df_forecasts_file is not None:
        INDICATORS_FORECASTS = [col for col in train_forecasts.columns.tolist() if col not in unwanted_features]

    # --- RL env parameters ---
    stock_dimension = len(train_main.tic.unique())
    state_space_main = 1 + 2*stock_dimension + len(INDICATORS_MAIN)*stock_dimension
    state_space_forecasts = 1 + 2*stock_dimension + len(INDICATORS_FORECASTS)*stock_dimension if (df_forecasts_file is not None) else None
    buy_cost_list = sell_cost_list = [kwarg_buy_sell_cost] * stock_dimension
    num_stock_shares = [0] * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space_main}, State Space Forecasts: {state_space_forecasts}")

    # Sharpe ratios collected across all seeds and all training chunks of this dataset
    sharpe_forecastFalse = []
    sharpe_forecastTrue = []

    # Which data variants to train on: False = plain data, True = data with forecasts
    has_forecasts = df_name_forecasts is not None
    if only_forecasts_data:
        is_forecast_list = [True] if has_forecasts else []
    else:
        is_forecast_list = [False, True] if has_forecasts else [False]
    if not is_forecast_list:
        print("Nothing to train: only_forecasts_data is True but df_name_forecasts is None")

    eval_label = 'Valid' if valid_split else 'Test'

    # Without a fixed seed a single unseeded run is performed; `seed_values` is only
    # touched when fixed_seed is True, so it does not have to exist otherwise.
    seeds_to_run = list(seed_values) if fixed_seed else [None]

    for seed_value in seeds_to_run:
        if fixed_seed: set_seed(seed_value)

        for is_forecast in is_forecast_list:

            state_space = state_space_forecasts if is_forecast else state_space_main
            INDICATORS = INDICATORS_FORECASTS if is_forecast else INDICATORS_MAIN
            train = train_forecasts if is_forecast else train_main
            trade = trade_forecasts if is_forecast else trade_main
            if valid_split: valid = valid_forecasts if is_forecast else valid_main

            # Snapshot of the env parameters for this variant
            env_kwargs = env_kwargs_reinit()

            # Path of the model produced by the previous chunk (None -> start from a fresh model)
            prev_name = None

            # Iterative training loop: `i` is the total number of episodes trained once this chunk is done
            for i in range(start_training_episode, end_training_episode, step_training_episodes):
                for _ in range(2): print('------')
                print(f'LOOP NUMBER {int(i/step_training_episodes)} --- {i-start_training_episode} EPISODES TRAINED')
                for _ in range(2): print('------')
                NUM_EPISODES = step_training_episodes
                EPISODE_LENGTH = len(train)
                NUM_TRAINING_STEPS_FOR_1_TRIAL = NUM_EPISODES * EPISODE_LENGTH

                # '/' is used throughout (path_to_models is already normalised to '/'), so the run
                # folder is a real sub-directory on every platform instead of a file name containing '\'.
                model_dir = f"{path_to_models}{model_name}_techindicators{dict_args['tech_indicators_usage']}_vix{dict_args['use_vix']}_turbulence{dict_args['use_turbulence']}"
                run_tag = f"{df_name}_{model_name}_lr{algorithm_parameters['learning_rate']}_Forecast{is_forecast}_Seed{seed_value}_Episodes{i}_Step{step_training_episodes}_Sharpe{eval_label}{int(test_and_valid_pct*100)}"
                name_of_the_save_file = f"{model_dir}/{run_tag}"
                name_of_the_save_file_zip = name_of_the_save_file + ".zip"

                if not os.path.exists(name_of_the_save_file_zip):
                    # --- MODEL TRAINING ---

                    # Fresh env whose episode counter continues from the episodes already trained
                    e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                    e_train_gym.episode = i - start_training_episode + 1

                    # Callback selection; always assigned so a stale or missing `callback_arg` can never be used
                    if chosen_callback == 'eval':
                        callback_arg = EvalCallback(eval_env=env_train, eval_freq=eval_freq, best_model_save_path=name_of_the_save_file+'/eval_callback', log_path=name_of_the_save_file+'/eval_callback', deterministic=True)
                    elif chosen_callback == 'tensorboard':
                        # NOTE(review): the BaseCallback *class* is passed as the first argument — confirm against TensorboardCallback's signature
                        callback_arg = TensorboardCallback(BaseCallback)
                    elif chosen_callback is None:
                        # assumes agent.train_model forwards `callback` to the SB3 learn() call, where None means "no callback" — TODO confirm
                        callback_arg = None
                    else:
                        raise ValueError(f"Unsupported chosen_callback={chosen_callback!r}: this cell only wires up 'eval', 'tensorboard' and None")

                    agent = DRLAgent(env = env_train)
                    # First chunk: build a new model; later chunks: resume from the previous chunk's file
                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value) if (prev_name is None) else model.load(prev_name, env = env_train)
                    trained = agent.train_model(model=model,
                                                tb_log_name=model_name,
                                                total_timesteps=NUM_TRAINING_STEPS_FOR_1_TRIAL,
                                                callback=callback_arg)

                    # MODEL SAVING (done before evaluation so a failure there does not lose the trained chunk)
                    trained.save(name_of_the_save_file_zip)

                else:
                    # --- MODEL LOADING ---
                    print('Model already exists')

                    e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                    agent = DRLAgent(env = env_train)
                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)
                    # Load exactly the file whose existence was checked above
                    trained = model.load(name_of_the_save_file_zip, env = env_train)

                # --- Sharpe on the 'valid' split when valid_split is set, otherwise on 'trade' ---
                eval_data = valid if valid_split else trade
                env_trade_gym, env_trade = env_reinit(eval_data, env_kwargs_reinit())
                df_account_value, df_actions = DRLAgent.prediction(model=trained, environment = env_trade_gym)
                sharpe = calculate_sharpe(df_account_value)
                (sharpe_forecastTrue if is_forecast else sharpe_forecastFalse).append(sharpe)

                prev_name = name_of_the_save_file_zip
                print('Forecasts = ', is_forecast, '| Seed = ', seed_value, '| Num episodes = ', i, f'| Sharpe {eval_label} = ', sharpe)
                print('---')

        # Running summary after each seed (the lists accumulate over all seeds of this dataset).
        # An empty list is reported as nan without triggering numpy's "mean of empty slice" warning.
        print('------')
        print(df_name)
        print('Seed average no forecast = ', np.mean(sharpe_forecastFalse) if sharpe_forecastFalse else float('nan'))
        print('Seed average with forecast = ', np.mean(sharpe_forecastTrue) if sharpe_forecastTrue else float('nan'))
        print('------')
        print('No forecast = ', sharpe_forecastFalse)
        print('With forecast =', sharpe_forecastTrue)
        print('------')

train  1999-03-19   2019-06-06
trade  2019-06-07   2022-12-27
train  1999-03-19   2019-06-06
trade  2019-06-07   2022-12-27
Stock Dimension: 1, State Space: 126, State Space Forecasts: 133
------
------
LOOP NUMBER 1 --- 0 EPISODES TRAINED
------
------
{'learning_rate': 0.0001, 'device': 'cuda'}




day: 5086, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 1821066.69
total_reward: 821066.69
total_cost: 413519.02
total_trades: 4590
Sharpe: 0.287
day: 5086, episode: 2
begin_total_asset: 1000000.00
end_total_asset: 724453.10
total_reward: -275546.90
total_cost: 374745.60
total_trades: 4275
Sharpe: -0.028
day: 5086, episode: 3
begin_total_asset: 1000000.00
end_total_asset: 1580334.38
total_reward: 580334.38
total_cost: 349730.58
total_trades: 3985
Sharpe: 0.221
day: 5086, episode: 4
begin_total_asset: 1000000.00
end_total_asset: 1158792.91
total_reward: 158792.91
total_cost: 360179.86
total_trades: 3924
Sharpe: 0.125
day: 5086, episode: 5
begin_total_asset: 1000000.00
end_total_asset: 1725827.62
total_reward: 725827.62
total_cost: 360336.57
total_trades: 4098
Sharpe: 0.251
day: 5086, episode: 6
begin_total_asset: 1000000.00
end_total_asset: 1648165.09
total_reward: 648165.09
total_cost: 231724.89
total_trades: 2946
Sharpe: 0.227
day: 5086, episode: 7
begin_total_asset: 1000