#### Importing libraries

In [6]:
import os
import sys

# Make the parent of the current working directory importable, so that the
# `trading_bot_rl` package imported in the next cell can be resolved.
# `os` is imported here (not only further down) so this cell runs on a fresh kernel.
sys.path.insert(0, os.path.dirname(os.getcwd()))

In [8]:
#pip install 'shimmy>=0.2.1'
#!pip install git+https://github.com/optuna/optuna.git
#!pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib

from trading_bot_rl.agent import *
from trading_bot_rl.env import *

from trading_bot_rl.functions.general import *
from trading_bot_rl.functions.callbacks import *
from trading_bot_rl.functions.env_functions import *
from trading_bot_rl.functions.data_preprocessing import *

from stable_baselines3.common.callbacks import BaseCallback
import os

def env_kwargs_reinit():
    """Assemble a fresh keyword-argument dict for the trading environment.

    Every value is looked up from the notebook's module-level variables at
    call time, so calling this again after those variables were reassigned
    returns the updated configuration.

    Returns:
        dict: a new dict on every call, with the keys in the order listed below.
    """
    settings = dict(
        hmax=kwarg_hmax,
        initial_amount=kwarg_initial_amount,
        num_stock_shares=num_stock_shares,
        # The cost entries pass the whole list, not its first element.
        buy_cost_pct=buy_cost_list,
        sell_cost_pct=sell_cost_list,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=INDICATORS,
        # The action-space size is taken from the number of stocks.
        action_space=stock_dimension,
        reward_scaling=kwarg_reward_scaling,
        make_plots=MAKE_PLOTS,
        print_verbosity=VERBOSITY_PRINT,
        discrete_action_space=discrete_action_space,
    )
    return settings

def callback(chosen_callback):
    """Translate a callback name into the matching callback object.

    Args:
        chosen_callback: 'tensorboard', 'eval' or 'checkpoint'; any other
            value (including None) selects nothing.

    Returns:
        ``TensorboardCallback``, ``eval_callback`` or ``checkpoint_callback``
        for the three known names, otherwise None.
    """
    # Names are resolved only inside the branch that needs them, so an
    # unavailable callback does not break the other choices.
    if chosen_callback == 'tensorboard':
        selected = TensorboardCallback
    elif chosen_callback == 'eval':
        selected = eval_callback
    elif chosen_callback == 'checkpoint':
        selected = checkpoint_callback
    else:
        selected = None
    return selected

#### Parameters

In [2]:
trained_models = {}                       # dictionary for 'saving' models
last_model_trained = None                 # 'for testing' - takes last trained model as one for test

# RL parameters -----------------
discrete_action_space = False             # For discrete 'action_space' in env [21 ~ 0.1 step, 11 ~ 0.2 step, etc..]
if discrete_action_space:
    discrete_actions = 11

VERBOSITY_PRINT = 1                       # in 'episodes'
#VERBOSE_INFO_TRAINING = False             # verbosity for 'stable baselines training'
#VERBOSITY_PRINT = 1000*(1-VERBOSE_INFO_TRAINING) + 1*VERBOSE_INFO_TRAINING
MAKE_PLOTS = False

chosen_callback = 'tensorboard'           # 'tensorboard', 'eval', 'checkpoint', None
# Argument handed to the object returned by callback(chosen_callback).
# Only the 'tensorboard' choice gets BaseCallback; every other choice
# (including None) gets None, so callback_arg is always defined.
callback_arg = BaseCallback if chosen_callback == 'tensorboard' else None

fixed_seed = True                        # only for replicating results or hyperparameters tuning
# Seeds iterated by the training loop when fixed_seed is True. Bound
# unconditionally so the loop can reference it even when fixed_seed is False.
seed_values = [1]

model_name = 'a2c'
algorithm_parameters = {"learning_rate": 1e-4,
                        "device": 'cuda',
                         }

turbulence_threshold = None
risk_indicator_col = None
quantile = 0.997
# 'vix' column
# turbulence_threshold_define() to get turbulence

# Env parameters -----------------
kwarg_hmax = 100
kwarg_initial_amount = 1000000
kwarg_reward_scaling = 1e-4
kwarg_buy_sell_cost = 0.001

# Iterative Training parameters ------------
resume_training = False                     # resume training from already existing model
times = 1
start_training_episode = 50                                 # start == episode until which to train 'first time'
step_training_episodes = start_training_episode             # step == number of episodes to train afterwards
# +1 so that range(start, end, step) in the training loop includes start * times.
end_training_episode = start_training_episode * times + 1

In [3]:
# Every column of the dataset is used as an RL feature, except those listed in
# `unwanted_features`. To exclude something else, drop it with pandas first.
# The dataset must contain the columns ('date', 'open', 'close', 'volume',
# 'high', 'low') ---> rename your DataFrame columns with pandas if needed.

# Note: a big problem was with 'data_split' ---> it ate the last element of 'train', 'valid', 'end'.

# -----------------------

df_names = ['^GSPC_ta_my_features'] # ['all_ta_features', 'filtered_features', '^GSPC_ta_my_features']
df_name_forecasts = None # '_with_forecasts_LSTM_1_120'
unwanted_features = ['date', 'tic']

# Built with os.path.join so the separators match the host OS; the trailing ''
# keeps the trailing separator that later string concatenation relies on.
path_to_datasets = os.path.join(os.getcwd(), 'Trading_bot_RL', 'datasets', '')
path_to_models = os.path.join(os.getcwd(), 'Trading_bot_RL', 'trained_models', '')

test_and_valid_pct = 0.15
valid_split = False
BOOL_TO_INT = True

# Keyword arguments forwarded to data_read_preprocessing_singleTIC().
dict_args = {
    "test_and_valid_pct": test_and_valid_pct,
    "tic_name": 'SPY',
    "valid_split": valid_split,
    "BOOL_TO_INT": BOOL_TO_INT,
    "tech_indicators_usage": True,
    "use_vix": False,
    "use_turbulence": False,
    "user_defined_feature": False,
}

#### Iterative Training Process

In [None]:
# Iterative training / evaluation driver.
#
# For every dataset in `df_names`: load the splits, derive the environment
# dimensions, then for each seed / forecast variant / episode budget either
# train a model (and save it) or load the previously saved one, and score it
# on the validation or test split with calculate_sharpe().
#
# The loop deliberately stays at module level: env_kwargs_reinit() reads
# `state_space`, `INDICATORS`, `stock_dimension`, the cost lists and
# `num_stock_shares` as globals, so they must be assigned here rather than
# inside a helper function.
for df_name in df_names:

    # ---- Load the train / (valid) / trade splits ----------------------------
    df_main_file = path_to_datasets + f"{df_name}.csv"
    has_forecasts = df_name_forecasts is not None
    if has_forecasts:
        df_forecasts_file = path_to_datasets + f"{df_name + df_name_forecasts}.csv"
    else:
        df_forecasts_file = None

    splits = data_read_preprocessing_singleTIC(df_main_file, df_forecasts_file, **dict_args)

    # Explicit unpacking: a result with an unexpected number of frames fails
    # here instead of further down.
    if has_forecasts:
        if valid_split:
            (train_main, valid_main, trade_main,
             train_forecasts, valid_forecasts, trade_forecasts) = splits
        else:
            train_main, trade_main, train_forecasts, trade_forecasts = splits
    elif valid_split:
        train_main, valid_main, trade_main = splits
    else:
        train_main, trade_main = splits

    # ---- Feature lists and environment dimensions ---------------------------
    INDICATORS_MAIN = train_main.columns.tolist()
    for feature in unwanted_features:
        INDICATORS_MAIN.remove(feature)
    if has_forecasts:
        INDICATORS_FORECASTS = train_forecasts.columns.tolist()
        for feature in unwanted_features:
            INDICATORS_FORECASTS.remove(feature)

    stock_dimension = len(train_main.tic.unique())
    # NOTE(review): presumably 1 (cash) + price and holding per stock +
    # indicators per stock — confirm against the environment implementation.
    state_space_main = 1 + 2*stock_dimension + len(INDICATORS_MAIN)*stock_dimension
    if has_forecasts:
        state_space_forecasts = 1 + 2*stock_dimension + len(INDICATORS_FORECASTS)*stock_dimension
    else:
        state_space_forecasts = None

    buy_cost_list = sell_cost_list = [kwarg_buy_sell_cost] * stock_dimension
    num_stock_shares = [0] * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space_main}, State Space Forecasts: {state_space_forecasts}")

    # ---- Train / load and evaluate ------------------------------------------
    sharpe_forecastFalse = []
    sharpe_forecastTrue = []

    # Which split the Sharpe ratio is computed on; used in file names and logs.
    sharpe_split_label = 'Valid' if valid_split else 'Test'

    if resume_training:
        print("RESUME TRAINING MODE")

    # Without a fixed seed a single pass with the placeholder value 1 is made.
    # `seed_values` is only looked up when it is actually needed.
    for seed_value in (seed_values if fixed_seed else [1]):
        if fixed_seed:
            set_seed(seed_value)

        # The forecast variant is only run when forecast data was loaded.
        is_forecast_list = [False]
        if has_forecasts:
            is_forecast_list.append(True)

        for is_forecast in is_forecast_list:

            # These names are read as globals by env_kwargs_reinit().
            if is_forecast:
                state_space = state_space_forecasts
                INDICATORS = INDICATORS_FORECASTS
                train = train_forecasts
                trade = trade_forecasts
                if valid_split:
                    valid = valid_forecasts
            else:
                state_space = state_space_main
                INDICATORS = INDICATORS_MAIN
                train = train_main
                trade = trade_main
                if valid_split:
                    valid = valid_main

            env_kwargs = env_kwargs_reinit()

            # Number of models already trained/loaded for this variant; in
            # resume mode a value > 0 means "continue from `trained`".
            k = 0
            for i in range(start_training_episode, end_training_episode, step_training_episodes):
                NUM_EPISODES = i
                EPISODE_LENGTH = len(train)
                # NOTE(review): in resume mode a continued model is trained for
                # NUM_EPISODES more episodes (not step_training_episodes) —
                # confirm this matches the 'Episodes' tag in the file name.
                NUM_TRAINING_STEPS_FOR_1_TRIAL = NUM_EPISODES * EPISODE_LENGTH

                # Save location: <path_to_models><config dir>/<run tag>[.zip]
                model_dir_name = (
                    f"{model_name}"
                    f"_techindicators{dict_args['tech_indicators_usage']}"
                    f"_vix{dict_args['use_vix']}"
                    f"_turbulence{dict_args['use_turbulence']}"
                )
                seed_label = str(seed_value) if fixed_seed else str(None)
                model_file_stem = (
                    f"{df_name}_{model_name}"
                    f"_lr{algorithm_parameters['learning_rate']}"
                    f"_Forecast{is_forecast}"
                    f"_Seed{seed_label}"
                    f"_Episodes{NUM_EPISODES}"
                    f"_Sharpe{sharpe_split_label}{int(test_and_valid_pct*100)}"
                )
                name_of_the_save_file = os.path.join(path_to_models + model_dir_name, model_file_stem)
                name_of_the_save_file_zip = name_of_the_save_file + ".zip"

                model_already_saved = os.path.exists(name_of_the_save_file_zip)

                # The training env is rebuilt in both cases so that `env_train`
                # is always defined, including on a fresh kernel where only
                # saved models are loaded.
                e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                agent = DRLAgent(env = env_train)

                if model_already_saved:
                    print('Model already exists')

                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)
                    trained = model.load(name_of_the_save_file, env = env_train)
                else:
                    if resume_training and k > 0:
                        # Continue from the model produced by the previous step.
                        model = trained
                    else:
                        model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)

                    # callback() returns None for an unknown/None choice; in
                    # that case no callback object is built.
                    # NOTE(review): assumes agent.train_model accepts
                    # callback=None as "no callback" — TODO confirm.
                    callback_factory = callback(chosen_callback)
                    if callback_factory is not None:
                        training_callback = callback_factory(callback_arg)
                    else:
                        training_callback = None

                    trained = agent.train_model(model=model,
                                                tb_log_name=model_name,
                                                total_timesteps=NUM_TRAINING_STEPS_FOR_1_TRIAL,
                                                callback=training_callback)

                # Evaluate on the validation split when one exists, otherwise
                # on the trade (test) split.
                eval_data = valid if valid_split else trade
                if turbulence_threshold is not None:
                    env_trade_gym, env_trade = env_reinit(eval_data, env_kwargs_reinit(), turbulence_threshold_define(train, quantile), 'vix')
                else:
                    env_trade_gym, env_trade = env_reinit(eval_data, env_kwargs_reinit())

                df_account_value, df_actions = DRLAgent.prediction(model=trained, environment = env_trade_gym)
                sharpe = calculate_sharpe(df_account_value)

                if not model_already_saved:
                    trained.save(name_of_the_save_file_zip)

                # Collected for freshly trained and loaded models alike, so the
                # summary below covers every evaluated run.
                if is_forecast:
                    sharpe_forecastTrue.append(sharpe)
                else:
                    sharpe_forecastFalse.append(sharpe)

                print('Forecasts = ', is_forecast, '| Seed = ', seed_value, '| Num episodes = ', i, f'| Sharpe {sharpe_split_label} = ', sharpe)
                print('---')

                k += 1

    if not resume_training:
        print('------')
        print(df_name)
        # An empty list is reported as nan directly, avoiding numpy's
        # "mean of empty slice" warning.
        print('Seed average no forecast = ', np.mean(sharpe_forecastFalse) if sharpe_forecastFalse else float('nan'))
        print('Seed average with forecast = ', np.mean(sharpe_forecastTrue) if sharpe_forecastTrue else float('nan'))
        print('------')
        print('No forecast = ', sharpe_forecastFalse)
        print('With forecast =', sharpe_forecastTrue)
        print('------')