#### Importing libraries

In [None]:
!pip install git+https://github.com/asa-eve/Trading_Bot_RL.git

In [202]:
import sys
import os

# Put the parent of the current working directory first on the import path
sys.path.insert(0, f'{os.path.dirname(os.getcwd())}')

from trading_bot_rl.agent import *
from trading_bot_rl.env import *

from trading_bot_rl.functions.general import *
from trading_bot_rl.functions.callbacks import *
from trading_bot_rl.functions.env_functions import *
from trading_bot_rl.functions.data_preprocessing import *

from stable_baselines3.common.callbacks import BaseCallback

def env_kwargs_reinit():
    """Assemble a fresh keyword-argument dict for the trading environment.

    Every value is looked up in the notebook's module-level variables at call
    time, so calling this again after one of them changed yields an updated
    dict. The cost lists and `num_stock_shares` are passed through as-is
    (no copies are made).

    Returns:
        dict: environment settings keyed by parameter name.
    """
    env_settings = dict(
        hmax=kwarg_hmax,
        initial_amount=kwarg_initial_amount,
        num_stock_shares=num_stock_shares,
        buy_cost_pct=buy_cost_list,      # whole list, not buy_cost_list[0]
        sell_cost_pct=sell_cost_list,    # whole list, not sell_cost_list[0]
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=INDICATORS,
        action_space=stock_dimension,    # same value as stock_dim
        reward_scaling=kwarg_reward_scaling,
        make_plots=MAKE_PLOTS,
        print_verbosity=VERBOSITY_PRINT,
        discrete_action_space=discrete_action_space,
    )
    return env_settings

def callback(chosen_callback):
    """Map a callback name to the matching callback object.

    Args:
        chosen_callback: one of 'tensorboard', 'eval', 'checkpoint', or None.

    Returns:
        `TensorboardCallback`, `eval_callback` or `checkpoint_callback` for the
        matching name (looked up lazily, only the selected one is touched);
        None when `chosen_callback` is None.

    Raises:
        ValueError: if `chosen_callback` is any other value. Previously such a
            value silently produced None and only failed later, when the
            result was called.
    """
    if chosen_callback is None:
        return None
    if chosen_callback == 'tensorboard':
        return TensorboardCallback
    if chosen_callback == 'eval':
        return eval_callback
    if chosen_callback == 'checkpoint':
        return checkpoint_callback
    raise ValueError(
        f"Unknown callback {chosen_callback!r}; "
        "expected 'tensorboard', 'eval', 'checkpoint' or None"
    )
    
def print_training_info():
    """Print a summary of the current notebook configuration to stdout.

    All values are read from the notebook's module-level variables at call
    time (environment, training-loop, model and data-processing settings).
    Returns None.
    """
    def _yes_no(flag):
        # String repetition by a bool/int: 'No' for False/0, 'Yes' for True/1.
        return ('No' * (1 - flag)) + ('Yes' * (flag))

    print('---')
    print('    Training with next initial parameters:')
    print()

    # The mode label is assembled from up to three fragments, each switched
    # on or off by multiplying the fragment with a boolean.
    mode_label = (
        'normal' * (1 - only_forecasts_data)
        + ' |' * ((df_name_forecasts != None) and (only_forecasts_data != True))
        + ' forecasted' * (df_name_forecasts != None)
    )
    print(f"          Mode - {mode_label}")
    print()

    print(f'       Number of Stocks to trade  - {kwarg_hmax}')
    print(f'       Initial money amount       - {kwarg_initial_amount}')
    print(f'       Commision                  - {kwarg_buy_sell_cost}')
    print(f'       Env reward scaling         - {kwarg_reward_scaling}')
    print(f"       discrete action space      - {_yes_no(discrete_action_space)}")
    print()

    # `end_training_episode` is an exclusive range bound, hence the "- 1".
    print(f"   Training loop from {start_training_episode} to {end_training_episode - 1} (step = {step_training_episodes})")
    print()

    print(f"       RL model                   - {model_name}")
    print(f"       Fixed seed                 - {_yes_no(fixed_seed)}")
    print(f"       Device                     - {algorithm_parameters['device']}")
    print()

    print(f"   Data processing info")
    print()
    if valid_split:
        print(f"       Valid + Trade              - {int(test_and_valid_pct * 100)} %")
    else:
        print(f"       Trade                      - {int(test_and_valid_pct * 100)} %")
    print(f"       Tech Indicators usage      - {_yes_no(dict_args['tech_indicators_usage'])}")
    print()
    print('---')

    # Three trailing blank lines (two explicit newlines plus print's own).
    print('\n\n')

#### Parameters

In [187]:
# Notebook-wide configuration. Every name defined here is read as a global by
# the helper functions and by the training loop.

trained_models = {}                       # dictionary for 'saving' models
last_model_trained = None                 # 'for testing' - takes last trained model as one for test

# RL parameters -----------------
discrete_action_space = False             # For discrete 'action_space' in env [21 ~ 0.1 step, 11 ~ 0.2 step, etc..]
if discrete_action_space:
    discrete_actions = 11

VERBOSITY_PRINT = 1                       # in 'episodes'
#VERBOSE_INFO_TRAINING = False             # verbosity for 'stable baselines training'
#VERBOSITY_PRINT = 1000*(1-VERBOSE_INFO_TRAINING) + 1*VERBOSE_INFO_TRAINING
MAKE_PLOTS = False

chosen_callback = 'tensorboard'           # 'tensorboard', 'eval', 'checkpoint', None
# Default first, so `callback_arg` exists for every choice (including None);
# only 'tensorboard' overrides it.
callback_arg = None
if chosen_callback == 'tensorboard':
    callback_arg = BaseCallback

fixed_seed = True                         # only for replicating results or hyperparameters tuning
# Defined unconditionally: the training loop evaluates `seed_values * fixed_seed`
# even when `fixed_seed` is False, so the name must always exist. With
# `fixed_seed = False` the product is an empty list and the values are ignored.
seed_values = [1]


model_name = 'a2c'
algorithm_parameters = {"learning_rate": 1e-4,
                        "device": 'cuda',
                        }


# Threshold parameters -----------------
#quantile = None
#turbulence_threshold = quantile      # turbulence_threshold_define() to get turbulence
#risk_indicator_col = None            # 'vix' column

# Env parameters -----------------
kwarg_hmax = 100
kwarg_initial_amount = 1000000
kwarg_reward_scaling = 1e-4
kwarg_buy_sell_cost = 0.001

# Iterative Training parameters ------------
times_loop_training = 1
start_training_episode = 50                                              # start == episode until which to train 'first time'
step_training_episodes = start_training_episode                          # step == number of episodes to train afterwards
# Exclusive upper bound for range(start, end, step): "+ 1" keeps the last multiple of the step inside the range.
end_training_episode = start_training_episode * times_loop_training + 1

In [204]:
# Dataset selection and preprocessing options.
# - Every dataset column except those listed in `unwanted_features` becomes an RL feature;
#   to exclude another column, drop it with pandas beforehand.
# - The dataset must contain the columns 'date', 'open', 'close', 'volume', 'high', 'low'
#   (rename your DataFrame columns with pandas if necessary).
# -----------------------

df_names = ['^GSPC_ta_my_features']  # ['all_ta_features', 'filtered_features', '^GSPC_ta_my_features']
df_name_forecasts = None             # '_with_forecasts_LSTM_1_120'
only_forecasts_data = False
unwanted_features = ['date', 'tic']

# Both folders sit next to the current working directory (under its parent);
# backslashes are normalised to forward slashes.
path_to_datasets, path_to_models = (
    f"{os.path.dirname(os.getcwd())}/{folder_name}/".replace("\\", "/")
    for folder_name in ('datasets', 'trained_models')
)

test_and_valid_pct = 0.15
valid_split = False
BOOL_TO_INT = True

# Keyword arguments forwarded to the data-preprocessing call.
dict_args = dict(
    test_and_valid_pct=test_and_valid_pct,
    tic_name='SPY',
    valid_split=valid_split,
    BOOL_TO_INT=BOOL_TO_INT,
    tech_indicators_usage=False,
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False,
)

print_training_info()

---
    Training with next initial parameters:

          Mode - normal

       Number of Stocks to trade  - 100
       Initial money amount       - 1000000
       Commision                  - 0.001
       Env reward scaling         - 0.0001
       discrete action space      - No

   Training loop from 50 to 50 (step = 50)

       RL model                   - a2c
       Fixed seed                 - Yes
       Device                     - cuda

   Data processing info

       Trade                      - 15 %
       Tech Indicators usage      - No

---





#### Iterative Training Process
- works with plain 'data' alone or with the pair (data, data_with_forecasts)
- iterates over all datasets (in df_names)
    - iterates over all seeds (in seed_values)

In [193]:
# Iterative training: for every dataset in `df_names`, every seed and every data
# variant (plain data / data with forecasts), train a model -- or reload it if its
# save file already exists -- and compute the Sharpe ratio on the validation split
# (when `valid_split` is set) or on the trade split.
for df_name in df_names:

    # Data loading + preprocessing
    df_main_file = path_to_datasets + f"{df_name}.csv"
    df_forecasts_file = path_to_datasets + f"{df_name + df_name_forecasts}.csv" if (df_name_forecasts is not None) else None
    (train_main, valid_main, trade_main,
     train_forecasts, valid_forecasts, trade_forecasts) = data_read_preprocessing_singleTIC(df_main_file, df_forecasts_file, **dict_args)

    # Features of the state space: every column except the unwanted ones.
    # `.remove` is kept on purpose: it raises if an expected column is missing.
    INDICATORS_MAIN = train_main.columns.tolist()
    for feature in unwanted_features:
        INDICATORS_MAIN.remove(feature)
    if df_forecasts_file is not None:
        INDICATORS_FORECASTS = train_forecasts.columns.tolist()
        for feature in unwanted_features:
            INDICATORS_FORECASTS.remove(feature)

    # RL env parameters
    stock_dimension = len(train_main.tic.unique())
    state_space_main = 1 + 2*stock_dimension + len(INDICATORS_MAIN)*stock_dimension
    state_space_forecasts = 1 + 2*stock_dimension + len(INDICATORS_FORECASTS)*stock_dimension if (df_forecasts_file is not None) else None
    buy_cost_list = sell_cost_list = [kwarg_buy_sell_cost] * stock_dimension
    num_stock_shares = [0] * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space_main}, State Space Forecasts: {state_space_forecasts}")

    # Sharpe ratios collected for this dataset, split by data variant
    sharpe_forecastFalse = []
    sharpe_forecastTrue = []

    # Which data variants to train on:
    #   plain data only, plain + forecasts, forecasts only, or nothing
    #   (forecasts-only requested but no forecast dataset configured).
    has_forecasts = df_name_forecasts is not None
    if not only_forecasts_data:
        is_forecast_list = [False, True] if has_forecasts else [False]
    else:
        is_forecast_list = [True] if has_forecasts else []

    # Label of the split the Sharpe ratio is computed on (used in file names and logs)
    eval_split_name = 'Valid' if valid_split else 'Test'

    # Main loop over seeds. Without a fixed seed there is a single unseeded run (seed None);
    # `seed_values` is only looked up when `fixed_seed` is set, so it need not exist otherwise.
    for seed_value in (seed_values if fixed_seed else [None]):
        if fixed_seed: set_seed(seed_value)

        for is_forecast in is_forecast_list:

            # Select the data variant; these names are globals read by env_kwargs_reinit()
            state_space = state_space_forecasts if is_forecast else state_space_main
            INDICATORS = INDICATORS_FORECASTS if is_forecast else INDICATORS_MAIN
            train = train_forecasts if is_forecast else train_main
            trade = trade_forecasts if is_forecast else trade_main
            if valid_split: valid = valid_forecasts if is_forecast else valid_main

            # Define 'env' parameters
            env_kwargs = env_kwargs_reinit()

            # Counter of completed iterations: k == 0 -> build a new model, otherwise continue from `trained`
            k = 0

            # Iterative training loop
            for i in range(start_training_episode, end_training_episode, step_training_episodes):
                NUM_EPISODES = i
                EPISODE_LENGTH = len(train)
                NUM_TRAINING_STEPS_FOR_1_TRIAL = NUM_EPISODES * EPISODE_LENGTH

                # Save location: <path_to_models><model/config folder>/<run name>[.zip].
                # A forward slash is used as the separator, consistent with the already
                # normalised `path_to_models`; a hard-coded backslash would become part of
                # the file name on non-Windows systems instead of creating a sub-folder.
                model_dir = f"{path_to_models}{model_name}_techindicators{dict_args['tech_indicators_usage']}_vix{dict_args['use_vix']}_turbulence{dict_args['use_turbulence']}"
                run_name = f"{df_name}_{model_name}_lr{algorithm_parameters['learning_rate']}_Forecast{is_forecast}_Seed{seed_value}_Episodes{NUM_EPISODES}_Sharpe{eval_split_name}{int(test_and_valid_pct*100)}"
                name_of_the_save_file = f"{model_dir}/{run_name}"
                name_of_the_save_file_zip = name_of_the_save_file + ".zip"

                if not os.path.exists(name_of_the_save_file_zip):
                    # MODEL TRAINING
                    e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                    agent = DRLAgent(env = env_train)
                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value) if (k == 0) else trained
                    trained = agent.train_model(model=model,
                                                tb_log_name=model_name,
                                                total_timesteps=NUM_TRAINING_STEPS_FOR_1_TRIAL,
                                                callback=callback(chosen_callback)(callback_arg))

                    # MODEL SAVING (done right after training, before evaluation)
                    trained.save(name_of_the_save_file_zip)
                else:
                    # MODEL LOADING
                    print('Model already exists')
                    e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                    agent = DRLAgent(env = env_train)
                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)
                    # Load from the exact file whose existence was just checked
                    trained = model.load(name_of_the_save_file_zip, env = env_train)
                k += 1

                # CALCULATING sharpe on 'valid / test' (shared by both branches)
                eval_data = valid if valid_split else trade
                env_trade_gym, env_trade = env_reinit(eval_data, env_kwargs_reinit())
                df_account_value, df_actions = DRLAgent.prediction(model=trained, environment = env_trade_gym)
                sharpe = calculate_sharpe(df_account_value)
                if is_forecast:
                    sharpe_forecastTrue.append(sharpe)
                else:
                    sharpe_forecastFalse.append(sharpe)

                print('Forecasts = ', is_forecast, '| Seed = ', seed_value, '| Num episodes = ', i, f'| Sharpe {eval_split_name} = ', sharpe)
                print('---')


        # Summary, printed after every seed: the averages cover all seeds processed so far
        # for this dataset. An empty list yields nan directly, which avoids numpy's
        # "Mean of empty slice" warning while printing the same value.
        print('------')
        print(df_name)
        print('Seed average no forecast = ', np.mean(sharpe_forecastFalse) if sharpe_forecastFalse else float('nan'))
        print('Seed average with forecast = ', np.mean(sharpe_forecastTrue) if sharpe_forecastTrue else float('nan'))
        print('------')
        print('No forecast = ', sharpe_forecastFalse)
        print('With forecast =', sharpe_forecastTrue)
        print('------')

Successfully added technical indicators
train_forecasts  1993-01-29   2018-07-18
trade_forecasts  2018-07-19   2023-01-13
Stock Dimension: 1, State Space: 134, State Space Forecasts: None
{'learning_rate': 0.0001, 'device': 'cuda'}




day: 6413, episode: 3
begin_total_asset: 1000000.00
end_total_asset: 3174614.43
total_reward: 2174614.43
total_cost: 475904.93
total_trades: 6356
Sharpe: 0.415


KeyboardInterrupt: 