#### Importing libraries

In [None]:
!pip install git+https://github.com/asa-eve/Trading_Bot_RL.git

In [1]:
import sys
import os

# Put the parent of the current working directory first on the import path (the directory one level above this notebook's folder)
sys.path.insert(0, f'{os.path.dirname(os.getcwd())}')

from trading_bot_rl.agent import *
from trading_bot_rl.env import *

from trading_bot_rl.functions.general import *
from trading_bot_rl.functions.callbacks import *
from trading_bot_rl.functions.env_functions import *
from trading_bot_rl.functions.data_preprocessing import *

from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CheckpointCallback

def env_kwargs_reinit():
    """Assemble a fresh keyword-argument dict for the trading environment.

    Every value is read from the notebook-level globals at call time, so the
    result reflects whatever `state_space`, `INDICATORS`, `seed_value`, etc.
    currently hold. A new dict is built on every call.

    Returns:
        dict: keyword arguments, in the same key order as before, that the
        training loop hands to `env_reinit`.
    """
    kwargs = dict(
        hmax=kwarg_hmax,
        initial_amount=kwarg_initial_amount,
        num_stock_shares=num_stock_shares,
        # The cost entries are passed as the full per-stock lists, not scalars.
        buy_cost_pct=buy_cost_list,
        sell_cost_pct=sell_cost_list,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=INDICATORS,
        # The "action_space" entry is set to the number of stocks.
        action_space=stock_dimension,
        reward_scaling=kwarg_reward_scaling,
        make_plots=MAKE_PLOTS,
        print_verbosity=VERBOSITY_PRINT,
        discrete_action_space=discrete_action_space,
        seed=seed_value,
    )
    return kwargs
    
def print_training_info():
    """Print a formatted summary of the current run configuration to stdout.

    All values are read from notebook-level globals (environment settings,
    RL model settings, training-loop bounds and data-processing options).
    Takes no arguments and returns None.

    The 'Normalization' line reports 'Yes' when `dict_args` carries a
    non-None 'scaler' entry and 'No' otherwise, including when the key is
    absent (it is only added when `encode_normalize_data` is enabled).
    """
    def yes_no(flag):
        # Render a truthy/falsy setting as the 'Yes'/'No' used in the report.
        return 'Yes' if flag else 'No'

    # Mode label: 'normal', 'normal | forecasted', ' forecasted' or ''.
    has_forecasts = df_name_forecasts is not None
    mode = ''
    if not only_forecasts_data:
        mode += 'normal'
        if has_forecasts:
            mode += ' |'
    if has_forecasts:
        mode += ' forecasted'

    split_pct = int(test_and_valid_pct * 100)
    # .get(): a missing 'scaler' key is treated the same as scaler=None.
    scaler = dict_args.get('scaler')

    print('---')
    print('    Training with next initial parameters:')
    print('')
    print(f"          Mode - {mode}")
    print('')
    print(f'       Number of Stocks to trade  - {kwarg_hmax}')
    print(f'       Initial money amount       - {kwarg_initial_amount}')
    print(f'       Commision                  - {kwarg_buy_sell_cost}')
    print(f'       Env reward scaling         - {kwarg_reward_scaling}')
    print(f"       discrete action space      - {yes_no(discrete_action_space)}")
    print('')
    # end_training_episode is the exclusive range() bound, hence the "- 1".
    print(f"   Training loop from {start_training_episode} to {end_training_episode - 1} (step = {step_training_episodes})")
    print('')
    print(f"       RL model                   - {model_name}")
    print(f"       Fixed seed                 - {yes_no(fixed_seed)}")
    print(f"       Device                     - {algorithm_parameters['device']}")
    print('')
    print(f"   Data processing info")
    print('')
    if valid_split:
        print(f"       Valid + Trade              - {split_pct} %")
    else:
        print(f"       Trade                      - {split_pct} %")
    print(f"       Tech Indicators usage      - {yes_no(dict_args['tech_indicators_usage'])}")
    print(f"       'date' cyclic encoding     - {yes_no(encode_normalize_data)}")
    print(f"       Normalization              - {yes_no(scaler is not None)}")
    print('')
    print('---')
    # Three blank lines to separate the report from subsequent output.
    for _ in range(3):
        print('')
    
# --------------------    
# Output functions in case needed
#with open(path_f + 'monkey.txt', 'w') as file:
    # Redirect the standard output to the file
#    sys.stdout = file

    # Print some output to the console
#    print('This is some output')
# Reset the standard output to the console
#sys.stdout = sys.__stdout__

  from .autonotebook import tqdm as notebook_tqdm


#### Parameters

In [2]:
# Run configuration: environment, callback, seeding, RL algorithm and
# training-loop settings read by the cells below.

# Threshold parameters -----------------
#quantile = None
#turbulence_threshold = quantile      # turbulence_threshold_define() to get turbulence
#risk_indicator_col = None            # 'vix' column

# Env parameters -----------------
kwarg_hmax = 100                    # passed to the env as "hmax"
kwarg_initial_amount = 1000000      # passed to the env as "initial_amount"
kwarg_reward_scaling = 1e-3         # 3*1e-5
kwarg_buy_sell_cost = 0.001         # used for both the buy and the sell cost lists

# RL parameters -----------------
VERBOSITY_PRINT = 1                       # in 'episodes'
MAKE_PLOTS = False
#VERBOSE_INFO_TRAINING = False             # verbosity for 'stable baselines training'

discrete_action_space = False             # For discrete 'action_space' in env [21 ~ 0.1 step, 11 ~ 0.2 step, etc..]
if discrete_action_space:
    discrete_actions = 11

# ______________
chosen_callback = 'tensorboard'           # 'tensorboard', 'eval', 'checkpoint', None
if chosen_callback == 'eval':             # evaluate model performance every 'eval_freq_ep' episodes
    eval_freq_ep = 5
    n_eval_episodes = 1

# ______________
fixed_seed = True                        # only for replicating results or hyperparameter tuning
# Always defined: the training loop evaluates `seed_values` even when
# fixed_seed is False, so leaving it undefined raised NameError there.
seed_values = [1] if fixed_seed else [None]

# ______________
# {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO, "trpo": TRPO, "lstm_ppo": RecurrentPPO}
model_name = 'lstm_ppo'
algorithm_parameters = {"learning_rate": 1e-3,
                        "device": 'cuda',
                        "gamma": 0.9,
                        "clip_range_vf": 0.5,
                        "clip_range": 0.1,
                        "batch_size": 256,
                        "n_steps": 256,
                        "n_epochs": 35,
                         }

# Iterative Training parameters ------------
times_loop_training = 40
start_training_episode = 50                                              # start == episode until which to train 'first time'
step_training_episodes = start_training_episode                          # step == number of episodes to train afterwards
# "+ 1" makes the exclusive range() bound include the final multiple of the step.
end_training_episode = start_training_episode * times_loop_training + 1

In [3]:
# Dataset selection and preprocessing options for the training loop.

# Base CSV names (without extension) looked up under `path_to_datasets`.
df_names = ['test_filtered_features_features58'] # ['all_ta_features', 'filtered_features', '^GSPC_ta_my_features']
# Suffix appended to each base name to locate its forecasts CSV.
df_name_forecasts = '_with_forecasts_LSTM_1_120'            # '_with_forecasts_LSTM_1_120'

# True -> train only on the forecast-augmented data.
only_forecasts_data = True
# Columns excluded from the INDICATORS feature lists.
unwanted_features = ['date', 'tic']

test_and_valid_pct = 0.15
valid_split = False
BOOL_TO_INT = True

# Keyword arguments forwarded to the data-preprocessing function.
dict_args = dict(
    test_and_valid_pct=test_and_valid_pct,
    tic_name='SPY',
    valid_split=valid_split,
    BOOL_TO_INT=BOOL_TO_INT,
    tech_indicators_usage=False,
    use_vix=False,
    use_turbulence=False,
    user_defined_feature=False,
)

encode_normalize_data = True
# The "scaler" entry is only added when encoding/normalization is enabled.
if encode_normalize_data:
    dict_args["scaler"] = None # 'MinMax', 'Standard', 'Robust', None

# Sibling folders of the current working directory, with forward slashes only.
path_to_datasets = os.path.dirname(os.getcwd()).replace("\\", "/") + '/datasets/'
path_to_models = os.path.dirname(os.getcwd()).replace("\\", "/") + '/trained_models/'

print_training_info()

---
    Training with next initial parameters:

          Mode -  forecasted

       Number of Stocks to trade  - 100
       Initial money amount       - 1000000
       Commision                  - 0.001
       Env reward scaling         - 0.001
       discrete action space      - No

   Training loop from 50 to 2000 (step = 50)

       RL model                   - lstm_ppo
       Fixed seed                 - Yes
       Device                     - cuda

   Data processing info

       Trade                      - 15 %
       Tech Indicators usage      - No
       'date' cyclic encoding     - Yes
       Normalization              - 

---





#### Iterative Training Process
- works with either 'data' alone or the pair (data, data_with_forecasts)
- iterates over every dataset in `df_names`
    - iterates over every seed in `seed_values`

In [None]:
# Iterative training.
# For every dataset in `df_names`, every seed and every data variant
# (without / with forecasts) the model is trained in chunks of
# `step_training_episodes` episodes. After each chunk the model is saved and
# its Sharpe ratio is computed on the validation split (if `valid_split`) or
# on the trade split. A chunk whose save file already exists is loaded and
# re-scored instead of being retrained.
for df_name in df_names:

    # Data unpackage + preprocessing
    df_main_file = path_to_datasets + f"{df_name}.csv"
    df_forecasts_file = path_to_datasets + f"{df_name + df_name_forecasts}.csv" if (df_name_forecasts is not None) else None
    if encode_normalize_data:
        train_main, valid_main, trade_main, train_forecasts, valid_forecasts, trade_forecasts = data_read_preprocessing_singleTIC_normalized_encoded(df_main_file, df_forecasts_file, **dict_args)
    else:
        train_main, valid_main, trade_main, train_forecasts, valid_forecasts, trade_forecasts = data_read_preprocessing_singleTIC(df_main_file, df_forecasts_file, **dict_args)

    # Defining features (of state_space): every column except the unwanted ones
    INDICATORS_MAIN = [col for col in train_main.columns.tolist() if col not in unwanted_features]
    if df_forecasts_file is not None:
        INDICATORS_FORECASTS = [col for col in train_forecasts.columns.tolist() if col not in unwanted_features]

    # RL Env parameters defining
    stock_dimension = len(train_main.tic.unique())
    state_space_main = 1 + 2*stock_dimension + len(INDICATORS_MAIN)*stock_dimension
    state_space_forecasts = 1 + 2*stock_dimension + len(INDICATORS_FORECASTS)*stock_dimension if (df_forecasts_file is not None) else None
    buy_cost_list = sell_cost_list = [kwarg_buy_sell_cost] * stock_dimension
    num_stock_shares = [0] * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space_main}, State Space Forecasts: {state_space_forecasts}")

    # Sharpe ratios collected per data variant (accumulated across all seeds)
    sharpe_forecastFalse = []
    sharpe_forecastTrue = []

    # Which data variants to train: 'data' only, 'data_with_forecasts' only, or both
    is_forecast_list = ([] if only_forecasts_data else [False]) + ([True] if df_name_forecasts is not None else [])

    # `seed_values` is only consulted when seeding is enabled, so it does not
    # need to exist when fixed_seed is False.
    seeds_to_run = list(seed_values) if fixed_seed else [None]

    # Main loop (for 'seeds' -> iteratively train each variant -> save each model and calculate sharpe)
    for seed_value in seeds_to_run:
        if fixed_seed: set_seed(seed_value)

        for is_forecast in is_forecast_list:

            # NOTE: env_kwargs_reinit() reads state_space, INDICATORS, seed_value,
            # stock_dimension, etc. as globals, so these names must not be renamed.
            state_space = state_space_forecasts if is_forecast else state_space_main
            INDICATORS = INDICATORS_FORECASTS if is_forecast else INDICATORS_MAIN
            train = train_forecasts if is_forecast else train_main
            trade = trade_forecasts if is_forecast else trade_main
            if valid_split: valid = valid_forecasts if is_forecast else valid_main

            # Define 'env' parameters
            env_kwargs = env_kwargs_reinit()

            # Name fragments shared by the save paths and the progress messages
            forecast_tag = str(df_name_forecasts)[1:] if is_forecast else 'without_forecast'
            split_tag = 'Valid' if valid_split else 'Test'
            seed_tag = str(seed_value) if fixed_seed else str(None)

            # Counter for training (if k=0 -> no model yet, initialize) ELSE (continue from previous save)
            k = 0

            # Iterative training loop; `i` is the total number of episodes trained once this chunk is done
            for i in range(start_training_episode, end_training_episode, step_training_episodes):
                print('------\n------')
                print(f'LOOP NUMBER {int(i/step_training_episodes)} --- {i-start_training_episode} EPISODES TRAINED')
                print('------\n------')
                NUM_EPISODES = step_training_episodes
                EPISODE_LENGTH = len(train)
                NUM_TRAINING_STEPS_FOR_1_TRIAL = NUM_EPISODES * EPISODE_LENGTH

                model_folder_name = f"{df_name}__{forecast_tag}__{split_tag}{int(test_and_valid_pct*100)}__NormEncd{encode_normalize_data}"
                model_file_name = f"{model_name.upper()}__iterationEp{step_training_episodes}__trainedEp{i}__lr{algorithm_parameters['learning_rate']}__seed{seed_tag}"
                # os.path.join instead of a hard-coded "\\": yields the same string as
                # before on Windows and a real sub-directory on POSIX systems.
                name_of_the_save_file = os.path.join(path_to_models, model_folder_name, model_file_name)
                name_of_the_save_file_zip = name_of_the_save_file + ".zip"

                # Training env + agent are needed both for training and for loading
                e_train_gym, env_train = env_reinit(train, env_kwargs_reinit())
                agent = DRLAgent(env = env_train)

                # MODEL TRAINING
                if not os.path.exists(name_of_the_save_file_zip):

                    # Correct the env's episode counter for the episodes already trained
                    e_train_gym.episode = i-start_training_episode + 1

                    # Callback; stays None for 'checkpoint' / None instead of being undefined.
                    # NOTE(review): assumes agent.train_model accepts callback=None - confirm.
                    callback_arg = None
                    if chosen_callback == 'eval':
                        eval_freq = EPISODE_LENGTH * eval_freq_ep
                        eval_callback_dir = os.path.join(name_of_the_save_file, 'eval_callback')
                        # NOTE(review): the evaluation env here is the training env - confirm this is intended.
                        callback_arg = EvalCallback(eval_env=e_train_gym, eval_freq=eval_freq, n_eval_episodes=n_eval_episodes, best_model_save_path=eval_callback_dir, log_path=eval_callback_dir, deterministic=True)
                    elif chosen_callback == 'tensorboard':
                        # NOTE(review): the BaseCallback class itself is passed as the argument - confirm against TensorboardCallback's signature.
                        callback_arg = TensorboardCallback(BaseCallback)

                    # First chunk: build a new model; later chunks: continue from the previous save
                    if k == 0:
                        model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)
                    else:
                        model = model.load(prev_name, env = env_train)
                    trained = agent.train_model(model=model,
                                                tb_log_name=model_name,
                                                total_timesteps=NUM_TRAINING_STEPS_FOR_1_TRIAL,
                                                callback=callback_arg)

                    # MODEL SAVING (before scoring, so a failure while scoring does not lose the chunk)
                    trained.save(name_of_the_save_file_zip)

                # MODEL LOADING
                else:
                    print('Model already exists')
                    model = agent.get_model(model_name=model_name, model_kwargs = algorithm_parameters, verbose=0, seed=seed_value)
                    trained = model.load(name_of_the_save_file_zip, env = env_train)

                # CALCULATING sharpe on 'valid / test' (same for freshly trained and loaded models)
                env_trade_gym, env_trade = env_reinit(valid if valid_split else trade, env_kwargs_reinit())
                df_account_value, df_actions = DRLAgent.prediction(model=trained, environment = env_trade_gym)
                sharpe = calculate_sharpe(df_account_value)
                (sharpe_forecastTrue if is_forecast else sharpe_forecastFalse).append(sharpe)

                prev_name = name_of_the_save_file_zip
                print('Forecasts = ', is_forecast, '| Seed = ', seed_value, '| Num episodes = ', i, f'| Sharpe {split_tag} = ', sharpe)
                print('---')
                k += 1


        # Summary after each seed; an empty list reports nan without numpy's empty-slice warning
        print('------')
        print(df_name)
        print('Seed average no forecast = ', np.mean(sharpe_forecastFalse) if sharpe_forecastFalse else float('nan'))
        print('Seed average with forecast = ', np.mean(sharpe_forecastTrue) if sharpe_forecastTrue else float('nan'))
        print('------')
        print('No forecast = ', sharpe_forecastFalse)
        print('With forecast =', sharpe_forecastTrue)
        print('------')

train  1999-03-19   2019-06-06
trade  2019-06-07   2022-12-27
train  1999-03-19   2019-06-06
trade  2019-06-07   2022-12-27
Stock Dimension: 1, State Space: 62, State Space Forecasts: 64
------
------
LOOP NUMBER 1 --- 0 EPISODES TRAINED
------
------
{'learning_rate': 0.001, 'device': 'cuda', 'gamma': 0.9, 'clip_range_vf': 0.5, 'clip_range': 0.1, 'batch_size': 256, 'n_steps': 256, 'n_epochs': 35}




day: 5086, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 1915756.12
total_reward: 915756.12
total_cost: 381437.92
total_trades: 4436
total_agent_reward: 915.7561205233974
Sharpe: 0.296
day: 5086, episode: 2
begin_total_asset: 1000000.00
end_total_asset: 1202475.66
total_reward: 202475.66
total_cost: 291614.19
total_trades: 3796
total_agent_reward: 202.4756559402029
Sharpe: 0.138
day: 5086, episode: 3
begin_total_asset: 1000000.00
end_total_asset: 1059123.68
total_reward: 59123.68
total_cost: 344177.56
total_trades: 4024
total_agent_reward: 59.12367646211777
Sharpe: 0.095
day: 5086, episode: 4
begin_total_asset: 1000000.00
end_total_asset: 991406.43
total_reward: -8593.57
total_cost: 311523.16
total_trades: 3757
total_agent_reward: -8.59357000169291
Sharpe: 0.054
day: 5086, episode: 5
begin_total_asset: 1000000.00
end_total_asset: 1311545.37
total_reward: 311545.37
total_cost: 315495.83
total_trades: 3516
total_agent_reward: 311.54536939351067
Sharpe: 0.171
day: 5086, episod



day: 896, episode: 1
begin_total_asset: 1000000.00
end_total_asset: 1326331.40
total_reward: 326331.40
total_cost: 86846.76
total_trades: 428
total_agent_reward: 326.3313972890664
Sharpe: 0.496
hit end!
Forecasts =  True | Seed =  1 | Num episodes =  50 | Sharpe Test =  0.4961019110139536
---
------
------
LOOP NUMBER 2 --- 50 EPISODES TRAINED
------
------




day: 5086, episode: 51
begin_total_asset: 1000000.00
end_total_asset: 1729994.07
total_reward: 729994.07
total_cost: 287708.10
total_trades: 3456
total_agent_reward: 729.9940681318552
Sharpe: 0.270
day: 5086, episode: 52
begin_total_asset: 1000000.00
end_total_asset: 1794978.36
total_reward: 794978.36
total_cost: 294863.00
total_trades: 3598
total_agent_reward: 794.978356724568
Sharpe: 0.284
day: 5086, episode: 53
begin_total_asset: 1000000.00
end_total_asset: 1858982.73
total_reward: 858982.73
total_cost: 290912.14
total_trades: 3612
total_agent_reward: 858.9827319371497
Sharpe: 0.299
