In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import math

%run utils.py

In [None]:
# Check if GPU is connected: show the GPU devices, then the CPU devices,
# that TensorFlow reports on this machine.
for device_kind in ('GPU', 'CPU'):
    print(tf.config.list_physical_devices(device_kind))

In [None]:
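# Path to the processed data CSV (read later with pd.read_csv(processed_data_path)).
# Uncomment the line below and set it before running the rest of the notebook:
# processed_data_path = 'file_path'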

In [None]:
# Full candidate lists; the factors actually used are filtered from these below.
basic_factor_candidates = [
    'cshtrd', 'prccd', 'prchd', 'prcld', 'prcod', 'dol_vol',
    'Mom_2day', 'Mom_3day', 'Mom_5day',
    'MA_10day', 'MA_50day', 'open/MA10', 'open/MA50',
    'STD_10day', 'H-L', 'RSI', 'MACD', 'MACD_Signal_Line',
]

TA_factor_candidates = [
    # Momentum indicators
    'momentum_stoch_rsi', 'momentum_stoch', 'momentum_ao',
    'momentum_pvo', 'momentum_kama', 'momentum_wr',
    # Volume indicators
    'volume_adi', 'volume_em', 'volume_fi', 'volume_cmf', 'volume_vpt',
    # Volatility indicators
    'volatility_atr', 'volatility_bbh', 'volatility_dcw', 'volatility_ui',
    # Trend indicators
    'trend_adx', 'trend_aroon_up', 'trend_aroon_down', 'trend_ichimoku_a',
    # Other indicators
    'others_dr',
]

# Excluded factors, grouped by the reason for dropping them.
# Low variance:
low_variance_factors = {'dol_vol'}
# High correlation with prcod (prcod itself is dropped too, since it shouldn't affect the return):
prcod_correlated_factors = {
    'prccd', 'prcld', 'prchd', 'prcod', 'MA_10day',
    'trend_ichimoku_a', 'volatility_bbh', 'momentum_kama',
}
# High correlation with momentum_stoch:
momentum_stoch_correlated_factors = {'momentum_wr'}

excluded_factors = (
    low_variance_factors
    | prcod_correlated_factors
    | momentum_stoch_correlated_factors
)

# Filtering keeps the candidates' original ordering.
basic_factors = [name for name in basic_factor_candidates if name not in excluded_factors]
TA_factors = [name for name in TA_factor_candidates if name not in excluded_factors]
factors = basic_factors + TA_factors

print(f'There are {len(basic_factors)} basic factors')
print(f'There are {len(TA_factors)} TA factors')
print(f'There are {len(factors)} factors')

# Load the processed dataset; the day list, sector count and ticker lookup
# dictionaries are all derived from the table before any filtering.
data = pd.read_csv(processed_data_path)
all_days = list(data['datadate'].unique())
num_of_tokens = data['sector'].nunique()
num_to_tic_dict, tic_to_num_dict = num_tic_dicts(data)

# Filter and label the rows (helpers are not defined in this notebook;
# presumably they come from utils.py).
data = remove_dead_stocks(data)
data = assign_class_labels(data, 'fixed_thres')

# Keep only the key columns, the selected factors and the trailing columns below.
key_columns = ['datadate', 'tic']
trailing_columns = ['ret_d', 'TBill1y', 'rel_ret_d', 'rank', 'sector']
data = data[key_columns + factors + trailing_columns]

data

In [None]:
# --- Back-test settings ---
batch_size = 4096     # Only for model.predict()
seq_length = 20       # Length of time-series
train_length = 200    # Length of training data
num_stocks = 10       # Choose the top {num_stocks} each day
num_of_models = 3

# --- Dataset dimensions ---
all_days = list(data['datadate'].unique())
num_of_ts = len(all_days)
print(f'There are {num_of_ts} days in the dataset')

tickers = list(data['tic'].unique())
nt = len(tickers)
print(f'There are {nt} tickers')
# The table must hold exactly one row per (ticker, day) pair.
assert nt * num_of_ts == len(data)

# --- Initial window ---
ftd = train_length               # First train day
ltd = ftd + train_length - 1     # Last train day

# Number of seq_length-day steps that fit once 2*train_length days are set aside.
num_iters = (num_of_ts - 2 * train_length) // seq_length
print(f'There are {num_iters} iterations')

In [None]:
# Loaded models, keyed by model index (filled in by the simulation loop).
model_dict = {}
# Add the path where all models are saved
# model_folder = 'file_path'

# Accumulators shared across the simulation.
return_dict = {}
total_dict = {}
total_asset_dict = {}
position_dict_all = {}

# One "top" and one "bot" entry per individual model and per ensemble variant.
# Totals start at 1, asset histories start as [1], positions start empty.
strategy_names = [f'model_{i}' for i in range(num_of_models)]
strategy_names += ['ensemble_equal', 'ensemble_weighted']
for strategy_name in strategy_names:
    for side in ('top', 'bot'):
        strategy_key = f'{strategy_name}_{side}'
        total_dict[strategy_key] = 1
        total_asset_dict[strategy_key] = [1]
        position_dict_all[strategy_key] = {}

In [None]:
# Walk-forward simulation: one pass per window. The loop covers every window
# (num_iters), which is also the total reported in the progress message below.
for num_iter in range(num_iters):

    print(f'Iteration {num_iter+1} of {num_iters}')
    # Only the very first window is flagged as the first run.
    first_run = num_iter == 0

    # Day-index bounds of the current window; each iteration shifts them
    # forward by seq_length days.
    ftd = train_length + num_iter * seq_length
    ltd = ftd + train_length - 1

    # Get data_train, data_test etc.
    # NOTE(review): most of these names are not referenced again in this cell, and
    # simulate_top_bot is not passed the models or test data explicitly - presumably
    # the helper reads them from the notebook namespace; confirm before renaming.
    (data_train, x_train, y_train,
     data_test, x_test, y_test,
     ret_d_train, ret_d_test,
     sector_train, sector_test) = prep_train_test_data(data, seq_length, ftd, ltd, all_days)

    # Load the saved models for this window. File names embed the model index
    # and the window's first/last dates; model_folder must end with a path separator.
    for i in range(num_of_models):
        model_path = model_folder + f"model_{i}_{all_days[ftd]}_{all_days[ltd]}.keras"
        model_dict[i] = keras.models.load_model(model_path)

    # Run the simulation for this window and carry the accumulators forward.
    total_asset_dict, total_dict, position_dict_all, return_dict = simulate_top_bot(
        ftd, ltd, total_dict, first_run, num_stocks,
        total_asset_dict, position_dict_all, return_dict,
    )

# SR

In [None]:
# Re-read only the date and 1-year T-bill columns, keeping the first row per date.
# NOTE: this rebinds `data`, replacing the factor table loaded earlier.
data = (
    pd.read_csv(processed_data_path, usecols=['datadate', 'TBill1y'])
    .drop_duplicates(subset=['datadate'])
)
data

In [None]:
# T-bill values up to and including the last simulated day, trimmed to the final 67*20 entries.
# NOTE(review): 67*20 presumably means 67 iterations x seq_length (20) days - confirm it
# matches the number of days actually simulated.
last_simul_day = "2024-12-03"
rfr = list(data.loc[data['datadate'] <= last_simul_day, 'TBill1y'])[-67*20:]

In [None]:
# Report the SR of every tracked strategy, computed from its asset history and rfr.
for strategy_key, asset_history in total_asset_dict.items():
    SR = calculate_SR(asset_history, rfr)
    print(f'SR for model {strategy_key} is {SR}')

In [None]:
# Build the matching "top"/"bot" key lists passed to calculate_H_L_SR.
# The per-model prefixes are derived from num_of_models (the same setting that
# creates the total_asset_dict keys) rather than hard-coded, so changing the
# number of models keeps this cell in sync.
model_prefixes = [f'model_{i}_' for i in range(num_of_models)]
model_prefixes += ['ensemble_equal_', 'ensemble_weighted_']

top_keys = [prefix + 'top' for prefix in model_prefixes]
bot_keys = [prefix + 'bot' for prefix in model_prefixes]
calculate_H_L_SR(total_asset_dict, top_keys, bot_keys)