## Importing Libraries

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch.optim import Adam
from torch.nn.functional import mse_loss

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from scipy.stats import norm

import gym
from gym import spaces

from stable_baselines3 import PPO

import talib as ta

In [2]:
import warnings
warnings.filterwarnings("ignore")

## GIVEN functions

In [3]:
class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data


## Data Input

In [4]:
data = pd.read_csv("xnas-itch-20230703.tbbo.csv")

data['transformer'] = 0

pd.set_option('display.max_columns', None)

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()

In [5]:
df_with_indicators

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,transformer,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
0,1688371200660869841,1688371200660704717,1,2,32,T,B,0,194.12,1,130,165124,303634,193.63,194.12,27,27,1,1,AAPL,0,194.12,1,194.12,193.63,194.12,,,,,,,1.0,,,,,,,,,,,,,,194.120000,193.875000
1,1688371201201402566,1688371201201237816,1,2,32,T,B,0,194.11,2,130,164750,304724,193.90,194.11,5,400,1,1,AAPL,0,194.11,2,194.11,193.90,194.12,,,,,,,-1.0,,,,0.22,,,,,,,,,-0.000052,194.115000,193.961667
2,1688371201233688992,1688371201233524761,1,2,32,T,B,0,194.11,8,130,164231,304850,193.90,194.11,5,398,1,1,AAPL,0,194.11,8,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.215000,,,,,,,,0.000000,194.113333,193.993182
3,1688371201317556361,1688371201317392163,1,2,32,T,B,0,194.11,2,130,164198,305101,193.90,194.11,5,390,1,1,AAPL,0,194.11,2,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.212500,,,,,,,,0.000000,194.112500,193.995000
4,1688371201478520666,1688371201478356044,1,2,32,T,B,0,194.00,7,130,164622,306430,193.90,194.00,5,200,1,1,AAPL,0,194.00,7,194.00,193.90,194.11,,,,,,,-8.0,,,,0.21,0.211250,,,,,,,-74.468085,-0.000567,194.090000,193.979250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,1,2,32,T,B,0,192.44,6,130,164895,252532002,192.40,192.44,40,7,1,1,AAPL,0,192.44,6,192.44,192.40,192.40,63.061829,0.005002,0.006368,-0.001366,33.333333,22.222222,913234.0,192.444977,192.4175,192.390023,0.04,0.034997,0.031258,0.028636,0.027592,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211
59267,1688417961020718430,1688417961020553920,1,2,32,T,B,0,192.44,1,130,164510,252532102,192.40,192.44,40,1,1,1,AAPL,0,192.44,1,192.44,192.40,192.44,63.061829,0.006355,0.006365,-0.000011,66.666667,33.333333,913234.0,192.447411,192.4195,192.391589,0.04,0.037498,0.033006,0.029772,0.028212,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211
59268,1688417973297905504,1688417973297741235,1,2,32,T,A,0,192.40,5,130,164269,252532347,192.40,192.46,40,6,1,1,AAPL,0,192.40,5,192.46,192.40,192.44,44.499081,0.004151,0.005922,-0.001771,66.666667,55.555556,913229.0,192.447411,192.4195,192.391589,0.06,0.048749,0.038405,0.032795,0.029802,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211
59269,1688417996889779362,1688417996889614660,1,2,32,T,B,0,192.45,3,130,164702,252532944,192.40,192.45,35,16,1,1,AAPL,0,192.45,3,192.45,192.40,192.40,60.250043,0.006366,0.006011,0.000355,61.111111,64.814815,913232.0,192.451394,192.4220,192.392606,0.05,0.049375,0.040724,0.034515,0.030812,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211


In [6]:
df_with_indicators = df_with_indicators.drop(['rtype', 'publisher_id', 'instrument_id', 'action', 'depth', 'sequence'], axis=1)
df_with_indicators['ts_recv'] = pd.to_datetime(df_with_indicators['ts_recv'], unit='ns')
df_with_indicators['ts_recv_bucket'] = pd.cut(df_with_indicators['ts_recv'].dt.hour, bins=list(range(0,25,2)), right=False, labels=list(range(12)))

df_with_indicators

Unnamed: 0,ts_recv,ts_event,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,transformer,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,ts_recv_bucket
0,2023-07-03 08:00:00.660869841,1688371200660704717,B,194.12,1,130,165124,193.63,194.12,27,27,1,1,AAPL,0,194.12,1,194.12,193.63,194.12,,,,,,,1.0,,,,,,,,,,,,,,194.120000,193.875000,4
1,2023-07-03 08:00:01.201402566,1688371201201237816,B,194.11,2,130,164750,193.90,194.11,5,400,1,1,AAPL,0,194.11,2,194.11,193.90,194.12,,,,,,,-1.0,,,,0.22,,,,,,,,,-0.000052,194.115000,193.961667,4
2,2023-07-03 08:00:01.233688992,1688371201233524761,B,194.11,8,130,164231,193.90,194.11,5,398,1,1,AAPL,0,194.11,8,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.215000,,,,,,,,0.000000,194.113333,193.993182,4
3,2023-07-03 08:00:01.317556361,1688371201317392163,B,194.11,2,130,164198,193.90,194.11,5,390,1,1,AAPL,0,194.11,2,194.11,193.90,194.11,,,,,,,-1.0,,,,0.21,0.212500,,,,,,,,0.000000,194.112500,193.995000,4
4,2023-07-03 08:00:01.478520666,1688371201478356044,B,194.00,7,130,164622,193.90,194.00,5,200,1,1,AAPL,0,194.00,7,194.00,193.90,194.11,,,,,,,-8.0,,,,0.21,0.211250,,,,,,,-74.468085,-0.000567,194.090000,193.979250,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,2023-07-03 20:59:14.514485218,1688417954514320323,B,192.44,6,130,164895,192.40,192.44,40,7,1,1,AAPL,0,192.44,6,192.44,192.40,192.40,63.061829,0.005002,0.006368,-0.001366,33.333333,22.222222,913234.0,192.444977,192.4175,192.390023,0.04,0.034997,0.031258,0.028636,0.027592,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211,10
59267,2023-07-03 20:59:21.020718430,1688417961020553920,B,192.44,1,130,164510,192.40,192.44,40,1,1,1,AAPL,0,192.44,1,192.44,192.40,192.44,63.061829,0.006355,0.006365,-0.000011,66.666667,33.333333,913234.0,192.447411,192.4195,192.391589,0.04,0.037498,0.033006,0.029772,0.028212,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211,10
59268,2023-07-03 20:59:33.297905504,1688417973297741235,A,192.40,5,130,164269,192.40,192.46,40,6,1,1,AAPL,0,192.40,5,192.46,192.40,192.44,44.499081,0.004151,0.005922,-0.001771,66.666667,55.555556,913229.0,192.447411,192.4195,192.391589,0.06,0.048749,0.038405,0.032795,0.029802,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211,10
59269,2023-07-03 20:59:56.889779362,1688417996889614660,B,192.45,3,130,164702,192.40,192.45,35,16,1,1,AAPL,0,192.45,3,192.45,192.40,192.40,60.250043,0.006366,0.006011,0.000355,61.111111,64.814815,913232.0,192.451394,192.4220,192.392606,0.05,0.049375,0.040724,0.034515,0.030812,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211,10


In [7]:
market_features_df = df_with_indicators[35:]
side_mapping = {'A': 1, 'B': 2, 'N': 0}
market_features_df['side'] = market_features_df['side'].map(side_mapping)

flag_mapping = {130: 1, 128: 2, 0: 0}
market_features_df['flags'] = market_features_df['flags'].map(flag_mapping)
market_features_df

Unnamed: 0,ts_recv,ts_event,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,transformer,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP,ts_recv_bucket
35,2023-07-03 08:00:14.386057385,1688371214385893078,0,194.05,50,1,164307,194.0,194.30,3101,19,4,10,AAPL,0,194.05,50,194.30,194.0,194.05,54.544543,0.006271,-0.003130,0.009401,52.525253,61.952862,-266.0,194.065621,194.0170,193.968379,0.30,0.175078,0.098615,0.075141,0.072403,97.257397,30.435801,0.196362,166.666667,0.000000,194.020000,194.021894,4
36,2023-07-03 08:00:14.386063777,1688371214385899379,0,194.05,50,1,164398,194.0,194.30,3101,19,4,10,AAPL,0,194.05,50,194.30,194.0,194.05,54.544543,0.007108,-0.001082,0.008190,38.383838,52.525253,-266.0,194.068990,194.0200,193.971010,0.30,0.237539,0.138892,0.097627,0.083783,97.361721,22.989295,0.148320,83.333333,0.000000,194.020811,194.025188,4
37,2023-07-03 08:00:15.804852019,1688371215804687301,2,194.21,10,1,164718,194.0,194.21,3101,29,4,1,AAPL,0,194.21,10,194.21,194.0,194.05,85.890753,0.020446,0.003223,0.017223,40.404040,43.771044,-256.0,194.125889,194.0305,193.935111,0.21,0.223770,0.153114,0.108864,0.090094,97.458593,19.409454,0.125224,79.268293,0.000824,194.025789,194.025596,4
38,2023-07-03 08:00:19.671476629,1688371219671312224,0,194.14,10,1,164405,194.0,194.16,3101,400,4,1,AAPL,0,194.14,10,194.16,194.0,194.21,64.827662,0.025079,0.007594,0.017484,49.494949,42.760943,-266.0,194.142928,194.0375,193.932072,0.21,0.216885,0.164491,0.118978,0.096089,97.548546,16.622008,0.107240,-3.205128,-0.000360,194.028718,194.025873,4
39,2023-07-03 08:00:23.368835585,1688371223368671235,2,194.13,10,1,164350,194.0,194.13,3101,400,4,1,AAPL,0,194.13,10,194.13,194.0,194.14,62.470772,0.027625,0.011601,0.016025,57.575758,49.158249,-276.0,194.155247,194.0440,193.932753,0.14,0.178442,0.159593,0.121080,0.098285,97.632074,15.068361,0.097216,-113.095238,-0.000052,194.031250,194.026071,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,2023-07-03 20:59:14.514485218,1688417954514320323,2,192.44,6,1,164895,192.4,192.44,40,7,1,1,AAPL,0,192.44,6,192.44,192.4,192.40,63.061829,0.005002,0.006368,-0.001366,33.333333,22.222222,913234.0,192.444977,192.4175,192.390023,0.04,0.034997,0.031258,0.028636,0.027592,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211,10
59267,2023-07-03 20:59:21.020718430,1688417961020553920,2,192.44,1,1,164510,192.4,192.44,40,1,1,1,AAPL,0,192.44,1,192.44,192.4,192.44,63.061829,0.006355,0.006365,-0.000011,66.666667,33.333333,913234.0,192.447411,192.4195,192.391589,0.04,0.037498,0.033006,0.029772,0.028212,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211,10
59268,2023-07-03 20:59:33.297905504,1688417973297741235,1,192.40,5,1,164269,192.4,192.46,40,6,1,1,AAPL,0,192.40,5,192.46,192.4,192.44,44.499081,0.004151,0.005922,-0.001771,66.666667,55.555556,913229.0,192.447411,192.4195,192.391589,0.06,0.048749,0.038405,0.032795,0.029802,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211,10
59269,2023-07-03 20:59:56.889779362,1688417996889614660,2,192.45,3,1,164702,192.4,192.45,35,16,1,1,AAPL,0,192.45,3,192.45,192.4,192.40,60.250043,0.006366,0.006011,0.000355,61.111111,64.814815,913232.0,192.451394,192.4220,192.392606,0.05,0.049375,0.040724,0.034515,0.030812,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211,10


In [8]:
relevant_data = market_features_df[['price', 'side', 'flags', 'bid_px_00', 'ask_px_00', 'RSI', 'MACD', 'ATR_5', 'ATR_10', 'ATR_20', 'CCI', 'DLR', 'TWAP']]

# Creating input sequences and corresponding target prices
input_sequences = []
output_prices = []

window_size = 5

for i in range(len(relevant_data) - window_size):
    # Flatten the sequence data to include all selected features for each step in one flat sequence
    sequence = relevant_data.iloc[i:i+window_size].to_numpy().flatten()
    input_sequences.append(sequence)
    output_prices.append(relevant_data.iloc[i + window_size]['price'])

# Creating column names for the DataFrame
column_names = []
for i in range(window_size):
    for feature in ['price', 'side', 'flags', 'bid_px_00', 'ask_px_00', 'RSI', 'MACD', 'ATR_5', 'ATR_10', 'ATR_20', 'CCI', 'DLR', 'TWAP']:
        column_names.append(f"{feature}_{i+1}")

# Creating DataFrame
sequence_data_with_features = pd.DataFrame(input_sequences, columns=column_names)
sequence_data_with_features['Next_Price'] = output_prices

sequence_data_with_features.head()


Unnamed: 0,price_1,side_1,flags_1,bid_px_00_1,ask_px_00_1,RSI_1,MACD_1,ATR_5_1,ATR_10_1,ATR_20_1,CCI_1,DLR_1,TWAP_1,price_2,side_2,flags_2,bid_px_00_2,ask_px_00_2,RSI_2,MACD_2,ATR_5_2,ATR_10_2,ATR_20_2,CCI_2,DLR_2,TWAP_2,price_3,side_3,flags_3,bid_px_00_3,ask_px_00_3,RSI_3,MACD_3,ATR_5_3,ATR_10_3,ATR_20_3,CCI_3,DLR_3,TWAP_3,price_4,side_4,flags_4,bid_px_00_4,ask_px_00_4,RSI_4,MACD_4,ATR_5_4,ATR_10_4,ATR_20_4,CCI_4,DLR_4,TWAP_4,price_5,side_5,flags_5,bid_px_00_5,ask_px_00_5,RSI_5,MACD_5,ATR_5_5,ATR_10_5,ATR_20_5,CCI_5,DLR_5,TWAP_5,Next_Price
0,194.05,0.0,1.0,194.0,194.3,54.544543,0.006271,0.098615,0.075141,0.072403,166.666667,0.0,194.02,194.05,0.0,1.0,194.0,194.3,54.544543,0.007108,0.138892,0.097627,0.083783,83.333333,0.0,194.020811,194.21,2.0,1.0,194.0,194.21,85.890753,0.020446,0.153114,0.108864,0.090094,79.268293,0.000824,194.025789,194.14,0.0,1.0,194.0,194.16,64.827662,0.025079,0.164491,0.118978,0.096089,-3.205128,-0.00036,194.028718,194.13,2.0,1.0,194.0,194.13,62.470772,0.027625,0.159593,0.12108,0.098285,-113.095238,-5.2e-05,194.03125,194.13
1,194.05,0.0,1.0,194.0,194.3,54.544543,0.007108,0.138892,0.097627,0.083783,83.333333,0.0,194.020811,194.21,2.0,1.0,194.0,194.21,85.890753,0.020446,0.153114,0.108864,0.090094,79.268293,0.000824,194.025789,194.14,0.0,1.0,194.0,194.16,64.827662,0.025079,0.164491,0.118978,0.096089,-3.205128,-0.00036,194.028718,194.13,2.0,1.0,194.0,194.13,62.470772,0.027625,0.159593,0.12108,0.098285,-113.095238,-5.2e-05,194.03125,194.13,2.0,1.0,194.01,194.13,62.470772,0.029305,0.151674,0.120972,0.09937,-64.102564,0.0,194.033659,194.02
2,194.21,2.0,1.0,194.0,194.21,85.890753,0.020446,0.153114,0.108864,0.090094,79.268293,0.000824,194.025789,194.14,0.0,1.0,194.0,194.16,64.827662,0.025079,0.164491,0.118978,0.096089,-3.205128,-0.00036,194.028718,194.13,2.0,1.0,194.0,194.13,62.470772,0.027625,0.159593,0.12108,0.098285,-113.095238,-5.2e-05,194.03125,194.13,2.0,1.0,194.01,194.13,62.470772,0.029305,0.151674,0.120972,0.09937,-64.102564,0.0,194.033659,194.02,0.0,0.0,194.01,194.16,42.676806,0.021513,0.151339,0.123875,0.101902,-113.425926,-0.000567,194.033333,194.01
3,194.14,0.0,1.0,194.0,194.16,64.827662,0.025079,0.164491,0.118978,0.096089,-3.205128,-0.00036,194.028718,194.13,2.0,1.0,194.0,194.13,62.470772,0.027625,0.159593,0.12108,0.098285,-113.095238,-5.2e-05,194.03125,194.13,2.0,1.0,194.01,194.13,62.470772,0.029305,0.151674,0.120972,0.09937,-64.102564,0.0,194.033659,194.02,0.0,0.0,194.01,194.16,42.676806,0.021513,0.151339,0.123875,0.101902,-113.425926,-0.000567,194.033333,194.01,1.0,0.0,194.01,194.16,41.392784,0.014365,0.151071,0.126487,0.104307,-90.909091,-5.2e-05,194.032791,194.01
4,194.13,2.0,1.0,194.0,194.13,62.470772,0.027625,0.159593,0.12108,0.098285,-113.095238,-5.2e-05,194.03125,194.13,2.0,1.0,194.01,194.13,62.470772,0.029305,0.151674,0.120972,0.09937,-64.102564,0.0,194.033659,194.02,0.0,0.0,194.01,194.16,42.676806,0.021513,0.151339,0.123875,0.101902,-113.425926,-0.000567,194.033333,194.01,1.0,0.0,194.01,194.16,41.392784,0.014365,0.151071,0.126487,0.104307,-90.909091,-5.2e-05,194.032791,194.01,1.0,1.0,194.01,194.16,41.392784,0.008601,0.150857,0.128839,0.106591,-61.22449,0.0,194.032273,194.0


## Preprocessing DataFrame

In [9]:
features = sequence_data_with_features.drop(columns=['Next_Price']).values
prices = sequence_data_with_features['Next_Price'].values

# Normalize features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Convert to PyTorch tensors
features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
prices_tensor = torch.tensor(prices, dtype=torch.float32).unsqueeze(1)  # Adding dimension for target

# Create dataset and split it
dataset = TensorDataset(features_tensor, prices_tensor)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Further split the train_dataset into training and validation sets
val_size = int(0.1 * len(train_dataset))
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# DataLoader
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
features_tensor.size()

torch.Size([59231, 65])

## Defining Transformer Model

In [12]:
# Define the Transformer Model
class TransformerModel(nn.Module):

    def __init__(self, input_dim, num_heads, num_layers, dim_feedforward, output_dim):
        super().__init__()
        
        self.transformer_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dim_feedforward=dim_feedforward), num_layers=num_layers)
        self.fc_out = nn.Linear(input_dim, output_dim)

    def forward(self, src):
        out = self.transformer_encoder(src)
        out = self.fc_out(out[:, -1, :])
        return out

# Instantiate the model
model = TransformerModel(input_dim=features_tensor.shape[1], num_heads=5, num_layers=2, dim_feedforward=256, output_dim=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Training function
def train(dataloader, model, loss_fn, optimizer):
    model.train()
    total_loss = 0
    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        pred = model(X.unsqueeze(0))  
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

In [14]:
# Train the model
epochs = 10
for epoch in range(epochs):
    train_loss = train(train_loader, model, loss_fn, optimizer)
    print(f"Epoch {epoch+1}, Loss: {train_loss:.4f}")

Epoch 1, Loss: 18759.7340
Epoch 2, Loss: 342.9376
Epoch 3, Loss: 0.4490
Epoch 4, Loss: 0.4577
Epoch 5, Loss: 0.4342
Epoch 6, Loss: 0.4460
Epoch 7, Loss: 0.4559
Epoch 8, Loss: 0.4677
Epoch 9, Loss: 0.4602
Epoch 10, Loss: 0.4591


## Fine-Tuning Transformer Model

## Integrating PPO

In [16]:
# Function to get price prediction from transformer model
def predict_price(model, features):
    model.eval()
    with torch.no_grad():
        features_tensor = torch.tensor(features, dtype=torch.float32)
        prediction = model(features_tensor).item()
    return prediction

In [19]:
class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.transformer_model = transformer_model
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0
        
        self.transformer_features = ['price', 'side', 'flags', 'bid_px_00', 'ask_px_00', 'RSI', 'MACD', 'ATR_5', 'ATR_10', 'ATR_20', 'CCI', 'DLR', 'TWAP']
        self.state_columns = ['price', 'side', 'flags', 'bid_px_00', 'ask_px_00', 'RSI', 'MACD', 'ATR_5', 'ATR_10', 'ATR_20', 'CCI', 'DLR', 'TWAP', 'transformer']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        current_data = self.data[self.state_columns].iloc[self.current_step].values

        if self.current_step > 5:

            features = self.data[self.transformer_features].iloc[self.current_step-6:self.current_step-1].to_numpy().flatten()
          
            n_features = 65  
            n_samples = len(features) // n_features 

            if len(features) % n_features != 0:
                raise ValueError("The total number of elements in 'features' is not a multiple of 65")


            features_reshaped = features.reshape(n_samples, n_features)


            features_scaled = scaler.transform(features_reshaped)


            features_tensor = torch.tensor(features_scaled, dtype=torch.float32)


            features_tensor = features_tensor.unsqueeze(0)

            # Get prediction
            self.transformer_model.eval()  # Set the model to evaluation mode
            with torch.no_grad():
                predicted_price = self.transformer_model(features_tensor)

            current_data[13] = predicted_price[0][0]        

        return current_data

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
        
            self.cumulative_reward += reward
        
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9

        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()

        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1 and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
                
        elif action == 2 and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward
    
    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)
    
    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")



In [20]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env = TradingEnvironment(ticker_data, daily_trading_limit, model)

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
# model = DQN('MlpPolicy', env, verbose=1)
ppo_model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)

# Train the model
ppo_model.learn(total_timesteps=10000)

# Save the model
ppo_model.save("trading_agent_PPO+transformer")

# Evaluate the model
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = ppo_model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state
env.render()


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------
| time/              |     |
|    fps             | 129 |
|    iterations      | 1   |
|    time_elapsed    | 3   |
|    total_timesteps | 512 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 132         |
|    iterations           | 2           |
|    time_elapsed         | 7           |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.012314079 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.211       |
|    entropy_loss         | -1.09       |
|    explained_variance   | -1.03       |
|    learning_rate        | 0.000993    |
|    loss                 | 0.0861      |
|    n_updates            | 6           |
|    policy_gradient_loss | -0.0103     |
|    value_loss           | 0.

## Evaluating Model (Trade Blotter)

In [21]:
INITIAL_CASH = 10_000_000  # $10 million

def preprocess_data(df):
    df['liquidity'] = df['bid_sz_00'] * df['bid_px_00'] + df['ask_sz_00'] * df['ask_px_00']
    return df

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
    # Calculate rolling statistics
    rolling_mean_vol = price_df.pct_change().rolling(window=window_size).mean()
    rolling_std_vol = price_df.pct_change().rolling(window=window_size).std()
    rolling_mean_liq = volume_df.rolling(window=window_size).mean()
    rolling_std_liq = volume_df.rolling(window=window_size).std()
    
    return rolling_mean_vol, rolling_std_vol, rolling_mean_liq, rolling_std_liq

def get_percentile(current_value, mean, std):
    if std > 0:
        z_score = (current_value - mean) / std
        percentile = norm.cdf(z_score)
    else:
        percentile = 0.5  # No variation
    return percentile

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
    vol_percentile = get_percentile(current_vol, mean_vol, std_vol)
    liq_percentile = get_percentile(current_liq, mean_liq, std_liq)

    # Define price adjustment scenarios based on market conditions
    if vol_percentile >= 0.9 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.25, -0.15)
    elif vol_percentile <= 0.1 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.10, -0.05)
    elif vol_percentile >= 0.9 and liq_percentile >= 0.9:
        price_adjustment_percent = np.random.uniform(-0.05, +0.10)
    else:
        price_adjustment_percent = np.random.uniform(-0.05, +0.05)  # Default for normal conditions

    # Adjust price based on trade direction
    if trade_direction == 'BUY':
        adjusted_price = base_price * (1 - price_adjustment_percent)
    else:  # SELL
        adjusted_price = base_price * (1 + price_adjustment_percent)
    
    return adjusted_price


In [22]:
class TradingEnvironmentwithBlotter:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['ask_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)
        
            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })

            

        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

In [23]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env.run()

# Render the results
env.render()

Cumulative reward: -12231.228875703917
Total portfolio value: 9989910.056471266
Step: 105, Timestamp: 2023-07-03 08:05:13.717864037, Action: BUY, Price: 200.25009933454817, Shares: 235.99220994248478, Symbol: AAPL, Reward: -0.38225036012723784, Transaction Cost: 0.08423636012724241, Slippage: 0.12999999999999545, Time Penalty: 0.168014
Step: 124, Timestamp: 2023-07-03 08:05:50.024437448, Action: SELL, Price: 185.6546487610965, Shares: 160.48034637860331, Symbol: AAPL, Reward: -0.27620100928928953, Transaction Cost: 0.07183700928926906, Slippage: 0.040000000000020464, Time Penalty: 0.164364
Step: 134, Timestamp: 2023-07-03 08:06:13.187564319, Action: BUY, Price: 188.74259443123987, Shares: 59.58056281172852, Symbol: AAPL, Reward: -0.2850712182043049, Transaction Cost: 0.025398218204301576, Slippage: 0.09000000000000341, Time Penalty: 0.169673
Step: 135, Timestamp: 2023-07-03 08:06:13.187564319, Action: BUY, Price: 196.94688265188609, Shares: 160.84237469902726, Symbol: AAPL, Reward: -0.