In [1]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
import talib as ta
import tensorflow as tf
from tensorflow.keras.models import Model




In [2]:
#Load the data

data = pd.read_csv('xnas-itch-20230703.tbbo.csv')

In [3]:
print(data.head())

               ts_recv             ts_event  rtype  publisher_id  \
0  1688371200660869841  1688371200660704717      1             2   
1  1688371201201402566  1688371201201237816      1             2   
2  1688371201233688992  1688371201233524761      1             2   
3  1688371201317556361  1688371201317392163      1             2   
4  1688371201478520666  1688371201478356044      1             2   

   instrument_id action side  depth         price  size  flags  ts_in_delta  \
0             32      T    B      0  194120000000     1    130       165124   
1             32      T    B      0  194110000000     2    130       164750   
2             32      T    B      0  194110000000     8    130       164231   
3             32      T    B      0  194110000000     2    130       164198   
4             32      T    B      0  194000000000     7    130       164622   

   sequence     bid_px_00     ask_px_00  bid_sz_00  ask_sz_00  bid_ct_00  \
0    303634  193630000000  194120000000 

In [4]:

class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data

In [5]:
# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()
market_features_df = df_with_indicators[35:]

In [6]:

market_features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59236 entries, 35 to 59270
Data columns (total 47 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ts_recv        59236 non-null  int64  
 1   ts_event       59236 non-null  int64  
 2   rtype          59236 non-null  int64  
 3   publisher_id   59236 non-null  int64  
 4   instrument_id  59236 non-null  int64  
 5   action         59236 non-null  object 
 6   side           59236 non-null  object 
 7   depth          59236 non-null  int64  
 8   price          59236 non-null  float64
 9   size           59236 non-null  int64  
 10  flags          59236 non-null  int64  
 11  ts_in_delta    59236 non-null  int64  
 12  sequence       59236 non-null  int64  
 13  bid_px_00      59236 non-null  float64
 14  ask_px_00      59236 non-null  float64
 15  bid_sz_00      59236 non-null  int64  
 16  ask_sz_00      59236 non-null  int64  
 17  bid_ct_00      59236 non-null  int64  
 18  ask_c

In [16]:
# Compute the shifted values for 'OBV' and any other necessary features
market_features_df['OBV_shifted'] = market_features_df['OBV'].shift(1)

# Define the function to decide action
def decide_action(row):
    # Buy Conditions
    if (row['RSI'] < 30) or \
       (row['MACD'] > row['MACD_signal']) or \
       ((row['Stoch_k'] < 20) and (row['Stoch_k'] > row['Stoch_d'])) or \
       (row['Close'] < row['Lower_BB']) or \
       ((row['ADX'] > 20) and (row['+DI'] > row['-DI'])):
        return 1  # Buy
    
    # Sell Conditions
    elif (row['RSI'] > 70) or \
         (row['MACD'] < row['MACD_signal']) or \
         ((row['Stoch_k'] > 80) and (row['Stoch_k'] < row['Stoch_d'])) or \
         (row['Close'] > row['Upper_BB']) or \
         ((row['ADX'] > 20) and (row['+DI'] < row['-DI'])):
        return 2  # Sell
    
    # Hold Condition
    else:
        return 0  # Hold

# Apply the function to the dataframe
market_features_df['recommend'] = market_features_df.apply(decide_action, axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  market_features_df['OBV_shifted'] = market_features_df['OBV'].shift(1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  market_features_df['recommend'] = market_features_df.apply(decide_action, axis=1)


In [17]:

from sklearn.preprocessing import StandardScaler


# Columns to be used as features
feature_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 
                   'Stoch_k', 'Stoch_d', 'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 
                   'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

# Assume market_features_df is your dataframe and 'recommend' is the target column

# Select only the specified feature columns
features = market_features_df[feature_columns]
target = market_features_df['recommend']

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler on the features and transform
scaled_features = scaler.fit_transform(features)

# Convert scaled features back to a DataFrame
scaled_features_df = pd.DataFrame(scaled_features, columns=features.columns)

# Add the target column back to the scaled features DataFrame
scaled_features_df['recommend'] = target.values

# Function to create sequences
def create_sequences(df, seq_length):
    sequences = []
    labels = []
    for i in range(len(df) - seq_length):
        sequence = df.iloc[i:i+seq_length].drop(columns=['recommend']).values
        label = df.iloc[i+seq_length]['recommend']
        sequences.append(sequence)
        labels.append(label)
    return np.array(sequences), np.array(labels)

# Create sequences with a sequence length of 60 (for example)
seq_length = 60
X, y = create_sequences(scaled_features_df, seq_length)

# Split the data into training, validation, and test sets
from sklearn.model_selection import train_test_split

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Print shapes of the datasets
print("Training data shape: ", X_train.shape, y_train.shape)
print("Validation data shape: ", X_val.shape, y_val.shape)
print("Test data shape: ", X_test.shape, y_test.shape)


Training data shape:  (41423, 60, 17) (41423,)
Validation data shape:  (8876, 60, 17) (8876,)
Test data shape:  (8877, 60, 17) (8877,)


In [18]:
from tensorflow.keras.layers import LayerNormalization, MultiHeadAttention, Add, Dense, Dropout, GlobalAveragePooling1D, Input


def transformer_encoder_layer(inputs, head_size, num_heads, ff_dim, dropout_rate):
    # Multi-head attention and normalization
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    attn_output = Dropout(dropout_rate)(attn_output)
    out1 = Add()([inputs, attn_output])
    out1 = LayerNormalization(epsilon=1e-6)(out1)
    
    # Feed-forward network and normalization
    ffn_output = Dense(ff_dim, activation='relu')(out1)
    ffn_output = Dropout(dropout_rate)(ffn_output)
    ffn_output = Dense(inputs.shape[-1])(ffn_output)
    ffn_output = Add()([out1, ffn_output])
    ffn_output = LayerNormalization(epsilon=1e-6)(ffn_output)
    
    return ffn_output

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout_rate):
    inputs = Input(shape=input_shape)
    x = inputs

    for _ in range(num_layers):
        x = transformer_encoder_layer(x, head_size, num_heads, ff_dim, dropout_rate)

    x = GlobalAveragePooling1D()(x)
    outputs = Dense(3, activation='softmax')(x)  # Output layer for three classes: buy, hold, sell

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Model configuration
input_shape = (60, 17)  # 60 timesteps, 17 features
head_size = 64
num_heads = 4
ff_dim = 128
num_layers = 4
dropout_rate = 0.1

# Create the model
model = build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout_rate)
model.summary()



Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 60, 17)]             0         []                            
                                                                                                  
 multi_head_attention (Mult  (None, 60, 17)               18193     ['input_1[0][0]',             
 iHeadAttention)                                                     'input_1[0][0]']             
                                                                                                  
 dropout (Dropout)           (None, 60, 17)               0         ['multi_head_attention[0][0]']
                                                                                                  
 add (Add)                   (None, 60, 17)               0         ['input_1[0][0]',       

In [18]:
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=12, batch_size=1024, shuffle=True)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [19]:
model.save('trained_transformer_model.h5')

# Evaluate on test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

Test Loss: 0.27560245990753174
Test Accuracy: 0.8834065794944763


In [20]:
from sklearn.metrics import classification_report, confusion_matrix

# Make predictions
predictions = model.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)

# Calculate precision, recall, and F1-score
print(classification_report(y_test, predicted_classes))

# Confusion matrix
cm = confusion_matrix(y_test, predicted_classes)
print("Confusion Matrix:")
print(cm)

              precision    recall  f1-score   support

         1.0       0.90      0.98      0.94      7796
         2.0       0.56      0.21      0.31      1081

    accuracy                           0.88      8877
   macro avg       0.73      0.59      0.62      8877
weighted avg       0.86      0.88      0.86      8877

Confusion Matrix:
[[7613  183]
 [ 852  229]]


In [22]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=10,         # Number of epochs with no improvement after which training will be stopped
    restore_best_weights=True  # Restores model weights from the epoch with the best value of the monitored quantity.
)


In [23]:
from tensorflow.keras.callbacks import LearningRateScheduler

def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

lr_scheduler = LearningRateScheduler(scheduler)


In [25]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=2048,
    callbacks=[early_stopping, lr_scheduler]
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [26]:
model.save('finetuned_transformer_model.h5')

  saving_api.save_model(


In [19]:
from tensorflow.keras.models import load_model

# Load the model from the file
model = load_model('finetuned_transformer_model.h5')

In [21]:
class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model_path=None,window_size=60):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.window_size = window_size
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns), 1), dtype=np.float32
        )

        # Load the pretrained transformer model if provided
        if transformer_model_path:
            self.model = tf.keras.models.load_model(transformer_model_path)
        else:
            self.model = None

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        # Ensure we don't go out of bounds
      end = self.current_step + self.window_size
      if end > len(self.data):
        end = len(self.data)
        self.current_step = end - self.window_size

      obs = self.data[self.state_columns].iloc[self.current_step:end].values
      if obs.shape[0] < self.window_size:
        # Pad with zeros or repeat entries if not enough data
        padding = np.zeros((self.window_size - obs.shape[0], len(self.state_columns)))
        obs = np.vstack((padding, obs))

      return obs  # This should now be (60, 17)

    def step(self, action=None):
        if action is None and self.model:
            state = self._next_observation()
            state = state.reshape(1, *state.shape)
            action = self._predict_action(state)

        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        if action != 0:
            transaction_cost = self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100 * transaction_time / 1e9
        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()
        info = {
            'step': self.current_step,
            'action': action,
            'price': actual_price,
            'shares': self.trades[-1]['shares'] if self.trades else 0
        }
        self.current_step += 1

        return obs, reward, done, info

    def _predict_action(self, state):
     with tf.device('/CPU:0'):  # Ensuring prediction runs on CPU
        output = self.model.predict(state)
     if 'recommend' in output:
        recommended_action = np.argmax(output['recommend'], axis=1)[0]  # Assumes 'recommend' contains softmax probabilities
     else:
        recommended_action = np.argmax(output, axis=1)[0]  # Fallback if 'recommend' key is not present
     return recommended_action

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if shares_bought > 0:
                self.trades.append(trade_info)
        elif action == 2:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if shares_sold > 0:
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100 * transaction_time / 1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        trades_df = pd.DataFrame(self.trades)
        trades_df.to_csv('trades_transformer.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

In [22]:
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

env = TradingEnvironment(ticker_data, daily_trading_limit,transformer_model_path='finetuned_transformer_model.h5') 
 # Adjust window_size if needed

 # Reset the environment to get the initial observation
obs = env.reset()

# Run the environment using only the Transformer model
env.reset()  # Reset your environment at the start
for action in predicted_classes:
    state, reward, done, info = env.step(action)  # Execute action in the environment
    if done:
        break  # Exit if the environment indicates the episode is finished
    env.render()
# Print the cumulative reward and trades
print(f"Cumulative Reward: {env.cumulative_reward}")
env.print_trades()