In [None]:
import pandas as pd
import numpy as np
import talib as ta

class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        self.data['RSI'] = ta.RSI(self.data['Close'], timeperiod=14)
        self.data['MACD'], self.data['MACD_signal'], self.data['MACD_hist'] = ta.MACD(self.data['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
        self.data['Stoch_k'], self.data['Stoch_d'] = ta.STOCH(self.data['High'], self.data['Low'], self.data['Close'],
                                                              fastk_period=14, slowk_period=3, slowd_period=3)

    def add_volume_indicators(self):
        self.data['OBV'] = ta.OBV(self.data['Close'], self.data['Volume'])

    def add_volatility_indicators(self):
        self.data['Upper_BB'], self.data['Middle_BB'], self.data['Lower_BB'] = ta.BBANDS(self.data['Close'], timeperiod=20)
        self.data['ATR_1'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=1)
        self.data['ATR_2'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=2)
        self.data['ATR_5'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)
        self.data['ATR_10'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=10)
        self.data['ATR_20'] = ta.ATR(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=20)

    def add_trend_indicators(self):
        self.data['ADX'] = ta.ADX(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['+DI'] = ta.PLUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['-DI'] = ta.MINUS_DI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=14)
        self.data['CCI'] = ta.CCI(self.data['High'], self.data['Low'], self.data['Close'], timeperiod=5)

    def add_other_indicators(self):
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data

In [None]:
data = pd.read_csv('/content/drive/MyDrive/xnas-itch-20230703.tbbo.csv')

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()


In [1]:
#Start from here to skip the data preprocessing, already done and imported from csv.

import os

os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [2]:
import pandas as pd

In [3]:
df_with_indicators = pd.read_csv('df_with_indicators.csv')


In [5]:
#Calculate Signals for Ground Truth


def calculate_signal(df, lookahead=10, threshold=0.1):
    # Calculate future price after 'lookahead' steps
    df['future_close'] = df['Close'].shift(-lookahead)

    # Calculate the percentage change to future close
    df['price_change'] = (df['future_close'] - df['Close']) / df['Close']

    # Define signals based on the threshold
    df['signal'] = 'hold'  # default to hold
    df.loc[df['price_change'] > threshold, 'signal'] = 'buy'
    df.loc[df['price_change'] < -threshold, 'signal'] = 'sell'

    df.drop(['future_close', 'price_change'], axis=1, inplace=True)
    return df

df_with_indicators = calculate_signal(df_with_indicators, lookahead=10, threshold=0.0001)
signal_counts = df_with_indicators['signal'].value_counts()

print(signal_counts)

signal
hold    40934
sell     9229
buy      9075
Name: count, dtype: int64


In [7]:
#Train Test Split

total_samples = len(df_with_indicators)
train_size = int(total_samples * 0.8)
val_size = int(total_samples * 0.15)

train_data = df_with_indicators.iloc[:train_size]
val_data = df_with_indicators.iloc[train_size:train_size+val_size]
test_data = df_with_indicators.iloc[train_size+val_size:]

# Verifying the splits
print(f"Training Data: {len(train_data)} samples")
print(f"Validation Data: {len(val_data)} samples")
print(f"Testing Data: {len(test_data)} samples")


Training Data: 47390 samples
Validation Data: 8885 samples
Testing Data: 2963 samples


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.nn.utils import clip_grad_norm_

class TransformerEncoder(nn.Module):
    def __init__(self, feature_size, num_heads, ff_dim, dropout):
        super(TransformerEncoder, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=feature_size, num_heads=num_heads, dropout=dropout)
        self.feed_forward = nn.Sequential(
            nn.Linear(feature_size, ff_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(ff_dim, feature_size)
        )
        self.norm1 = nn.LayerNorm(feature_size)
        self.norm2 = nn.LayerNorm(feature_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        src2 = self.norm1(src)
        q = k = v = src2
        src2 = self.attention(q, k, v)[0]
        src = src + self.dropout(src2)
        src2 = self.feed_forward(self.norm2(src))
        src = src + self.dropout(src2)
        return src

class TransformerModel(nn.Module):
    def __init__(self, input_dim, feature_size, num_heads, ff_dim, num_layers, dropout):
        super(TransformerModel, self).__init__()
        self.layers = nn.ModuleList([TransformerEncoder(feature_size, num_heads, ff_dim, dropout) for _ in range(num_layers)])
        self.linear = nn.Linear(feature_size, 3)  # Output size is 3 for buy, sell, hold signals

    def forward(self, src):
        for layer in self.layers:
            src = layer(src)
        output = self.linear(src)
        return output




TransformerModel(
  (layers): ModuleList(
    (0-5): 6 x TransformerEncoder(
      (attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
      )
      (feed_forward): Sequential(
        (0): Linear(in_features=64, out_features=256, bias=True)
        (1): ReLU()
        (2): Dropout(p=0.1, inplace=False)
        (3): Linear(in_features=256, out_features=64, bias=True)
      )
      (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (linear): Linear(in_features=64, out_features=3, bias=True)
)


In [9]:
#Dataloader
import torch
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt


#Signal Mapping
signal_mapping = {'hold': 2, 'buy': 0, 'sell': 1}

# Apply the mapping
df_with_indicators['signal'] = df_with_indicators['signal'].map(signal_mapping)


train_data['signal'] = train_data['signal'].map(signal_mapping)
val_data['signal'] = val_data['signal'].map(signal_mapping)
test_data['signal'] = test_data['signal'].map(signal_mapping)

# Specifying the feature columns and converting to tensors
feature_columns = ['Close', 'Volume', 'RSI', 'MACD', 'Stoch_k']


def create_dataset(data):
    features = data[feature_columns].values
    labels = data['signal'].values

    # Convert to tensors
    features_tensor = torch.tensor(features, dtype=torch.float32)
    labels_tensor = torch.tensor(labels, dtype=torch.long)

    return TensorDataset(features_tensor, labels_tensor)

train_dataset = create_dataset(train_data)
val_dataset = create_dataset(val_data)
test_dataset = create_dataset(test_data)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)



Unique labels after mapping: [1 2 0]
Null values in 'signal' column: 0
Batch data shape: torch.Size([128, 5]), Batch target shape: torch.Size([128])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['signal'] = train_data['signal'].map(signal_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_data['signal'] = val_data['signal'].map(signal_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['signal'] = test_data['signal'].map(signal_mapping)


In [11]:
#Training Block


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")




os.makedirs("weights", exist_ok=True)
os.makedirs("pics", exist_ok=True)

# Model setup
model = TransformerModel(input_dim=5, feature_size=5, num_heads=1, ff_dim=256, num_layers=6, dropout=0.3).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.CrossEntropyLoss()

scheduler = CosineAnnealingLR(optimizer, T_max=500, eta_min=0.00001) #CosineAnnealing Learning Rate Scheduler

def train_and_validate(model, train_loader, val_loader, epochs):
    best_val_loss = float('inf')
    train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []

    for epoch in range(epochs):
        # Training phase
        model.train()
        total_loss, correct, total = 0, 0, 0
        for data, targets in train_loader:
            data, targets = data.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
        
        train_losses.append(total_loss / len(train_loader))
        train_accuracy = 100 * correct / total
        train_accuracies.append(train_accuracy)

        # Validation phase
        model.eval()
        total_val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for data, targets in val_loader:
                data, targets = data.to(device), targets.to(device)
                outputs = model(data)
                val_loss = criterion(outputs, targets)
                total_val_loss += val_loss.item()

                _, val_predicted = torch.max(outputs.data, 1)
                val_correct += (val_predicted == targets).sum().item()
                val_total += targets.size(0)
        
        val_losses.append(total_val_loss / len(val_loader))
        val_accuracy = 100 * val_correct / val_total
        val_accuracies.append(val_accuracy)

        # Learning rate scheduler step
        scheduler.step()

        if val_losses[-1] < best_val_loss:
            best_val_loss = val_losses[-1]
            torch.save(model.state_dict(), f"weights/best_model_epoch_{epoch+1}.pth")

        if (epoch + 1) % 10 == 0:
            # Save model weights
            torch.save(model.state_dict(), f"weights/model_epoch_{epoch+1}.pth")

            # Plotting and saving losses
            plt.figure(figsize=(10, 5))
            plt.plot(train_losses, label='Train Loss')
            plt.plot(val_losses, label='Validation Loss')
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.title('Training and Validation Loss')
            plt.legend()
            plt.savefig(f"pics/loss_epoch_{epoch+1}.png")
            plt.close()

            # Plotting and saving accuracies
            plt.figure(figsize=(10, 5))
            plt.plot(train_accuracies, label='Train Accuracy')
            plt.plot(val_accuracies, label='Validation Accuracy')
            plt.xlabel('Epoch')
            plt.ylabel('Accuracy')
            plt.title('Training and Validation Accuracy')
            plt.legend()
            plt.savefig(f"pics/accuracy_epoch_{epoch+1}.png")
            plt.close()

    return train_losses, val_losses, train_accuracies, val_accuracies


# Run the training and validation
train_and_validate(model, train_loader, val_loader, epochs=500)

KeyboardInterrupt: 

In [15]:
import torch
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Initialize the model
model = TransformerModel(input_dim=5, feature_size=5, num_heads=1, ff_dim=256, num_layers=6, dropout=0.1)

# Load the model weights
model_path = './weights_batch64/model_epoch_500.pth' 
model.load_state_dict(torch.load(model_path))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # Set the model to evaluation mode

def evaluate_model(model, data_loader, device):
    all_preds, all_labels = [], []

    with torch.no_grad():  
        for data, labels in data_loader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = 100 * np.sum(np.array(all_preds) == np.array(all_labels)) / len(all_labels)
    print(f'Accuracy on the test set: {accuracy:.2f}%')
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=['Buy', 'Sell', 'Hold']))
    print("Confusion Matrix:")
    print(confusion_matrix(all_labels, all_preds))

    return accuracy

test_accuracy = evaluate_model(model, test_loader, device)

Accuracy on the test set: 82.59%
Classification Report:
              precision    recall  f1-score   support

         Buy       0.11      0.04      0.05       195
        Sell       0.19      0.05      0.08       235
        Hold       0.86      0.96      0.90      2533

    accuracy                           0.83      2963
   macro avg       0.39      0.35      0.35      2963
weighted avg       0.75      0.83      0.78      2963

Confusion Matrix:
[[   7    3  185]
 [   1   12  222]
 [  58   47 2428]]


In [20]:
# Evaluation
import numpy as np

def get_predictions_and_prices(model, data_loader, device):
    model.eval()  
    predictions = []
    prices = []
    with torch.no_grad():
        for data, _ in data_loader:
            data = data.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())
            prices.extend(data[:, 0].cpu().numpy())  
    return predictions, prices

class TradingEnvironmentwithBlotter:
    def __init__(self, prices, predictions, window_size=10):
        self.prices = prices
        self.predictions = predictions
        self.sma = np.convolve(prices, np.ones(window_size) / window_size, mode='valid')  # Simple moving average
        self.current_step = window_size - 1  # Start index adjusted for SMA
        self.balance = 10000000  # Starting balance
        self.shares_held = 0
        self.total_shares_traded = 0

    def step(self):
        if self.current_step >= len(self.prices):
            return
        
        current_price = self.prices[self.current_step]
        action = self.predictions[self.current_step] #Action from trained model
        current_sma = self.sma[self.current_step - (len(self.prices) - len(self.sma))]

        momentum = current_price - current_sma
        trade_fraction = 0.01 if abs(momentum) > current_sma * 0.01 else 0.005  # Adjust trade size by momentum

        if action == 0 and momentum < 0:  # Buy if momentum indicates an uptrend
            shares_to_buy = int((self.balance * trade_fraction) / current_price)
            if shares_to_buy > 0:
                self.balance -= shares_to_buy * current_price
                self.shares_held += shares_to_buy
                self.total_shares_traded += shares_to_buy
                print(f"Buy {shares_to_buy} shares at {current_price} per share")
        elif action == 1 and momentum > 0 and self.shares_held > 0:  # Sell if momentum indicates a downtrend
            self.balance += self.shares_held * current_price
            print(f"Sell {self.shares_held} shares at {current_price} per share")
            self.total_shares_traded += self.shares_held
            self.shares_held = 0
        elif action == 2:  # Hold action
            print(f"Hold at {current_price} per share")

        self.current_step += 1

    def run(self):
        while self.current_step < len(self.prices):
            self.step()

        # print(f"Final Balance: ${self.balance:.2f}")
        # print(f"Total Shares Traded: {self.total_shares_traded}")
        # print(f"Shares Held: {self.shares_held}")


predictions, prices = get_predictions_and_prices(model, test_loader, device)
env = TradingEnvironmentwithBlotter(prices, predictions)
env.run()


Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.43499755859375 per share
Hold at 192.43499755859375 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.43499755859375 per share
Hold at 192.43499755859375 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
Hold at 192.44000244140625 per share
H