In [1]:
#import required libraries
from utils import aws # used to create aws session and load parquet 
import pandas as pd
import numpy as np
import dask.dataframe as dd
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyRegressor
import math

In [2]:
# Step 1: Load historical data
# Load historical limit order book and tape data into pandas DataFrame
# load sample feature set from s3 to a dask dataframe
# samp_lob_ddf = aws.load_s3_file_as_ddf("s3://dsmp-ol2/processed-data/temp_sample_lob_feature_set.parquet")
# # compute the dask datafram to a pandas dataframe
# df = samp_lob_ddf.compute()

df = pd.read_parquet('data/output/temp_sample_lob_feature_set.parquet')

# Preprocess the data to extract relevant features
# Create a target column
# The horizon is how far in the future the Mid-Proce is being predicted
horizon = 20

# Assuming 'simple_df' is your DataFrame
df = df.copy()

df['Target'] = df['Mid_Price'].shift(-horizon)

# Drop rows where all values in the 'Target' column are NaN
df.dropna(subset=['Target'], how='all', inplace=True)

In [3]:
# Step 2: Load and train prediction model
# Sort the DataFrame by the date column
df_sorted = df.sort_values(by=['Date', 'Timestamp'])

# Identify the unique dates in the DataFrame
unique_dates = df_sorted['Date'].unique()

In [4]:
# Split the data
# Select the first two dates as train data and the third date as test data
train_data = df_sorted[df_sorted['Date'].isin(unique_dates[:2])]
test_data = df_sorted[df_sorted['Date'] == unique_dates[2]]

X_train = train_data.drop(['Target'], axis=1)  # Features
y_train = train_data['Target']  # Target - Shift 'Mid_Price' by 20 timestamps into the future

X_test = test_data.drop(['Target'], axis=1)  # Features
y_test = test_data['Target']  # Target - Shift 'Mid_Price' by 20 timestamps into the future


# Load the prediction model
prediction_model = DummyRegressor(strategy="mean") # Instantiate a dummy model as a placeholder 

# Train the model
prediction_model.fit(X_train, y_train)

In [5]:
# Step 3: Define trading strategy
# Define your trading strategy based on model predictions and trading parameters
def simple_trading_strategy(prediction, current_mid_price):
    if prediction > current_mid_price:
        return 'buy'  # Buy if the prediction is positive
    elif prediction < current_mid_price:
        return 'sell'  # Sell if the prediction is negative
    else:
        return 'hold'  # Hold if the prediction is neutral

In [6]:
# Step 4: Implement trading simulator
class TradingSimulator:
    def __init__(self, historical_data, prediction_model, trading_strategy, initial_cash):
        self.historical_data = historical_data
        self.prediction_model = prediction_model
        self.trading_strategy = trading_strategy
        self.portfolio = 0  # Initial portfolio value
        self.cash = initial_cash  # Initial cash
        self.profit_loss = []  # Store profit/loss at each time step
        self.trades = [] # Store trade history
        self.trades_test = [] # Store trade history

    def run_simulation(self):
        for index, row in self.historical_data.iterrows():
            # Extract features from historical data for prediction
            features = row

            # Use prediction model to generate predictions
            prediction = self.prediction_model.predict([features])[0]

            # Get the current mid-price (replace 'current_mid_price' with the actual value)
            current_mid_price = row['Mid_Price']

            # Implement trading strategy within the simulator
            trade_action = self.trading_strategy(prediction, current_mid_price)
            self.trades_test.append([row['Date'], row['Timestamp'], trade_action])

            # Execute trade based on prediction
            if trade_action == 'buy':
                # All-in
                if self.cash > 0:
                    # Invest all available cash in buying assets
                    trade_price = row['Ask'][0][0] # buy at the lowest ask-price
                    max_quantity = math.floor(self.cash / trade_price) # max quantity of possible to buy (must be an integer)
                    quantity_available = row['Ask'][0][1] # quantity of the lowest ask-price

                    if max_quantity < quantity_available:
                        quantity_to_buy = max_quantity
                    else:
                        quantity_to_buy = quantity_available
                    
                    self.portfolio += quantity_to_buy
                    self.cash = self.cash - (quantity_to_buy * trade_price)
                    self.trades.append([row['Date'], row['Timestamp'], ('buy', trade_price, quantity_to_buy)])
                    
                else:
                    # Hold if no available cash
                    self.trades.append([row['Date'], row['Timestamp'], ('hold', None, None)])
                    
                
            elif trade_action == 'sell':
                # All-out
                if self.portfolio > 0:
                    # Cash out all assets
                    trade_price = row['Bid'][0][0] # sell at the highest bid-price
                    quantity_available = row['Bid'][0][1] # quantity of the highest bid-price
                    max_quantity = self.portfolio

                    if max_quantity < quantity_available:
                        quantity_to_sell = max_quantity
                    else:
                        quantity_to_sell = quantity_available

                    self.portfolio -= quantity_to_sell
                    self.cash += quantity_to_sell * trade_price
                    self.trades.append([row['Date'], row['Timestamp'], ('sell',trade_price, quantity_to_sell)])
                    
                else:
                    # Hold if no assets to sell
                    self.trades.append([row['Date'], row['Timestamp'], ('sell', None, None)])
                    
            else:
                # Hold
                self.trades.append([row['Date'], row['Timestamp'], ('sell', None, None)])
                

            # Calculate profit/loss based on the current portfolio value
            # current_portfolio_value = self.portfolio * current_mid_price
            # total_value = current_portfolio_value + self.cash
            # profit_loss = total_value - initial_cash
            # self.profit_loss.append(profit_loss)
            
            # Calculate metrics (e.g., profit/loss, Sharpe ratio, maximum drawdown) and store
        return self.trades
            
    def get_performance_metrics(self):
        # Calculate and return performance metrics such as profit/loss, Sharpe ratio, etc.
        return self.cash, self.portfolio

In [9]:
# Step 5: Run simulation
simulator = TradingSimulator(X_test, prediction_model, simple_trading_strategy, initial_cash=1000000)
trades = simulator.run_simulation()
trades

[['2025-01-06', 3.441, ('buy', 278, 3)],
 ['2025-01-06', 3.472, ('buy', 278, 3)],
 ['2025-01-06', 3.72, ('buy', 278, 3)],
 ['2025-01-06', 3.906, ('buy', 278, 3)],
 ['2025-01-06', 4.247, ('buy', 278, 3)],
 ['2025-01-06', 5.208, ('buy', 278, 3)],
 ['2025-01-06', 5.766, ('buy', 278, 3)],
 ['2025-01-06', 6.355, ('buy', 278, 3)],
 ['2025-01-06', 6.386, ('buy', 278, 3)],
 ['2025-01-06', 6.448, ('buy', 278, 3)],
 ['2025-01-06', 6.541, ('buy', 278, 3)],
 ['2025-01-06', 6.603, ('buy', 278, 3)],
 ['2025-01-06', 6.789, ('buy', 278, 3)],
 ['2025-01-06', 7.006, ('buy', 278, 3)],
 ['2025-01-06', 7.44, ('buy', 278, 3)],
 ['2025-01-06', 7.595, ('buy', 278, 3)],
 ['2025-01-06', 7.657, ('buy', 278, 3)],
 ['2025-01-06', 7.719, ('buy', 278, 3)],
 ['2025-01-06', 7.781, ('buy', 278, 3)],
 ['2025-01-06', 7.998, ('buy', 286, 5)],
 ['2025-01-06', 8.06, ('buy', 286, 5)],
 ['2025-01-06', 8.091, ('buy', 286, 5)],
 ['2025-01-06', 8.277, ('buy', 286, 5)],
 ['2025-01-06', 8.339, ('buy', 286, 5)],
 ['2025-01-06', 8.4

In [10]:
# Step 6: Get performance metrics
performance_metrics = simulator.get_performance_metrics()
print(performance_metrics)

(1190825, 0)
