In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast

1. Loading the data

In [47]:
# Locations of the three input files (relative paths)
predicted_tape_path = '../resampled_data_ffill.csv'
real_tape_path = '../Tapes/UoB_Set01_2025-01-02tapes.csv'
lob_dataset_path = '../result.csv'

# Both tapes are read with pandas' default settings, so the first line of each
# file is taken as the header row.
predicted_tape = pd.read_csv(predicted_tape_path)
real_tape = pd.read_csv(real_tape_path)

# The LOB file labels its time column 'Timestamp'; switch it to lower case on load.
lob_dataset = pd.read_csv(lob_dataset_path).rename(columns={'Timestamp': 'timestamp'})

# Interpret the LOB time values as seconds counted from 2025-01-02 00:00:00.
lob_dataset['timestamp'] = pd.to_datetime(
    lob_dataset['timestamp'],
    unit='s',
    origin=pd.Timestamp('2025-01-02'),
)

# Preview the head of each frame to check the structure that was read.
predicted_tape.head(), real_tape.head(), lob_dataset.head()


(             timestamp  transaction_price
 0  2025-01-02 00:00:10              267.0
 1  2025-01-02 00:00:11              268.0
 2  2025-01-02 00:00:12              270.0
 3  2025-01-02 00:00:13              267.0
 4  2025-01-02 00:00:14              267.0,
    10.881   267   1
 0  11.067   269   1
 1  11.222   267   2
 2  12.338   270   2
 3  13.733   267   3
 4  18.321   265   2,
    Unnamed: 0                     timestamp       Bid         Ask
 0           0 2025-01-02 00:00:00.000000000        []          []
 1           1 2025-01-02 00:00:00.279000064  [[1, 6]]          []
 2           2 2025-01-02 00:00:01.332999936  [[1, 6]]  [[800, 1]]
 3           3 2025-01-02 00:00:01.581000192  [[1, 6]]  [[799, 1]]
 4           4 2025-01-02 00:00:01.642999808  [[1, 6]]  [[798, 1]])

2. Convert Timestamps and Merge Data

In [48]:
# Re-read the real tape, this time telling pandas that the file has no header
# row and supplying the column labels ourselves.
# The meaning of the three columns (time, price, quantity) is an assumption
# based on the values seen in the file -- TODO confirm against the data source.
real_tape_columns = ['timestamp', 'price', 'quantity']
real_tape = pd.read_csv(
    real_tape_path,
    names=real_tape_columns,
    header=None,
)

# Check the relabelled frame.
real_tape.head()


Unnamed: 0,timestamp,price,quantity
0,10.881,267,1
1,11.067,269,1
2,11.222,267,2
3,12.338,270,2
4,13.733,267,3


In [49]:
from datetime import datetime, timedelta

# The tape's time values are assumed to be seconds elapsed since the start of
# the trading day, which is anchored at 2025-01-02 00:00:00.
base_date = datetime(2025, 1, 2)


def _seconds_to_datetime(seconds):
    """Return the absolute datetime that lies `seconds` after `base_date`."""
    return base_date + timedelta(seconds=seconds)


real_tape['datetime'] = real_tape['timestamp'].map(_seconds_to_datetime)

# Keep only the converted column so the raw offsets cannot be confused with it.
real_tape = real_tape.drop(columns=['timestamp'])

# Check the converted values.
real_tape.head()


Unnamed: 0,price,quantity,datetime
0,267,1,2025-01-02 00:00:10.881
1,269,1,2025-01-02 00:00:11.067
2,267,2,2025-01-02 00:00:11.222
3,270,2,2025-01-02 00:00:12.338
4,267,3,2025-01-02 00:00:13.733


In [50]:
# Ensure the LOB time column has a datetime dtype before it is used as a merge key.
lob_dataset['timestamp'] = pd.to_datetime(lob_dataset['timestamp'])

# merge_asof needs both inputs ordered by their key column.
lob_by_time = lob_dataset.sort_values('timestamp')
tape_by_time = real_tape.sort_values('datetime')

# Attach to every LOB row the tape trade whose time is closest to it.
# NOTE: 'nearest' looks in both directions, so a LOB row can be paired with a
# trade that happens after it.
merged_dataset = pd.merge_asof(
    lob_by_time,
    tape_by_time,
    left_on='timestamp',
    right_on='datetime',
    direction='nearest',
)

# Inspect the first 70 merged rows to verify the pairing.
merged_dataset.head(70)


Unnamed: 0.1,Unnamed: 0,timestamp,Bid,Ask,price,quantity,datetime
0,0,2025-01-02 00:00:00.000000000,[],[],267,1,2025-01-02 00:00:10.881
1,1,2025-01-02 00:00:00.279000064,"[[1, 6]]",[],267,1,2025-01-02 00:00:10.881
2,2,2025-01-02 00:00:01.332999936,"[[1, 6]]","[[800, 1]]",267,1,2025-01-02 00:00:10.881
3,3,2025-01-02 00:00:01.581000192,"[[1, 6]]","[[799, 1]]",267,1,2025-01-02 00:00:10.881
4,4,2025-01-02 00:00:01.642999808,"[[1, 6]]","[[798, 1]]",267,1,2025-01-02 00:00:10.881
...,...,...,...,...,...,...,...
65,65,2025-01-02 00:00:10.819000064,"[[267, 6], [261, 1], [260, 11], [259, 1], [193...","[[268, 1], [269, 1], [271, 5], [275, 8], [277,...",267,1,2025-01-02 00:00:10.881
66,66,2025-01-02 00:00:10.912000000,"[[267, 5], [261, 1], [260, 11], [259, 1], [193...","[[269, 1], [271, 5], [275, 8], [277, 2], [281,...",267,1,2025-01-02 00:00:10.881
67,67,2025-01-02 00:00:11.036000000,"[[267, 5], [261, 1], [260, 11], [259, 1], [193...","[[269, 1], [271, 5], [275, 8], [277, 2], [281,...",269,1,2025-01-02 00:00:11.067
68,68,2025-01-02 00:00:11.066999808,"[[267, 5], [261, 1], [260, 11], [259, 1], [193...","[[269, 1], [271, 5], [275, 8], [281, 6], [584,...",269,1,2025-01-02 00:00:11.067


In [51]:
# Back-test a simple buy-low / sell-high rule over the merged LOB/tape rows.
#
# NOTE: the decision taken at row i compares its price with the price at row
# i + 1, i.e. it uses a value that is not yet known at row i. The profit
# reported below is therefore a perfect-foresight figure, not the result of a
# strategy that could actually be traded.

# Simulation parameters
initial_capital = 10000
trade_size = 1        # units bought or sold per trade
transaction_cost = 0  # flat fee charged on every trade (buy or sell)

# Running state
capital = initial_capital
holdings = 0  # units currently held

# Executed trades as (side, price) tuples, in execution order
trades = []

# Extract the price column once: per-row .loc lookups inside a Python loop are
# slow, and a plain list yields ordinary Python numbers.
prices = merged_dataset['price'].tolist()

for i in range(len(prices) - 1):
    current_price = prices[i]
    next_price = prices[i + 1]

    # Cash needed to buy trade_size units at the current price, fee included.
    buy_cost = current_price * trade_size + transaction_cost

    if next_price > current_price and capital >= buy_cost:
        # Buy: the next price is higher and the full cost is affordable.
        capital -= buy_cost
        holdings += trade_size
        trades.append(('Buy', current_price))
    elif next_price < current_price and holdings >= trade_size:
        # Sell: the next price is lower and there is a full lot to sell.
        capital += current_price * trade_size - transaction_cost
        holdings -= trade_size
        trades.append(('Sell', current_price))

# Close out whatever is still held at the last known price. This counts as one
# more sell, so it pays the same per-trade fee as every other sell.
if holdings > 0:
    final_price = prices[-1]
    capital += holdings * final_price - transaction_cost
    trades.append(('Sell', final_price))
    holdings = 0

# Final performance
final_capital = capital
profit_or_loss = final_capital - initial_capital

# Show the results; only the first 10 trades are listed to keep the output short.
final_capital, profit_or_loss, trades[:10]


(38201,
 28201,
 [('Buy', 267),
  ('Sell', 269),
  ('Buy', 267),
  ('Sell', 270),
  ('Buy', 265),
  ('Sell', 266),
  ('Buy', 261),
  ('Sell', 264),
  ('Buy', 263),
  ('Sell', 268)])