# Initial EDA

This notebook outlines the initial exploratory data analysis (EDA) carried out on the limit order book (LOB) data. So far the code has been written and run against a small sample of the full LOB dataset. To ensure that no trends or outliers are missed, this EDA will need to be re-run against the full dataset.

In [15]:
import pandas as pd


In [19]:
# Load the sample extract of the LOB data into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
sample_csv = 'data/output/EDA_lob_output_data_sample.csv'  # path to the sample data
df = pd.read_csv(sample_csv)

In [17]:
# Put Timestamp and Date first so each row's time context is easy to read.
# Only the columns listed here are kept; a missing column raises a KeyError.
desired_column_order = [
    'Timestamp',
    'Date',
    'Exchange',
    'Bid',
    'Ask',
    'Mid_Price',
]
df = df.loc[:, desired_column_order]
df

Unnamed: 0,Timestamp,Date,Exchange,Bid,Ask,Mid_Price
0,0.000,2025-01-02,Exch0,[],[],
1,0.279,2025-01-02,Exch0,"[[1, 6]]",[],
2,1.333,2025-01-02,Exch0,"[[1, 6]]","[[800, 1]]",400.5
3,1.581,2025-01-02,Exch0,"[[1, 6]]","[[799, 1]]",400.0
4,1.643,2025-01-02,Exch0,"[[1, 6]]","[[798, 1]]",399.5
...,...,...,...,...,...,...
1037929,30599.418,2025-01-06,Exch0,"[[323, 2], [104, 3], [63, 1], [44, 6]]","[[338, 1], [343, 2], [507, 4], [659, 1], [749,...",330.5
1037930,30599.449,2025-01-06,Exch0,"[[323, 2], [99, 3], [63, 1], [44, 6]]","[[338, 1], [343, 2], [507, 4], [659, 1], [749,...",330.5
1037931,30599.635,2025-01-06,Exch0,"[[323, 2], [99, 3], [63, 1], [44, 6]]","[[338, 1], [341, 2], [507, 4], [659, 1], [749,...",330.5
1037932,30599.697,2025-01-06,Exch0,"[[323, 2], [249, 1], [99, 3], [44, 6]]","[[338, 1], [341, 2], [507, 4], [659, 1], [749,...",330.5


## Tick Time

In [18]:
# Parse the Date column into pandas datetimes so it sorts and groups chronologically
df['Date'] = pd.to_datetime(df['Date'])

# Order the rows by day and then by Timestamp within the day, and derive
# Tick_Time as the difference between consecutive Timestamps on the same Date.
# The first row of each Date has no predecessor, so its Tick_Time is NaN.
# NOTE(review): the grouping is by Date only - this assumes a single Exchange
# per day; confirm before running against the full dataset.
df = (
    df
    .sort_values(by=['Date', 'Timestamp'])
    .assign(Tick_Time=lambda frame: frame.groupby('Date')['Timestamp'].diff())
)

df

Unnamed: 0,Timestamp,Date,Exchange,Bid,Ask,Mid_Price,Tick_Time
0,0.000,2025-01-02,Exch0,[],[],,
1,0.279,2025-01-02,Exch0,"[[1, 6]]",[],,0.279
2,1.333,2025-01-02,Exch0,"[[1, 6]]","[[800, 1]]",400.5,1.054
3,1.581,2025-01-02,Exch0,"[[1, 6]]","[[799, 1]]",400.0,0.248
4,1.643,2025-01-02,Exch0,"[[1, 6]]","[[798, 1]]",399.5,0.062
...,...,...,...,...,...,...,...
1037929,30599.418,2025-01-06,Exch0,"[[323, 2], [104, 3], [63, 1], [44, 6]]","[[338, 1], [343, 2], [507, 4], [659, 1], [749,...",330.5,0.031
1037930,30599.449,2025-01-06,Exch0,"[[323, 2], [99, 3], [63, 1], [44, 6]]","[[338, 1], [343, 2], [507, 4], [659, 1], [749,...",330.5,0.031
1037931,30599.635,2025-01-06,Exch0,"[[323, 2], [99, 3], [63, 1], [44, 6]]","[[338, 1], [341, 2], [507, 4], [659, 1], [749,...",330.5,0.186
1037932,30599.697,2025-01-06,Exch0,"[[323, 2], [249, 1], [99, 3], [44, 6]]","[[338, 1], [341, 2], [507, 4], [659, 1], [749,...",330.5,0.062
