In [40]:
import pandas as  pd

In [41]:
df = pd.read_csv('data/lob/full_lob_head.csv', index_col=0)
df

Unnamed: 0,Timestamp,Exchange,Order Type,Price,Quantity,Date
25781997,0.000,Exch0,Bid,,,2025-01-02
67020690,0.000,Exch0,Ask,,,2025-01-02
25781998,0.279,Exch0,Bid,1.0,6.0,2025-01-02
67020691,0.279,Exch0,Ask,,,2025-01-02
25781999,1.333,Exch0,Bid,1.0,6.0,2025-01-02
...,...,...,...,...,...,...
26123018,29573.938,Exch0,Bid,292.0,2.0,2025-01-02
26123018,29573.938,Exch0,Bid,291.0,9.0,2025-01-02
26123018,29573.938,Exch0,Bid,288.0,3.0,2025-01-02
26123018,29573.938,Exch0,Bid,287.0,4.0,2025-01-02


In [42]:
# 1. Filter out rows with NaN values in 'Price' and 'Quantity'
df_filtered = df.dropna(subset=['Price', 'Quantity'])

# 2. Calculate the best bid and ask prices and volumes
best_bids = df_filtered[df_filtered['Order Type'] == 'Bid'].groupby('Timestamp').agg({'Price': 'max', 'Quantity': 'sum'}).rename(columns={'Price': 'Best Bid Price', 'Quantity': 'Bid Volume at Level 1'})
best_asks = df_filtered[df_filtered['Order Type'] == 'Ask'].groupby('Timestamp').agg({'Price': 'min', 'Quantity': 'sum'}).rename(columns={'Price': 'Best Ask Price', 'Quantity': 'Ask Volume at Level 1'})

# 3. Merge best bids and asks to calculate mid-price and spread
df_merged = best_bids.merge(best_asks, left_index=True, right_index=True, how='outer')
df_merged['Mid-price'] = (df_merged['Best Bid Price'] + df_merged['Best Ask Price']) / 2
df_merged['Bid-Ask Spread'] = df_merged['Best Ask Price'] - df_merged['Best Bid Price']

# 4. Calculate order imbalance at level 1
df_merged['Order Imbalance Level 1'] = df_merged['Bid Volume at Level 1'] - df_merged['Ask Volume at Level 1']

# Merging the features back with the original DataFrame
df = df.merge(df_merged, left_on='Timestamp', right_index=True, how='left')

df

Unnamed: 0,Timestamp,Exchange,Order Type,Price,Quantity,Date,Best Bid Price,Bid Volume at Level 1,Best Ask Price,Ask Volume at Level 1,Mid-price,Bid-Ask Spread,Order Imbalance Level 1
25781997,0.000,Exch0,Bid,,,2025-01-02,,,,,,,
67020690,0.000,Exch0,Ask,,,2025-01-02,,,,,,,
25781998,0.279,Exch0,Bid,1.0,6.0,2025-01-02,1.0,6.0,,,,,
67020691,0.279,Exch0,Ask,,,2025-01-02,1.0,6.0,,,,,
25781999,1.333,Exch0,Bid,1.0,6.0,2025-01-02,1.0,6.0,800.0,1.0,400.5,799.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26123018,29573.938,Exch0,Bid,292.0,2.0,2025-01-02,295.0,20.0,,,,,
26123018,29573.938,Exch0,Bid,291.0,9.0,2025-01-02,295.0,20.0,,,,,
26123018,29573.938,Exch0,Bid,288.0,3.0,2025-01-02,295.0,20.0,,,,,
26123018,29573.938,Exch0,Bid,287.0,4.0,2025-01-02,295.0,20.0,,,,,
