In [31]:
import pandas as  pd

In [32]:
df = pd.read_csv('data/lob/full_lob_head.csv', index_col=0)
df

Unnamed: 0,Timestamp,Exchange,Order Type,Price,Quantity,Date
25781997,0.000,Exch0,Bid,,,2025-01-02
67020690,0.000,Exch0,Ask,,,2025-01-02
25781998,0.279,Exch0,Bid,1.0,6.0,2025-01-02
67020691,0.279,Exch0,Ask,,,2025-01-02
25781999,1.333,Exch0,Bid,1.0,6.0,2025-01-02
...,...,...,...,...,...,...
26123018,29573.938,Exch0,Bid,292.0,2.0,2025-01-02
26123018,29573.938,Exch0,Bid,291.0,9.0,2025-01-02
26123018,29573.938,Exch0,Bid,288.0,3.0,2025-01-02
26123018,29573.938,Exch0,Bid,287.0,4.0,2025-01-02


In [33]:

df.sort_values(by=['Date','Timestamp'])

# Convert 'Timestamp' to a timedelta and add it to 'Date' to get a datetime
df['Datetime'] = pd.to_datetime(df['Date']) + pd.to_timedelta(df['Timestamp'], unit='s')

df

Unnamed: 0,Timestamp,Exchange,Order Type,Price,Quantity,Date,Datetime
25781997,0.000,Exch0,Bid,,,2025-01-02,2025-01-02 00:00:00.000
67020690,0.000,Exch0,Ask,,,2025-01-02,2025-01-02 00:00:00.000
25781998,0.279,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:00.279
67020691,0.279,Exch0,Ask,,,2025-01-02,2025-01-02 00:00:00.279
25781999,1.333,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:01.333
...,...,...,...,...,...,...,...
26123018,29573.938,Exch0,Bid,292.0,2.0,2025-01-02,2025-01-02 08:12:53.938
26123018,29573.938,Exch0,Bid,291.0,9.0,2025-01-02,2025-01-02 08:12:53.938
26123018,29573.938,Exch0,Bid,288.0,3.0,2025-01-02,2025-01-02 08:12:53.938
26123018,29573.938,Exch0,Bid,287.0,4.0,2025-01-02,2025-01-02 08:12:53.938


In [34]:
def calculate_levels(df):
    # Filter out rows where 'Price' and 'Quantity' are NaN
    df = df.dropna(subset=['Price', 'Quantity'])

    # Separate into bids and asks
    bids = df[df['Order Type'] == 'Bid']
    asks = df[df['Order Type'] == 'Ask']

    # Sort bids and asks
    bids = bids.sort_values(by='Price', ascending=False)
    asks = asks.sort_values(by='Price')

    # Assign levels
    bids['Level'] = bids.groupby(['Timestamp', 'Exchange', 'Date']).cumcount() + 1
    asks['Level'] = asks.groupby(['Timestamp', 'Exchange', 'Date']).cumcount() + 1

    # Calculate levels
    bids['Level'] = bids.index + 1
    asks['Level'] = asks.index + 1

    # Combine bids and asks back together and sort by original order if needed
    df = pd.concat([bids, asks]).sort_index()

    return df

df = calculate_levels(df)
df

Unnamed: 0,Timestamp,Exchange,Order Type,Price,Quantity,Date,Datetime,Level
25781998,0.279,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:00.279,25781999
25781999,1.333,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:01.333,25782000
25782000,1.581,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:01.581,25782001
25782001,1.643,Exch0,Bid,1.0,6.0,2025-01-02,2025-01-02 00:00:01.643,25782002
25782002,1.736,Exch0,Bid,261.0,1.0,2025-01-02,2025-01-02 00:00:01.736,25782003
...,...,...,...,...,...,...,...,...
67361710,29573.907,Exch0,Ask,556.0,2.0,2025-01-02,2025-01-02 08:12:53.907,67361711
67361710,29573.907,Exch0,Ask,303.0,1.0,2025-01-02,2025-01-02 08:12:53.907,67361711
67361710,29573.907,Exch0,Ask,780.0,3.0,2025-01-02,2025-01-02 08:12:53.907,67361711
67361710,29573.907,Exch0,Ask,503.0,5.0,2025-01-02,2025-01-02 08:12:53.907,67361711
