In [2]:
import pandas as pd
import plotly.graph_objects as go
import re

In [3]:
def prices(data):
    """Extract every price from a stringified list of price levels.

    Parameters
    ----------
    data : str
        Text containing zero or more ``price: <number>`` fragments, such as
        ``"[PriceLevel { price: 49.03, ... }]"``.

    Returns
    -------
    list[float]
        The prices in order of appearance; empty when nothing matches.
    """
    # Accept an optional sign, an optional fractional part and an optional
    # exponent, so "price: 50", "price: -0.25" and "price: 1.5e-7" are parsed
    # in full instead of being skipped or truncated at the mantissa.
    price_pattern = re.compile(r"price: ([-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)")
    return [float(price) for price in price_pattern.findall(data)]

# Load the order-book snapshots and, in one pass, add the parsed price lists
# for each side plus a datetime built from the nanosecond exchange timestamp.
df = (
    pd.read_parquet('orderbook.parquet', engine='pyarrow')
    .assign(
        buy_prices=lambda book: book['buys'].apply(prices),
        sell_prices=lambda book: book['sells'].apply(prices),
        trade_prices=lambda book: book['trades'].apply(prices),
        date=lambda book: pd.to_datetime(book['exchange_ts'], unit='ns'),
    )
)
# Note: the raw columns ('properties', 'buys', 'sells', 'trades', 'healthy',
# 'stale', 'ignore') are intentionally kept; they are not dropped here.
df.head()

Unnamed: 0,exchange_id,symbol_id,buys,sells,tob_0_price,tob_0_quantity,tob_1_price,tob_1_quantity,trades,properties,healthy,stale,ignore,exchange_ts,internal_ts,buy_prices,sell_prices,trade_prices,date
0,100,200,"[PriceLevel { price: 49.037699054311574, quant...","[PriceLevel { price: 50.448932183645724, quant...",49.037699,7.797284,50.448932,1.623753,"[PriceLevel { price: 49.45948149065878, quanti...","{""service"": ""test""}",False,False,False,1726825523744527000,1726825523744606924,"[49.037699054311574, 48.46135216985009, 48.357...","[50.448932183645724, 50.331930340975795, 52.64...","[49.45948149065878, 49.626299181401556, 50.082...",2024-09-20 09:45:23.744527000
1,100,200,"[PriceLevel { price: 49.59945442868031, quanti...","[PriceLevel { price: 49.59945442868031, quanti...",49.599454,7.797284,49.599454,1.623753,"[PriceLevel { price: 49.59945442868031, quanti...","{""service"": ""test""}",False,False,False,1726825523744535171,1726825523744590584,"[49.59945442868031, 49.329144703467975, 49.292...","[49.59945442868031, 50.04542334832386, 50.1403...","[49.59945442868031, 49.59945442868031, 49.5994...",2024-09-20 09:45:23.744535171
2,100,200,"[PriceLevel { price: 49.23123888303407, quanti...","[PriceLevel { price: 49.23123888303407, quanti...",49.231239,7.797284,49.231239,1.623753,"[PriceLevel { price: 49.23123888303407, quanti...","{""service"": ""test""}",False,False,False,1726825523744543174,1726825523744608149,"[49.23123888303407, 48.916968526779414, 48.864...","[49.23123888303407, 50.13332660113922, 49.9438...",[49.23123888303407],2024-09-20 09:45:23.744543174
3,100,200,"[PriceLevel { price: 49.35546543183609, quanti...","[PriceLevel { price: 49.35546543183609, quanti...",49.355465,7.797284,49.355465,1.623753,"[PriceLevel { price: 49.35546543183609, quanti...","{""service"": ""test""}",False,False,False,1726825523744548987,1726825523744646105,"[49.35546543183609, 48.797564018314006, 47.986...","[49.35546543183609, 50.085320926410475, 49.991...","[49.35546543183609, 49.35546543183609, 49.3554...",2024-09-20 09:45:23.744548987
4,100,200,"[PriceLevel { price: 49.54203991958444, quanti...","[PriceLevel { price: 49.54203991958444, quanti...",49.54204,7.797284,49.54204,1.623753,"[PriceLevel { price: 49.54203991958444, quanti...","{""service"": ""test""}",False,False,False,1726825523744554647,1726825523744565519,"[49.54203991958444, 48.729483672787296, 47.702...","[49.54203991958444, 50.46875317030652, 51.2605...","[49.54203991958444, 49.54203991958444, 49.5420...",2024-09-20 09:45:23.744554647


In [4]:
def hlc(df):
    """Compute one row's high, low and close from its trade prices.

    Despite the parameter name, ``df`` is a single row (anything indexable by
    ``"trade_prices"``), as supplied by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    pd.Series
        Four positional values in the order open, high, low, close. The open
        slot is always missing here; all four are missing when the row has no
        trade prices.
    """
    trade_prices = df["trade_prices"]
    # Explicit None/len check instead of truthiness so array-like inputs do not
    # raise, and return a Series (not a tuple) so every row yields the same
    # type when this is used with DataFrame.apply.
    if trade_prices is None or len(trade_prices) == 0:
        return pd.Series([None] * 4, dtype="float64")

    high = max(trade_prices)
    low = min(trade_prices)
    close = trade_prices[-1]  # last trade in the row
    return pd.Series([None, high, low, close])

# Derive the OHLC columns row by row; hlc() leaves 'open' empty, so it is
# filled in below.
df[['open', 'high', 'low', 'close']] = df.apply(hlc, axis=1)

# Each row opens at the previous row's close. shift(1) does this for the whole
# column at once and works positionally, whatever the index labels are.
df['open'] = df['close'].shift(1)

# The first row has no predecessor, so it opens at its own first trade price.
# If that row has no trades, 'open' is left missing instead of raising.
first_trades = df['trade_prices'].iloc[0]
if len(first_trades) > 0:
    df.iloc[0, df.columns.get_loc('open')] = first_trades[0]

df.head()

Unnamed: 0,exchange_id,symbol_id,buys,sells,tob_0_price,tob_0_quantity,tob_1_price,tob_1_quantity,trades,properties,...,exchange_ts,internal_ts,buy_prices,sell_prices,trade_prices,date,open,high,low,close
0,100,200,"[PriceLevel { price: 49.037699054311574, quant...","[PriceLevel { price: 50.448932183645724, quant...",49.037699,7.797284,50.448932,1.623753,"[PriceLevel { price: 49.45948149065878, quanti...","{""service"": ""test""}",...,1726825523744527000,1726825523744606924,"[49.037699054311574, 48.46135216985009, 48.357...","[50.448932183645724, 50.331930340975795, 52.64...","[49.45948149065878, 49.626299181401556, 50.082...",2024-09-20 09:45:23.744527000,49.459481,50.082337,49.438125,49.438125
1,100,200,"[PriceLevel { price: 49.59945442868031, quanti...","[PriceLevel { price: 49.59945442868031, quanti...",49.599454,7.797284,49.599454,1.623753,"[PriceLevel { price: 49.59945442868031, quanti...","{""service"": ""test""}",...,1726825523744535171,1726825523744590584,"[49.59945442868031, 49.329144703467975, 49.292...","[49.59945442868031, 50.04542334832386, 50.1403...","[49.59945442868031, 49.59945442868031, 49.5994...",2024-09-20 09:45:23.744535171,49.438125,49.599454,49.599454,49.599454
2,100,200,"[PriceLevel { price: 49.23123888303407, quanti...","[PriceLevel { price: 49.23123888303407, quanti...",49.231239,7.797284,49.231239,1.623753,"[PriceLevel { price: 49.23123888303407, quanti...","{""service"": ""test""}",...,1726825523744543174,1726825523744608149,"[49.23123888303407, 48.916968526779414, 48.864...","[49.23123888303407, 50.13332660113922, 49.9438...",[49.23123888303407],2024-09-20 09:45:23.744543174,49.599454,49.231239,49.231239,49.231239
3,100,200,"[PriceLevel { price: 49.35546543183609, quanti...","[PriceLevel { price: 49.35546543183609, quanti...",49.355465,7.797284,49.355465,1.623753,"[PriceLevel { price: 49.35546543183609, quanti...","{""service"": ""test""}",...,1726825523744548987,1726825523744646105,"[49.35546543183609, 48.797564018314006, 47.986...","[49.35546543183609, 50.085320926410475, 49.991...","[49.35546543183609, 49.35546543183609, 49.3554...",2024-09-20 09:45:23.744548987,49.231239,49.355465,49.355465,49.355465
4,100,200,"[PriceLevel { price: 49.54203991958444, quanti...","[PriceLevel { price: 49.54203991958444, quanti...",49.54204,7.797284,49.54204,1.623753,"[PriceLevel { price: 49.54203991958444, quanti...","{""service"": ""test""}",...,1726825523744554647,1726825523744565519,"[49.54203991958444, 48.729483672787296, 47.702...","[49.54203991958444, 50.46875317030652, 51.2605...","[49.54203991958444, 49.54203991958444, 49.5420...",2024-09-20 09:45:23.744554647,49.355465,49.54204,49.54204,49.54204


In [5]:
# Build the candlestick trace from the OHLC columns, keyed by the row datetime.
candles = go.Candlestick(
    x=df['date'],
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
)

fig = go.Figure(data=[candles])
# Hide the range slider that candlestick charts show under the x-axis.
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()