In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt

In [3]:
from dspy.hdb import get_dataset

In [None]:
# Product symbols used throughout the notebook: each base coin with "USDT" appended.
RCS = [base + "USDT" for base in ("BTC", "ETH", "XRP", "SOL", "DOGE")]
# Time window handed to the loaders
# (presumably [start, end] as 'YYMMDD.HHMMSS' strings -- confirm against the dspy helpers).
TIMES = ["250601.000100", "250720.215000"]

# <font color="grey">Data loading and handling</font>

Data is available in two forms: limit order book (LOB) data and data sampled at fixed intervals (for example, every second). Timestamps are Unix timestamps expressed in nanoseconds, although the actual granularity of the data is coarser than that. A simple dataloader is provided, along with helper functions that convert Python datetime objects or strings of the form '240802.1450' into timestamps. The 'book' data contains the bid and ask prices and volumes up to a specified depth (the maximum depth is currently 25).

In [19]:
# At the moment, we are only concerned with data available through the Tardis dataset.
# `dl` is the loader object that the load_trades / load_book / load_bar calls below go through.
dl = get_dataset("tardis")

In [None]:
# Load trades for every product in RCS over the TIMES window.
# RCS is reused instead of retyping the symbol list, so the set of products
# under study is defined in exactly one place.
# NOTE(review): RCS holds the same five symbols as the previous hand-written
# list but with XRPUSDT before SOLUSDT -- confirm load_trades does not depend
# on the order of the products argument.
tdf = dl.load_trades(RCS, TIMES)
tdf.head()

In [21]:
# Preview the first rows of the trades frame (columns: ts, ts_local, product, id, side, price, vol).
tdf.head()

ts,ts_local,product,id,side,price,vol
i64,i64,str,i64,i32,f64,f64
1749945660170000000,1749945660172614000,"""BTCUSDT""",6398513364,-1,105378.5,0.004
1749945660209000000,1749945660212434000,"""BTCUSDT""",6398513365,-1,105378.5,0.004
1749945660377000000,1749945660380384000,"""BTCUSDT""",6398513366,1,105378.6,0.006
1749945660617000000,1749945660619750000,"""BTCUSDT""",6398513367,1,105378.6,0.005
1749945660634000000,1749945660637283000,"""BTCUSDT""",6398513368,1,105378.6,0.004


In [None]:
# Sanity check on trades that share an exchange timestamp.
# If the number of distinct `ts` values equals the number of distinct
# (ts, side, price) combinations, then every timestamp maps to exactly one
# (side, price) pair, i.e. trades with a duplicated timestamp agree on both.
# NOTE(review): tdf contains several products; if the check is meant per
# product, "product" probably belongs in both subsets -- confirm intent.
ts_unique = tdf.n_unique(subset=["ts"])
ts_side_price_unique = tdf.n_unique(subset=["ts", "side", "price"])

identical_duplicates = ts_unique == ts_side_price_unique
print(f"All duplicate timestamps have identical side+price: {identical_duplicates}")

All timestamps unique: True


####  <a id='chapter1'> <font color="grey">1. Limit Order Book</font></a>

In [11]:
# Load the BTCUSDT order book over the TIMES window, keeping 10 price levels per side.
df = dl.load_book("BTCUSDT", TIMES, depth=10)

In [12]:
# Size of the loaded book frame as (rows, columns).
df.shape

(14105160, 42)

In [13]:
# Add a human-readable datetime column (`dts`) derived from the `ts` timestamps.
# Only `dts` is appended here; no mid-price column is created by this cell.
df = df.ds.add_datetime('ts')
df.head()

ts,ts_local,asks[0].price,asks[0].amount,bids[0].price,bids[0].amount,asks[1].price,asks[1].amount,bids[1].price,bids[1].amount,asks[2].price,asks[2].amount,bids[2].price,bids[2].amount,asks[3].price,asks[3].amount,bids[3].price,bids[3].amount,asks[4].price,asks[4].amount,bids[4].price,bids[4].amount,asks[5].price,asks[5].amount,bids[5].price,bids[5].amount,asks[6].price,asks[6].amount,bids[6].price,bids[6].amount,asks[7].price,asks[7].amount,bids[7].price,bids[7].amount,asks[8].price,asks[8].amount,bids[8].price,bids[8].amount,asks[9].price,asks[9].amount,bids[9].price,bids[9].amount,dts
i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns]
1749945660012000000,1749945660013768000,105378.6,16.907,105378.5,8.632,105378.7,0.514,105378.4,0.171,105378.8,0.515,105378.3,1.398,105378.9,0.514,105378.2,0.002,105379.0,0.559,105378.0,0.188,105379.1,0.012,105377.9,0.002,105379.2,0.03,105377.5,0.005,105379.3,0.015,105377.4,2.664,105379.4,0.021,105377.3,0.1,105379.7,0.743,105376.4,0.072,2025-06-15 00:01:00.012
1749945660064000000,1749945660066738000,105378.6,16.926,105378.5,8.629,105378.7,0.514,105378.4,0.171,105378.8,0.515,105378.3,1.398,105378.9,0.514,105378.2,0.002,105379.0,0.559,105378.0,0.188,105379.1,0.012,105377.9,0.002,105379.2,0.03,105377.5,0.005,105379.3,0.015,105377.4,2.664,105379.4,0.021,105377.3,0.1,105379.7,0.743,105376.4,0.072,2025-06-15 00:01:00.064
1749945660116000000,1749945660118072000,105378.6,17.202,105378.5,8.63,105378.7,0.514,105378.4,0.171,105378.8,0.515,105378.3,1.398,105378.9,0.512,105378.2,0.002,105379.0,0.557,105378.0,0.188,105379.1,0.012,105377.9,0.002,105379.2,0.03,105377.5,0.005,105379.3,0.015,105377.4,2.664,105379.4,0.021,105377.3,0.1,105379.7,0.743,105376.4,0.072,2025-06-15 00:01:00.116
1749945660169000000,1749945660171689000,105378.6,17.265,105378.5,8.63,105378.7,0.514,105378.4,0.171,105378.8,0.515,105378.3,1.398,105378.9,0.514,105378.2,0.002,105379.0,0.559,105378.0,0.188,105379.1,0.012,105377.9,0.002,105379.2,0.03,105377.5,0.005,105379.3,0.015,105377.4,2.664,105379.4,0.021,105377.3,0.1,105379.7,0.743,105376.4,0.072,2025-06-15 00:01:00.169
1749945660220000000,1749945660223083000,105378.6,17.006,105378.5,8.622,105378.7,0.514,105378.4,0.171,105378.8,0.515,105378.3,1.398,105378.9,0.514,105378.2,0.002,105379.0,0.559,105378.0,0.188,105379.1,0.012,105377.9,0.002,105379.2,0.03,105377.5,0.005,105379.3,0.015,105377.4,2.664,105379.4,0.021,105377.3,0.1,105379.7,0.743,105376.4,0.072,2025-06-15 00:01:00.220


In [None]:
# Level-0 ask and bid prices over time for the loaded book.
# Each series is plotted with its own label so the legend tells ask from bid.
plt.plot(df['dts'], df['asks[0].price'], label='Best ask')
plt.plot(df['dts'], df['bids[0].price'], label='Best bid')
plt.xlabel('Time')
plt.ylabel('Bid and ask prices')
plt.title(f'{RCS[0]}')
plt.legend()
plt.show()

In [None]:
# Top-of-book (depth=1) BTCUSDT data for the '250605.120000'-'250605.130000' window:
# level-0 prices in the left panel, bid-ask spread in the right panel.
df = dl.load_book("BTCUSDT", ["250605.120000", "250605.130000"], depth=1)
df = df.ds.add_datetime('ts')
fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: ask and bid drawn separately so each gets a legend entry.
ax[0].plot(df['dts'], df['asks[0].price'], label='Best ask')
ax[0].plot(df['dts'], df['bids[0].price'], label='Best bid')
ax[0].set_xlabel('Time')
ax[0].set_ylabel('Prices')
ax[0].legend()

# Right panel: spread = level-0 ask minus level-0 bid.
ax[1].plot(df['dts'], df['asks[0].price'] - df['bids[0].price'])
ax[1].set_xlabel('Time')
ax[1].set_ylabel('Bid-ask spread')
plt.show()

####  <a id='chapter2'> <font color="grey">2. Fixed-frequency data (bars)</font></a>

In [None]:
# Load fixed-frequency bars of the "mid" column for BTCUSDT and ETHUSDT
# (freq="1s" -- presumably one-second bars; confirm against the loader).
df = dl.load_bar(["BTCUSDT", "ETHUSDT"], ["250605.120000", "250605.150000"], col="mid", freq="1s")

In [None]:
# Preview the first rows of the bar frame.
df.head()

In [None]:
# BTCUSDT mid series from the bar frame, plotted against the raw `ts` column.
# Labels and a title are set so the figure can be read on its own.
plt.plot(df['ts'], df['mid_BTCUSDT'])
plt.xlabel('Timestamp (ts)')
plt.ylabel('Mid price')
plt.title('BTCUSDT')
plt.show()