In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib.pyplot as plt # type: ignore
import numpy as np
import polars as pl

In [4]:
from dspy.hdb import get_dataset

In [22]:
RCS = [f"{coin}USDT" for coin in ["BTC", "ETH", "SOL", "DOGE", "LINK"]]
TIMES = ['250401.000100', '250430.215000']

# <font color="grey">Data loading and handling</font>

Data is available in two forms: limit order book (LOB) and data sampled at fixed intervals (for example, seconds). The timestamps are given in nanosecond resolution as Unix timestamps, but the granularity of the data is not that fine. A simple dataloader and some helper function to convert Python datetime objects or strings of the form '240802.1450' into timestamps are provided. The 'book' data contains the bid and ask prices and volumes up to a specified depth (the maximum depth is currently 25).

In [None]:
# At the moment, we are only concerned with data available through the Tardis dataset.
dl = get_dataset("tardis")

####  <a id='chapter1'> <font color="grey">1. Limit Order Book</font></a>

In [None]:
df = dl.load_book("BTCUSDT", TIMES, depth=10)

In [16]:
# Add human readable timestamp and mid prices
df = df.ds.add_datetime('ts')
df.head()

ts,ts_local,asks[0].price,asks[0].amount,bids[0].price,bids[0].amount,asks[1].price,asks[1].amount,bids[1].price,bids[1].amount,asks[2].price,asks[2].amount,bids[2].price,bids[2].amount,asks[3].price,asks[3].amount,bids[3].price,bids[3].amount,asks[4].price,asks[4].amount,bids[4].price,bids[4].amount,asks[5].price,asks[5].amount,bids[5].price,bids[5].amount,asks[6].price,asks[6].amount,bids[6].price,bids[6].amount,asks[7].price,asks[7].amount,bids[7].price,bids[7].amount,asks[8].price,asks[8].amount,bids[8].price,bids[8].amount,asks[9].price,asks[9].amount,bids[9].price,bids[9].amount,dts
i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns]
1748822460029000000,1748822460032326000,105554.9,9.979,105554.8,4.289,105555.0,0.016,105554.7,0.002,105555.1,0.002,105554.3,0.001,105555.4,0.006,105554.0,0.002,105555.8,0.001,105553.9,0.001,105555.9,0.003,105553.7,0.002,105556.2,0.001,105553.5,0.001,105556.3,0.172,105553.1,0.001,105556.4,0.04,105553.0,0.04,105556.5,0.027,105552.9,0.01,2025-06-02 00:01:00.029
1748822460132000000,1748822460135215000,105554.9,9.979,105554.8,4.288,105555.0,0.016,105554.7,0.002,105555.1,0.002,105554.3,0.001,105555.4,0.006,105554.0,0.002,105555.8,0.001,105553.9,0.001,105555.9,0.003,105553.7,0.002,105556.2,0.001,105553.5,0.001,105556.3,0.172,105553.1,0.001,105556.4,0.04,105553.0,0.04,105556.5,0.027,105552.9,0.01,2025-06-02 00:01:00.132
1748822460185000000,1748822460187873000,105554.9,9.851,105554.8,4.286,105555.0,0.016,105554.7,0.002,105555.1,0.002,105554.3,0.001,105555.4,0.006,105554.0,0.002,105555.8,0.001,105553.9,0.001,105555.9,0.003,105553.7,0.002,105556.2,0.001,105553.5,0.001,105556.3,0.172,105553.1,0.001,105556.4,0.04,105553.0,0.04,105556.5,0.027,105552.9,0.01,2025-06-02 00:01:00.185
1748822460237000000,1748822460240149000,105554.9,9.851,105554.8,4.293,105555.0,0.016,105554.7,0.002,105555.1,0.002,105554.3,0.001,105555.4,0.006,105554.0,0.002,105555.8,0.001,105553.9,0.001,105555.9,0.003,105553.7,0.002,105556.2,0.001,105553.5,0.001,105556.3,0.172,105553.1,0.001,105556.4,0.04,105553.0,0.04,105556.5,0.027,105552.9,0.01,2025-06-02 00:01:00.237
1748822460288000000,1748822460291489000,105554.9,9.842,105554.8,4.198,105555.0,0.016,105554.7,0.002,105555.1,0.002,105554.3,0.001,105555.4,0.006,105554.0,0.002,105555.8,0.001,105553.9,0.001,105555.9,0.003,105553.7,0.002,105556.2,0.001,105553.5,0.001,105556.3,0.172,105553.1,0.001,105556.4,0.04,105553.0,0.04,105556.5,0.027,105552.9,0.01,2025-06-02 00:01:00.288


In [None]:
plt.plot(df['dts'], df[f'mid_{RCS[0]}'])
plt.xlabel('Time')
plt.ylabel('Mid price')
plt.title(f'{RCS[0]}')
plt.show()

In [None]:
# Add a column with random +1 or -1 entries
df = df.with_columns(
    pl.lit(np.random.choice([1., -1.], size=len(df))).alias('random_signal')
)

In [None]:
pdf = df.target.add_sig_pnl(ts_col="ts", col="mid_ETHUSDT", signal="random_signal", horizon="1m", in_bp=False)
pdf.head()

####  <a id='chapter2'> <font color="grey">2. Fixed-frequency data (bars)</font></a>