In [1]:
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
import polars as pl


project_root = Path().resolve().parent
sys.path.insert(0, str(project_root / "src"))

from dspy.hdb import get_dataset
from dspy.sim.market_simulator import MarketSimulator
from dspy.utils import to_ns, ts_to_str
from dspy.features.feature_utils import apply_batch_features, extract_features , flatten_features
from dspy.agents.agent_utils import get_agent
from dspy.features.utils import get_products
from dspy.utils import add_ts_dt

# ---------- Load run config file ----------

def load_config(path: Path) -> dict:
    with open(path, "r") as f:
        return json.load(f)

config_path = project_root / "run/run_config.json"
config = load_config(config_path)

dataset_name     = config["dataset"]
product          = config["product"]
depth            = config["depth"]
latency_ns       = config["latency_micros"] * 1_000
max_inventory    = config["max_inventory"]
inv_penalty      = config["inventory_penalty"]
initial_cash     = config["initial_cash"]
agent_config     = config["agent"]
intervals        = config["intervals"]
min_order_size   = config["min_order_size"]
tick_size        = config["tick_size"]
initial_cash     = config["initial_cash"]
cost_in_bps      = config["cost_in_bps"]
fixed_cost       = config["fixed_cost"]
simulator_mode   = config["simulator_mode"]

loader = get_dataset(dataset_name)
all_books, all_ts = [], []
feature_path = project_root / "run/features.json"
feature_config = load_config(feature_path)
inventory_feature_flag = "inventory" in feature_config.keys()

#loaddataframe
for interval in intervals:
        start_str = interval["start"]
        end_str   = interval["end"]
        print('dataframe from:', start_str,'to:',end_str)

        start_ts = datetime.strptime(interval["start"], "%Y-%m-%d %H:%M:%S").strftime("%y%m%d.%H%M%S")
        end_ts   = datetime.strptime(interval["end"],   "%Y-%m-%d %H:%M:%S").strftime("%y%m%d.%H%M%S")

        df = loader.load_book(
            product=product,
            times=[start_ts, end_ts],
            depth=depth,
            type="book_snapshot_25",
            lazy=False
        )
# from dspy.features.book_features import add_mid,add_vwap
# df=add_mid(df)
# df.head()

dataframe from: 2025-04-01 00:00:00 to: 2025-04-01 00:00:30


In [2]:
df, feature_cols = apply_batch_features(df, feature_config)
df

realized_vol_200ms_vwap_level1,realized_vol_100ms_mid,ts,ts_local,asks[0].price,asks[0].amount,bids[0].price,bids[0].amount,asks[1].price,asks[1].amount,bids[1].price,bids[1].amount,asks[2].price,asks[2].amount,bids[2].price,bids[2].amount,asks[3].price,asks[3].amount,bids[3].price,bids[3].amount,asks[4].price,asks[4].amount,bids[4].price,bids[4].amount,mid,spread,rel_return_mid,lob_price_level_bid_1,lob_price_level_ask_1,book_imbalance_5,vwap_level2,cross_vwap_level3,mid_ret_t5,vwap_level1,realized_vol_50_ticks_mid,ret_50ms_mid,zscore_500ms_mid,zscore_t50_mid,avg_100ms_mid,ts_dt,std_100ms_mid,avg_t50_mid,std_t50_mid
f64,f64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[ns],f64,f64,f64
6.5732e-9,0.0,1743465615902000000,1743465615904269000,82446.5,6.464,82446.4,6.04,82446.6,0.002,82446.3,0.012,82446.9,0.002,82446.2,0.002,82447.1,0.24,82446.1,0.019,82447.2,0.03,82446.0,0.072,82446.45,0.1,0.0,82446.4,82446.5,-0.04603,82446.451574,82446.448459,0.0,82446.451695,0.000014,0.0,0.0,-0.863894,82446.45,2025-04-01 00:00:15.902,0.0,82450.39,4.560746
3.7115e-9,0.0,1743465615955000000,1743465615957469000,82446.5,6.592,82446.4,6.033,82446.6,0.002,82446.3,0.012,82446.9,0.002,82446.2,0.002,82447.1,0.24,82446.1,0.019,82447.2,0.03,82446.0,0.072,82446.45,0.1,0.0,82446.4,82446.5,-0.055983,82446.452093,82446.44794,0.0,82446.452214,0.000014,0.0,0.0,-0.831865,82446.45,2025-04-01 00:00:15.955,0.0,82450.258,4.577667
1.0042e-8,0.0,1743465616268000000,1743465616270272000,82446.5,7.01,82446.4,6.055,82446.6,0.002,82446.3,0.012,82446.9,0.002,82446.2,0.002,82447.1,0.24,82446.1,0.019,82447.2,0.03,82446.0,0.072,82446.45,0.1,0.0,82446.4,82446.5,-0.083606,82446.453536,82446.446495,0.0,82446.453655,0.000014,0.0,0.0,-0.770478,82446.45,2025-04-01 00:00:16.268,0.0,82450.126,4.590654
2.3521e-7,0.0,1743465616472000000,1743465616473916000,82446.5,0.677,82446.4,8.276,82446.9,0.002,82446.3,0.012,82447.2,0.03,82446.2,0.199,82447.4,0.002,82446.1,0.498,82447.7,0.002,82446.0,0.072,82446.45,0.1,0.0,82446.4,82446.5,0.854043,82446.407528,82446.507286,0.0,82446.407562,0.000013,0.0,0.0,-0.706194,82446.45,2025-04-01 00:00:16.472,0.0,82449.994,4.599741
2.3150e-7,0.0,1743465616523000000,1743465616525912000,82446.5,0.677,82446.4,8.337,82446.9,0.002,82446.3,0.012,82447.2,0.03,82446.2,0.199,82447.4,0.002,82446.1,0.498,82447.7,0.002,82446.0,0.072,82446.45,0.1,0.0,82446.4,82446.5,0.854949,82446.407477,82446.507238,0.0,82446.407511,0.000013,0.0,0.0,-0.674848,82446.45,2025-04-01 00:00:16.523,0.0,82449.802,4.54179
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
3.7290e-8,0.0,1743465629790000000,1743465629792324000,82489.7,0.178,82489.6,17.67,82489.8,0.002,82489.5,0.008,82490.3,0.002,82489.2,0.048,82491.1,0.002,82489.1,0.002,82491.4,0.002,82488.9,0.002,82489.65,0.1,0.0,82489.6,82489.7,0.979236,82489.600974,82489.700581,0.0,82489.600997,0.000013,0.0,0.0,0.556097,82489.65,2025-04-01 00:00:29.790,0.0,82487.532,3.610737
3.6254e-8,0.0,1743465629842000000,1743465629845260000,82489.7,0.178,82489.6,17.75,82489.8,0.002,82489.5,0.008,82490.3,0.002,82489.2,0.048,82491.1,0.002,82489.1,0.002,82491.4,0.002,82488.9,0.002,82489.65,0.1,0.0,82489.6,82489.7,0.979329,82489.60097,82489.700578,0.0,82489.600993,0.000013,0.0,0.0,0.525541,82489.65,2025-04-01 00:00:29.842,0.0,82487.696,3.513775
2.2150e-8,0.0,1743465629893000000,1743465629895869000,82489.7,0.18,82489.6,17.746,82489.8,0.002,82489.5,0.008,82490.3,0.002,82489.2,0.048,82491.1,0.002,82489.1,0.002,82491.4,0.002,82488.9,0.002,82489.65,0.1,0.0,82489.6,82489.7,0.979104,82489.600981,82489.700567,0.0,82489.601004,0.000013,0.0,0.0,0.494768,82489.65,2025-04-01 00:00:29.893,0.0,82487.86,3.406012
0.000039,0.000056,1743465629946000000,1743465629948836000,82496.2,1.178,82496.1,19.061,82496.4,0.002,82496.0,1.545,82496.6,0.043,82495.9,0.003,82496.9,0.002,82495.7,0.002,82497.2,0.002,82495.6,0.124,82496.15,0.1,0.000079,82496.1,82496.2,0.888262,82496.098343,82496.208204,0.000079,82496.10582,0.000017,0.000079,2.041241,2.342588,82492.9,2025-04-01 00:00:29.946,4.596194,82488.154,3.475157


In [3]:
#df.filter(df["realized_vol_100ms_mid_ret_t5"].is_nan())

In [3]:
# Only check float columns
columns_with_nan = [
    col for col, dtype in zip(df.columns, df.schema.values())
    if dtype in [pl.Float32, pl.Float64]
    and df.select(pl.col(col).is_nan().any()).item()
]

print("Columns with NaN:", columns_with_nan)

Columns with NaN: []


In [8]:
[line for line in df.columns if 'ret_t' in line]

['realized_vol_100ms_mid_ret_t5', 'mid_ret_t5']

In [9]:
df ['mid_ret_t5']

mid_ret_t5
f64
-0.000539
-0.000543
-0.00053
0.000012
0.000007
…
0.0
0.0
0.0
0.000079


In [15]:
import numpy as np
arr=[82517.45,82517.45,82517.45,82517.45,82525.75]
arr=np.array(arr)
(82525.75-np.mean(arr))/np.std(arr,ddof=1)
np.std(arr,ddof=1)
np.mean(arr)

np.float64(82519.11)

In [12]:
c=82526.310427
p=82525.706949
(c-p)/p

7.312606244954507e-06

In [8]:
aq=5.192+0.79
bq=10.116+0.016	
(bq-aq)/(bq+aq)

0.2575400273054486

In [5]:
aq=5.192
ap=82517.5
bq=10.116
bp=82517.4
(bp*aq+ap*bq)/(bq+aq)

82517.46608309382

In [6]:
(bp*bq+ap*aq)/(bq+aq)

82517.43391690619