# Step 4 / Point 2 — Extract + Snapshot

This notebook snapshots ClickHouse datasets into versioned Parquet files + a `manifest.json` for reproducible research.

- Source view: `polybot.user_trade_research`
- Optional sources: `polybot.market_trades` and `polybot.clob_tob` (toggled with `include_market_trades` / `include_clob_tob` in the first code cell)

Output directory: `research/data/snapshots/<snapshot_id>/`, containing one Parquet file per exported dataset plus `manifest.json`.


In [1]:
# Snapshot parameters. Every name in this cell is forwarded unchanged to
# `snapshot_user(...)` in a later cell, so edit values here rather than there.

# Username passed to `snapshot_user`.
username = "gabagool22"

# Optional filters (ISO strings) — keep None to snapshot everything we have.
start_ts = None  # e.g. "2025-12-14T00:00:00Z"
end_ts = None    # e.g. "2025-12-15T00:00:00Z"

# NOTE(review): unit is minutes per the name; how the margin is applied is
# decided inside research/snapshot.py — confirm there before changing it.
margin_minutes = 15
# Presumably switch the optional `polybot.market_trades` / `polybot.clob_tob`
# exports on or off — TODO confirm against research/snapshot.py.
include_market_trades = True
include_clob_tob = True


In [2]:
import os
import sys
from pathlib import Path


def _find_repo_root(start: Path) -> Path:
    for p in [start, *start.parents]:
        if (p / "research" / "snapshot.py").exists():
            return p
    raise RuntimeError(
        "Cannot locate repo root (expected to find `research/snapshot.py`). "
        "Start Jupyter from the repo root or set REPO_ROOT=/path/to/polybot."
    )


# Prefer an explicit REPO_ROOT override; otherwise search upwards from the
# directory the kernel is running in.
repo_root_env = os.getenv("REPO_ROOT")
if repo_root_env:
    # expanduser() lets REPO_ROOT=~/path/to/polybot work even when the value
    # reached the kernel without shell expansion (e.g. set via %env).
    repo_root = Path(repo_root_env).expanduser()
else:
    repo_root = _find_repo_root(Path.cwd())

# Keep the repo first on sys.path, but drop any earlier copy of the same entry
# so re-running this cell does not pile up duplicates.
repo_root_str = str(repo_root)
sys.path[:] = [entry for entry in sys.path if entry != repo_root_str]
sys.path.insert(0, repo_root_str)

from research.snapshot import snapshot_user

print("repo_root:", repo_root)


repo_root: /Users/antoniostano/programming/polybot


In [3]:
# Gather the values from the configuration cell, then run the snapshot once.
# The return value stays in `result` because the following cells read
# `result["manifest_path"]` and `result["snapshot_dir"]`.
snapshot_kwargs = dict(
    username=username,
    start_ts=start_ts,
    end_ts=end_ts,
    margin_minutes=margin_minutes,
    include_market_trades=include_market_trades,
    include_clob_tob=include_clob_tob,
)
result = snapshot_user(**snapshot_kwargs)
result

{'snapshot_dir': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000',
 'manifest_path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/manifest.json',
 'outputs': {'trades': {'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/trades.parquet',
   'rows': 20877,
   'min_ts': '2025-12-14 11:45:37',
   'max_ts': '2025-12-16 20:51:17'},
  'market_trades': {'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/market_trades.parquet',
   'rows': 19193},
  'clob_tob': {'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/clob_tob.parquet',
   'rows': 16438}}}

In [4]:
import json

# Location of the manifest as reported by the snapshot cell.
manifest_path = result["manifest_path"]
# Read raw bytes instead of text: json.loads() detects UTF-8/16/32 on its own,
# so parsing no longer depends on the platform's default text encoding.
manifest = json.loads(Path(manifest_path).read_bytes())
# Show the snapshot id next to the per-dataset output summary.
manifest["snapshot_id"], manifest["outputs"]

('gabagool22-20251216T205220+0000',
 {'clob_tob': {'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/clob_tob.parquet',
   'rows': 16438},
  'market_trades': {'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/market_trades.parquet',
   'rows': 19193},
  'trades': {'max_ts': '2025-12-16 20:51:17',
   'min_ts': '2025-12-14 11:45:37',
   'path': '/Users/antoniostano/programming/polybot/research/data/snapshots/gabagool22-20251216T205220+0000/trades.parquet',
   'rows': 20877}})

In [5]:
import pandas as pd

# The trades file sits directly inside the snapshot directory that the
# snapshot cell reported in `result`.
trades_path = Path(result["snapshot_dir"]).joinpath("trades.parquet")
trades = pd.read_parquet(trades_path)
# Preview only the first rows; the full frame stays available as `trades`.
trades.head()

Unnamed: 0,ts,day,hour_utc,username,proxy_address,market_slug,title,token_id,condition_id,side,...,best_ask_price,best_ask_size,mid,spread,price_minus_mid,tob_imbalance,exec_type,edge_vs_mid,effective_spread,effective_spread_ratio
0,2025-12-14 11:45:37,2025-12-14,11,gabagool22,0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d,btc-updown-15m-1765712700,"Bitcoin Up or Down - December 14, 6:45AM-7:00A...",9749576552879489345227050541994885550913688913...,0xe8b7e75fcffae402d260670fd0fb0114578a5b7fff88...,BUY,...,0.0,0.0,0.0,0.0,0.53,,UNKNOWN,,,
1,2025-12-14 11:45:37,2025-12-14,11,gabagool22,0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d,eth-updown-15m-1765712700,"Ethereum Up or Down - December 14, 6:45AM-7:00...",4639703364689995573571175229848151106931398707...,0xba7dfa2353cc0480a125185ffc2fac3d8c33474a960f...,BUY,...,0.0,0.0,0.0,0.0,0.23,,UNKNOWN,,,
2,2025-12-14 11:45:37,2025-12-14,11,gabagool22,0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d,btc-updown-15m-1765712700,"Bitcoin Up or Down - December 14, 6:45AM-7:00A...",5108999353819477633166653263465403529775473065...,0xe8b7e75fcffae402d260670fd0fb0114578a5b7fff88...,BUY,...,0.0,0.0,0.0,0.0,0.57,,UNKNOWN,,,
3,2025-12-14 11:45:37,2025-12-14,11,gabagool22,0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d,btc-updown-15m-1765712700,"Bitcoin Up or Down - December 14, 6:45AM-7:00A...",5108999353819477633166653263465403529775473065...,0xe8b7e75fcffae402d260670fd0fb0114578a5b7fff88...,BUY,...,0.0,0.0,0.0,0.0,0.54,,UNKNOWN,,,
4,2025-12-14 11:45:39,2025-12-14,11,gabagool22,0x6031b6eed1c97e853c6e0f03ad3ce3529351f96d,btc-updown-15m-1765712700,"Bitcoin Up or Down - December 14, 6:45AM-7:00A...",5108999353819477633166653263465403529775473065...,0xe8b7e75fcffae402d260670fd0fb0114578a5b7fff88...,BUY,...,0.0,0.0,0.0,0.0,0.55,,UNKNOWN,,,
