In [1]:
# Cell 1: imports & basic config

from pathlib import Path
import csv
from collections import defaultdict
from typing import List, Dict, Any, Optional

import pandas as pd

from pathlib import Path
import sys

CWD = Path.cwd()
if (CWD / "kalshi_fetcher").exists():
    PROJECT_ROOT = CWD
elif (CWD.parent / "kalshi_fetcher").exists():
    PROJECT_ROOT = CWD.parent
else:
    raise RuntimeError(f"Could not find kalshi_fetcher from {CWD}")

sys.path.append(str(PROJECT_ROOT))

from kalshi_fetcher.kalshi_client import request
print("PROJECT_ROOT =", PROJECT_ROOT)


# Data directories
DATA_DIR = PROJECT_ROOT / "data"
TRADES_DIR = DATA_DIR / "series_trades"

DATA_DIR.mkdir(parents=True, exist_ok=True)
TRADES_DIR.mkdir(parents=True, exist_ok=True)

# ðŸ”§ TODO: fill in your series tickers here
SERIES_TICKERS: List[str] = [
    "kxsbads",
    "kxsongsoncharttswift",
    "kxemmydseries",
    "kxrtcaptainamerica",
    "kxrtmoana2",
    "kxrtmickey17",
    "kxrtsnowwhite",
    "kxrtmufasa",
    "kxrtminecraft",
    "kxsongsoncharttswift2",
    "kxrtthemonkey",
    "kxrtnovocaine",
    "kxrtfantasticfour",
    "kxsongsoncharttswift6",
    "kxtrumpputin",
    "kxllm1-25dec31",
    "kxgameawards-2025",
]

SERIES_TICKERS = [s.upper() for s in SERIES_TICKERS]
print(SERIES_TICKERS)

SERIES_MARKETS_CSV = DATA_DIR / "series_markets.csv"

STATUS_MAP = {
    "closed": "closed",
    "settled": "settled",
    "determined": "closed",   # result known, payouts maybe pending
    "finalized": "settled",   # fully settled
}


ModuleNotFoundError: No module named 'config'

In [19]:
import importlib
import kalshi_fetcher.config as kc
import kalshi_fetcher.kalshi_client as kc_client

kc.KALSHI_API_BASE_URL = "https://api.elections.kalshi.com/marketdata/v2"
importlib.reload(kc_client)  # picks up the updated base URL


<module 'kalshi_fetcher.kalshi_client' from 'c:\\Users\\Adam Nhan\\prediction-market-money-printer\\kalshi_fetcher\\kalshi_client.py'>

In [20]:
# Cell 2 â€” Fetch all closed/settled markets for a single series

from typing import Any, Dict, List, Optional

def fetch_markets_for_series(series_ticker: str, limit: int = 1000) -> List[Dict[str, Any]]:
    """
    Fetch ALL closed/settled markets for a given series_ticker.
    Uses your existing kalshi_client.request helper.
    """
    print(f"[series:{series_ticker}] Fetching markets...")
    markets: List[Dict[str, Any]] = []
    cursor: Optional[str] = None

    while True:
        params: Dict[str, Any] = {
            "series_ticker": series_ticker,
            "limit": limit,
            # only want backtestable markets
            "status": "closed,settled",
        }
        if cursor:
            params["cursor"] = cursor

        resp = request("/markets", params=params)
        data = resp


        page_markets = data.get("markets", [])
        markets.extend(page_markets)

        cursor = data.get("cursor")
        if not cursor:
            break

    print(f"[series:{series_ticker}] Found {len(markets)} closed/settled markets.")
    return markets


In [21]:
# Cell 3 â€” Fetch markets for ONE series (first in list)

if not SERIES_TICKERS:
    raise ValueError("SERIES_TICKERS is empty â€” add some tickers in Cell 1.")

test_series = SERIES_TICKERS[0]
print("Testing series:", test_series)

test_markets_raw = fetch_markets_for_series(test_series)

# Show the first few raw dicts
test_markets_raw[:3]


Testing series: KXSBADS
[series:KXSBADS] Fetching markets...


HTTPError: 400 Client Error: Bad Request for url: https://api.elections.kalshi.com/trade-api/v2/markets?series_ticker=KXSBADS&limit=1000&status=closed%2Csettled

In [18]:
# Cell 4 â€” Normalize markets for this ONE series into a DataFrame

rows = []

for m in test_markets_raw:
    raw_status = m.get("status")
    normalized_status = STATUS_MAP.get(raw_status, raw_status)

    rows.append({
        "series_ticker": test_series,
        "market_ticker": m.get("ticker"),
        "event_ticker": m.get("event_ticker"),
        "status": raw_status,
        "normalized_status": normalized_status,
        "title": m.get("title"),
        "subtitle": m.get("subtitle"),
        "category": m.get("category"),
        "open_time": m.get("open_time"),
        "close_time": m.get("close_time"),
        "expiration_time": m.get("expiration_time"),
        "settled_time": m.get("settled_time"),
        "yes_bid": m.get("yes_bid"),
        "yes_ask": m.get("yes_ask"),
        "last_price": m.get("last_price"),
        "volume": m.get("volume"),
        "result": m.get("result"),   # "yes" / "no"
    })

test_markets_df = pd.DataFrame(rows)
print("Rows:", len(test_markets_df))
test_markets_df.head()


NameError: name 'test_markets_raw' is not defined

In [15]:
# Cell 5 â€” Fetch and normalize markets for ALL series tickers

all_markets_rows = []

for series in SERIES_TICKERS:
    print(f"Processing series: {series}")
    
    markets_raw = fetch_markets_for_series(series)
    
    for m in markets_raw:
        raw_status = m.get("status")
        normalized_status = STATUS_MAP.get(raw_status, raw_status)

        all_markets_rows.append({
            "series_ticker": series,
            "market_ticker": m.get("ticker"),
            "event_ticker": m.get("event_ticker"),
            "status": raw_status,
            "normalized_status": normalized_status,
            "title": m.get("title"),
            "subtitle": m.get("subtitle"),
            "category": m.get("category"),
            "open_time": m.get("open_time"),
            "close_time": m.get("close_time"),
            "expiration_time": m.get("expiration_time"),
            "settled_time": m.get("settled_time"),
            "yes_bid": m.get("yes_bid"),
            "yes_ask": m.get("yes_ask"),
            "last_price": m.get("last_price"),
            "volume": m.get("volume"),
            "result": m.get("result"),
        })

series_markets_df = pd.DataFrame(all_markets_rows)

print("Total markets:", len(series_markets_df))
series_markets_df.head()


Processing series: KXSBADS
[series:KXSBADS] Fetching markets...
[series:KXSBADS] Found 17 closed/settled markets.
Processing series: KXSONGSONCHARTTSWIFT
[series:KXSONGSONCHARTTSWIFT] Fetching markets...
[series:KXSONGSONCHARTTSWIFT] Found 2 closed/settled markets.
Processing series: KXEMMYDSERIES
[series:KXEMMYDSERIES] Fetching markets...
[series:KXEMMYDSERIES] Found 19 closed/settled markets.
Processing series: KXRTCAPTAINAMERICA
[series:KXRTCAPTAINAMERICA] Fetching markets...
[series:KXRTCAPTAINAMERICA] Found 10 closed/settled markets.
Processing series: KXRTMOANA2
[series:KXRTMOANA2] Fetching markets...
[series:KXRTMOANA2] Found 9 closed/settled markets.
Processing series: KXRTMICKEY17
[series:KXRTMICKEY17] Fetching markets...
[series:KXRTMICKEY17] Found 14 closed/settled markets.
Processing series: KXRTSNOWWHITE
[series:KXRTSNOWWHITE] Fetching markets...
[series:KXRTSNOWWHITE] Found 16 closed/settled markets.
Processing series: KXRTMUFASA
[series:KXRTMUFASA] Fetching markets...
[s

Unnamed: 0,series_ticker,market_ticker,event_ticker,status,normalized_status,title,subtitle,category,open_time,close_time,expiration_time,settled_time,yes_bid,yes_ask,last_price,volume,result
0,KXSBADS,KXSBADS-25-CB,KXSBADS-25,finalized,settled,"Will Coinbase Global, Inc. run a Super Bowl ad...",,,2025-02-03T22:30:10Z,2025-02-10T03:21:55.832299Z,2025-02-10T15:00:00Z,,0,1,1,251537,no
1,KXSBADS,KXSBADS-25-T,KXSBADS-25,finalized,settled,Will Toyota run a Super Bowl ad in 2025?,":: Includes, for example, Toyota and Lexus",,2025-01-25T20:00:29Z,2025-02-10T03:23:26.038868Z,2025-02-10T15:00:00Z,,0,1,1,529343,no
2,KXSBADS,KXSBADS-25-PE,KXSBADS-25,finalized,settled,Will Pepsi run a Super Bowl ad in 2025?,":: Includes, for example, Doritos",,2025-01-25T20:00:26Z,2025-02-10T00:16:05.504161Z,2025-02-10T15:00:00Z,,99,100,99,97243,yes
3,KXSBADS,KXSBADS-25-CC,KXSBADS-25,finalized,settled,Will Coca-Cola run a Super Bowl ad in 2025?,":: Includes, for example, Fanta",,2025-01-25T20:00:09Z,2025-02-10T03:25:15.623462Z,2025-02-10T15:00:00Z,,0,1,1,673537,no
4,KXSBADS,KXSBADS-25-H,KXSBADS-25,finalized,settled,Will Hyundai run a Super Bowl ad in 2025?,":: Includes, for example, Hyundai and Genesis",,2025-01-25T20:00:41Z,2025-02-10T03:23:05.417962Z,2025-02-10T15:00:00Z,,0,1,9,115213,no


In [16]:
# Cell 6 â€” Save markets metadata to data/series_markets.csv

SERIES_MARKETS_CSV = DATA_DIR / "series_markets.csv"

SERIES_MARKETS_CSV.parent.mkdir(parents=True, exist_ok=True)
series_markets_df.to_csv(SERIES_MARKETS_CSV, index=False)

SERIES_MARKETS_CSV


WindowsPath('c:/Users/Adam Nhan/prediction-market-money-printer/data/series_markets.csv')

In [17]:
# Cell 7 â€” Unique market tickers across all series

unique_tickers = series_markets_df["market_ticker"].dropna().unique()
len(unique_tickers), unique_tickers[:5]


(171,
 array(['KXSBADS-25-CB', 'KXSBADS-25-T', 'KXSBADS-25-PE', 'KXSBADS-25-CC',
        'KXSBADS-25-H'], dtype=object))

In [18]:
# Cell 8 â€” Fetch ALL trades for a single market_ticker

def fetch_trades_for_market(ticker: str, limit: int = 1000):
    """
    Fetch ALL public trades for a given market via /markets/trades.
    Uses your request(endpoint, params) helper.
    """
    print(f"[market:{ticker}] Fetching trades...")
    trades = []
    cursor: Optional[str] = None

    while True:
        params: Dict[str, Any] = {
            "ticker": ticker,
            "limit": limit,
        }
        if cursor:
            params["cursor"] = cursor

        data = request("/markets/trades", params=params)

        page_trades = data.get("trades", [])
        trades.extend(page_trades)

        cursor = data.get("cursor")
        if not cursor:
            break

    print(f"[market:{ticker}] Got {len(trades)} trades.")
    return trades


In [19]:
# Cell 9 â€” Test fetching trades for a single market

test_ticker = unique_tickers[0]
print("Testing trades for:", test_ticker)

test_trades_raw = fetch_trades_for_market(test_ticker)
len(test_trades_raw), (test_trades_raw[:3] if test_trades_raw else None)


Testing trades for: KXSBADS-25-CB
[market:KXSBADS-25-CB] Fetching trades...
[market:KXSBADS-25-CB] Got 4290 trades.


(4290,
 [{'count': 4193,
   'created_time': '2025-02-10T03:20:57.509561Z',
   'no_price': 99,
   'no_price_dollars': '0.9900',
   'price': 0.01,
   'taker_side': 'yes',
   'ticker': 'KXSBADS-25-CB',
   'trade_id': 'c17edfc1-b778-4261-bf9b-d6517493dbb0',
   'yes_price': 1,
   'yes_price_dollars': '0.0100'},
  {'count': 1000,
   'created_time': '2025-02-10T03:20:16.115251Z',
   'no_price': 99,
   'no_price_dollars': '0.9900',
   'price': 0.01,
   'taker_side': 'yes',
   'ticker': 'KXSBADS-25-CB',
   'trade_id': 'bafc053e-9981-455b-8ba7-4b80c5b39d52',
   'yes_price': 1,
   'yes_price_dollars': '0.0100'},
  {'count': 500,
   'created_time': '2025-02-10T03:20:16.115251Z',
   'no_price': 99,
   'no_price_dollars': '0.9900',
   'price': 0.01,
   'taker_side': 'yes',
   'ticker': 'KXSBADS-25-CB',
   'trade_id': '85341570-95e0-464e-af4b-dcbdb5128cab',
   'yes_price': 1,
   'yes_price_dollars': '0.0100'}])

In [20]:
# Cell 10 â€” Save trades for a single market to data/series_trades/{ticker}.csv

def save_trades_csv(ticker: str, trades: list) -> Path:
    """
    Save trades for a market to data/series_trades/{ticker}.csv
    using a consistent subset of fields.
    """
    out_path = TRADES_DIR / f"{ticker}.csv"

    if not trades:
        print(f"[market:{ticker}] No trades, skipping write.")
        return out_path

    # Fields useful for Î”p / intraday backtesting
    fieldnames = [
        "trade_id",
        "ticker",
        "price",               # cents
        "yes_price",
        "no_price",
        "yes_price_dollars",   # already /100
        "no_price_dollars",
        "count",
        "taker_side",
        "created_time",
    ]

    with out_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for t in trades:
            row = {k: t.get(k) for k in fieldnames}
            writer.writerow(row)

    print(f"[market:{ticker}] Wrote {len(trades)} trades to {out_path}")
    return out_path


In [21]:
# Cell 11 â€” Save trades for ONE test market

test_ticker = unique_tickers[0]
print("Saving trades for:", test_ticker)

_ = save_trades_csv(test_ticker, test_trades_raw)

# Confirm file exists
(TRADES_DIR / f"{test_ticker}.csv").exists()


Saving trades for: KXSBADS-25-CB
[market:KXSBADS-25-CB] Wrote 4290 trades to c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-CB.csv


True

In [22]:
# Cell 12 â€” Fetch and save trades for ALL markets in series_markets_df

for ticker in unique_tickers:
    print("=" * 60)
    print("Processing ticker:", ticker)

    trades = fetch_trades_for_market(ticker)
    save_trades_csv(ticker, trades)

print("Done fetching & saving trades for all tickers.")


Processing ticker: KXSBADS-25-CB
[market:KXSBADS-25-CB] Fetching trades...
[market:KXSBADS-25-CB] Got 4290 trades.
[market:KXSBADS-25-CB] Wrote 4290 trades to c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-CB.csv
Processing ticker: KXSBADS-25-T
[market:KXSBADS-25-T] Fetching trades...
[market:KXSBADS-25-T] Got 4930 trades.
[market:KXSBADS-25-T] Wrote 4930 trades to c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-T.csv
Processing ticker: KXSBADS-25-PE
[market:KXSBADS-25-PE] Fetching trades...
[market:KXSBADS-25-PE] Got 166 trades.
[market:KXSBADS-25-PE] Wrote 166 trades to c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-PE.csv
Processing ticker: KXSBADS-25-CC
[market:KXSBADS-25-CC] Fetching trades...
[market:KXSBADS-25-CC] Got 11864 trades.
[market:KXSBADS-25-CC] Wrote 11864 trades to c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-CC.csv
Processing ticker

In [24]:
# Cell 13 â€” Load trades for one specific market: KXSBADS-25-CB

target_ticker = "KXSBADS-25-CB"

trades_path = TRADES_DIR / f"{target_ticker}.csv"
print("Trades path:", trades_path, "exists:", trades_path.exists())

market_trades_df = pd.read_csv(trades_path)

print("Rows:", len(market_trades_df))
market_trades_df.head()


Trades path: c:\Users\Adam Nhan\prediction-market-money-printer\data\series_trades\KXSBADS-25-CB.csv exists: True
Rows: 4290


Unnamed: 0,trade_id,ticker,price,yes_price,no_price,yes_price_dollars,no_price_dollars,count,taker_side,created_time
0,c17edfc1-b778-4261-bf9b-d6517493dbb0,KXSBADS-25-CB,0.01,1,99,0.01,0.99,4193,yes,2025-02-10T03:20:57.509561Z
1,bafc053e-9981-455b-8ba7-4b80c5b39d52,KXSBADS-25-CB,0.01,1,99,0.01,0.99,1000,yes,2025-02-10T03:20:16.115251Z
2,85341570-95e0-464e-af4b-dcbdb5128cab,KXSBADS-25-CB,0.01,1,99,0.01,0.99,500,yes,2025-02-10T03:20:16.115251Z
3,6147d800-5161-403c-8db7-f7ce980e3663,KXSBADS-25-CB,0.01,1,99,0.01,0.99,100,yes,2025-02-10T03:20:16.115251Z
4,24d60d07-4377-4307-a2f7-b91d143710b4,KXSBADS-25-CB,0.01,1,99,0.01,0.99,119,yes,2025-02-10T03:20:16.115251Z


In [25]:
# Cell 14 â€” Clean & prepare trade data for delta-p analysis

df = market_trades_df.copy()

# 1. Convert created_time -> datetime (UTC)
df["created_time"] = pd.to_datetime(df["created_time"], errors="coerce", utc=True)

# 2. Sort by time
df = df.sort_values("created_time").reset_index(drop=True)

# 3. Create market-implied probability
# yes_price_dollars is already in [0,1]
df["p_mkt"] = df["yes_price_dollars"]

# Quick peek
df[["created_time", "p_mkt"]].head()


Unnamed: 0,created_time,p_mkt
0,2025-02-04 02:20:41.675941+00:00,0.23
1,2025-02-04 02:21:07.879938+00:00,0.23
2,2025-02-04 02:21:31.994860+00:00,0.23
3,2025-02-04 02:38:46.296169+00:00,0.48
4,2025-02-04 02:38:46.296169+00:00,0.47


In [26]:
# Cell 15 â€” Compute delta-p (change in implied probability)

df["delta_p"] = df["p_mkt"].diff()   # current - previous

# Peek at first few rows with delta-p
df[["created_time", "p_mkt", "delta_p"]].head(10)


Unnamed: 0,created_time,p_mkt,delta_p
0,2025-02-04 02:20:41.675941+00:00,0.23,
1,2025-02-04 02:21:07.879938+00:00,0.23,0.0
2,2025-02-04 02:21:31.994860+00:00,0.23,0.0
3,2025-02-04 02:38:46.296169+00:00,0.48,0.25
4,2025-02-04 02:38:46.296169+00:00,0.47,-0.01
5,2025-02-04 02:40:10.183553+00:00,0.49,0.02
6,2025-02-04 02:40:10.183553+00:00,0.48,-0.01
7,2025-02-04 02:56:29.335976+00:00,0.4,-0.08
8,2025-02-04 02:57:02.113588+00:00,0.24,-0.16
9,2025-02-04 03:15:48.131815+00:00,0.48,0.24


In [27]:
# Cell 16 â€” Identify threshold-based Î”p events

threshold = 0.05   # 5% move

df["trigger"] = df["delta_p"].abs() >= threshold

# Show only rows where a trigger fired
df[df["trigger"]][["created_time", "p_mkt", "delta_p"]].head(10)


Unnamed: 0,created_time,p_mkt,delta_p
3,2025-02-04 02:38:46.296169+00:00,0.48,0.25
7,2025-02-04 02:56:29.335976+00:00,0.4,-0.08
8,2025-02-04 02:57:02.113588+00:00,0.24,-0.16
9,2025-02-04 03:15:48.131815+00:00,0.48,0.24
25,2025-02-04 05:51:55.912252+00:00,0.6,0.11
27,2025-02-04 05:51:55.912252+00:00,0.51,-0.09
28,2025-02-04 05:53:32.486452+00:00,0.6,0.09
33,2025-02-04 06:24:37.757773+00:00,0.65,0.05
34,2025-02-04 06:24:37.757773+00:00,0.6,-0.05
39,2025-02-04 13:45:12.474238+00:00,0.4,-0.14


In [28]:
df["trigger"].sum()


np.int64(393)

In [29]:
# Cell 17 â€” Direction of the momentum signal

# +1 if delta_p > 0 (up move â†’ YES), -1 if delta_p < 0 (down move â†’ NO), 0 otherwise
df["signal_side"] = 0
df.loc[df["delta_p"] > 0, "signal_side"] = 1    # YES
df.loc[df["delta_p"] < 0, "signal_side"] = -1   # NO

# Filter to only trigger rows, show a small sample
signal_rows = df[df["trigger"]][["created_time", "p_mkt", "delta_p", "signal_side"]]
signal_rows.head(10)


Unnamed: 0,created_time,p_mkt,delta_p,signal_side
3,2025-02-04 02:38:46.296169+00:00,0.48,0.25,1
7,2025-02-04 02:56:29.335976+00:00,0.4,-0.08,-1
8,2025-02-04 02:57:02.113588+00:00,0.24,-0.16,-1
9,2025-02-04 03:15:48.131815+00:00,0.48,0.24,1
25,2025-02-04 05:51:55.912252+00:00,0.6,0.11,1
27,2025-02-04 05:51:55.912252+00:00,0.51,-0.09,-1
28,2025-02-04 05:53:32.486452+00:00,0.6,0.09,1
33,2025-02-04 06:24:37.757773+00:00,0.65,0.05,1
34,2025-02-04 06:24:37.757773+00:00,0.6,-0.05,-1
39,2025-02-04 13:45:12.474238+00:00,0.4,-0.14,-1


In [33]:
# Cell 18 â€” Use a stricter Î”p threshold of 0.25

threshold = 0.4

df["trigger_025"] = df["delta_p"].abs() >= threshold
df["side_025"] = 0
df.loc[df["delta_p"] > 0, "side_025"] = 1
df.loc[df["delta_p"] < 0, "side_025"] = -1

print("Number of 0.25 triggers:", df["trigger_025"].sum())

# Show first few triggers
df[df["trigger_025"]][["created_time", "p_mkt", "delta_p", "side_025"]].head(10)


Number of 0.25 triggers: 1


Unnamed: 0,created_time,p_mkt,delta_p,side_025
3561,2025-02-10 00:10:43.217835+00:00,0.66,0.47,1


In [34]:
# Cell 19 â€” Identify the first Î”p â‰¥ 0.25 event as the entry

# Get only rows where the 0.25 trigger fired
trig = df[df["trigger_025"]].copy()

print("Total 0.25 triggers:", len(trig))

if len(trig) == 0:
    print("No 0.25 triggers in this market.")
    first_entry = None
else:
    # First trigger in time
    first_entry = trig.iloc[0]

    entry_time = first_entry["created_time"]
    entry_price = first_entry["p_mkt"]       # implied prob at entry
    entry_side = first_entry["side_025"]     # +1 = YES, -1 = NO
    entry_delta = first_entry["delta_p"]

    print("First entry candidate:")
    print(" time :", entry_time)
    print(" p_mkt:", entry_price)
    print(" side :", "YES" if entry_side > 0 else "NO")
    print(" Î”p   :", entry_delta)


Total 0.25 triggers: 1
First entry candidate:
 time : 2025-02-10 00:10:43.217835+00:00
 p_mkt: 0.66
 side : YES
 Î”p   : 0.47000000000000003


In [35]:
# Cell 20 â€” Compute PnL for the single 0.25-threshold trade on this market

if first_entry is None:
    print("No 0.25 trigger â†’ no trade â†’ no PnL to compute.")
else:
    # 1. Look up this market in the metadata table
    meta_row = series_markets_df.loc[
        series_markets_df["market_ticker"] == target_ticker
    ]

    if meta_row.empty:
        raise ValueError(f"No metadata found for {target_ticker}")

    meta_row = meta_row.iloc[0]
    result_str = str(meta_row["result"]).lower()  # "yes" or "no"

    # 2. Convert to payoff for YES
    if result_str == "yes":
        payoff_yes = 1.0
    elif result_str == "no":
        payoff_yes = 0.0
    else:
        raise ValueError(f"Unexpected result value: {result_str}")

    # 3. Reuse entry info
    entry_price_yes = float(entry_price)      # p_mkt at entry (YES price in dollars)
    entry_side = int(entry_side)              # +1 YES, -1 NO

    # Prices for YES / NO
    price_yes = entry_price_yes
    price_no = 1.0 - entry_price_yes

    # Payoff depending on side
    if entry_side > 0:   # long YES
        payoff = payoff_yes
        entry_cost = price_yes
    else:                # long NO
        payoff = 1.0 - payoff_yes
        entry_cost = price_no

    pnl = payoff - entry_cost

    print(f"Market: {target_ticker}")
    print(f"Result: {result_str.upper()}  (payoff_yes={payoff_yes})")
    print(f"Entry side: {'YES' if entry_side>0 else 'NO'}")
    print(f"Entry price (YES): {entry_price_yes:.4f}")
    print(f"Effective entry cost: {entry_cost:.4f}")
    print(f"Payoff on position: {payoff:.4f}")
    print(f"PnL (no fees yet): {pnl:.4f}")


Market: KXSBADS-25-CB
Result: NO  (payoff_yes=0.0)
Entry side: YES
Entry price (YES): 0.6600
Effective entry cost: 0.6600
Payoff on position: 0.0000
PnL (no fees yet): -0.6600


In [36]:
# Cell 21 â€” Backtest a single market at a single Î”p threshold

def backtest_single_market_threshold(
    ticker: str,
    threshold: float = 0.25,
    verbose: bool = False,
):
    """
    For a given market ticker:
      - load trades
      - compute delta-p
      - find FIRST |Î”p| >= threshold
      - determine side (YES/NO)
      - look up final result
      - compute PnL (no fees yet)

    Returns a dict with summary info, or None if no trade.
    """
    # 1. Load trades CSV
    path = TRADES_DIR / f"{ticker}.csv"
    if not path.exists():
        if verbose:
            print(f"[{ticker}] No trades file at {path}")
        return None

    trades_df = pd.read_csv(path)
    if trades_df.empty:
        if verbose:
            print(f"[{ticker}] Trades file is empty.")
        return None

    # 2. Prepare data
    df = trades_df.copy()
    df["created_time"] = pd.to_datetime(df["created_time"], errors="coerce", utc=True)
    df = df.sort_values("created_time").reset_index(drop=True)
    df["p_mkt"] = df["yes_price_dollars"]
    df["delta_p"] = df["p_mkt"].diff()

    # 3. Find trigger rows
    df["trigger"] = df["delta_p"].abs() >= threshold
    df["side"] = 0
    df.loc[df["delta_p"] > 0, "side"] = 1    # YES
    df.loc[df["delta_p"] < 0, "side"] = -1   # NO

    trig = df[df["trigger"]]
    if trig.empty:
        if verbose:
            print(f"[{ticker}] No |Î”p| >= {threshold:.2f} trigger â†’ no trade.")
        return {
            "market_ticker": ticker,
            "threshold": threshold,
            "had_trade": False,
            "entry_time": None,
            "entry_side": None,
            "entry_price_yes": None,
            "pnl": 0.0,
            "result": None,
        }

    # 4. Take first trigger
    first = trig.iloc[0]
    entry_time = first["created_time"]
    entry_price_yes = float(first["p_mkt"])
    entry_side = int(first["side"])  # +1 YES, -1 NO

    # 5. Look up metadata / result
    meta_row = series_markets_df.loc[
        series_markets_df["market_ticker"] == ticker
    ]
    if meta_row.empty:
        raise ValueError(f"No metadata found for {ticker}")
    meta_row = meta_row.iloc[0]
    result_str = str(meta_row["result"]).lower()  # "yes" / "no"

    if result_str == "yes":
        payoff_yes = 1.0
    elif result_str == "no":
        payoff_yes = 0.0
    else:
        raise ValueError(f"Unexpected result value for {ticker}: {result_str}")

    # 6. Compute payoff & PnL
    price_yes = entry_price_yes
    price_no = 1.0 - entry_price_yes

    if entry_side > 0:      # long YES
        payoff = payoff_yes
        entry_cost = price_yes
    else:                   # long NO
        payoff = 1.0 - payoff_yes
        entry_cost = price_no

    pnl = payoff - entry_cost

    if verbose:
        print(f"[{ticker}] threshold={threshold:.2f}")
        print(f"  result      : {result_str.upper()} (payoff_yes={payoff_yes})")
        print(f"  entry_time  : {entry_time}")
        print(f"  entry_side  : {'YES' if entry_side>0 else 'NO'}")
        print(f"  entry_price : {entry_price_yes:.4f}")
        print(f"  entry_cost  : {entry_cost:.4f}")
        print(f"  payoff      : {payoff:.4f}")
        print(f"  pnl         : {pnl:.4f}")

    return {
        "market_ticker": ticker,
        "threshold": threshold,
        "had_trade": True,
        "entry_time": entry_time,
        "entry_side": entry_side,
        "entry_price_yes": entry_price_yes,
        "pnl": pnl,
        "result": result_str,
    }


In [37]:
# Cell 22 â€” Test the single-market backtest function on a few tickers

test_tickers = unique_tickers[:3]  # first three markets
test_tickers


array(['KXSBADS-25-CB', 'KXSBADS-25-T', 'KXSBADS-25-PE'], dtype=object)

In [38]:
results_samples = []

for t in test_tickers:
    print("=" * 50)
    print("Testing:", t)
    res = backtest_single_market_threshold(t, threshold=0.25, verbose=True)
    results_samples.append(res)

results_samples


Testing: KXSBADS-25-CB
[KXSBADS-25-CB] threshold=0.25
  result      : NO (payoff_yes=0.0)
  entry_time  : 2025-02-10 00:10:43.217835+00:00
  entry_side  : YES
  entry_price : 0.6600
  entry_cost  : 0.6600
  payoff      : 0.0000
  pnl         : -0.6600
Testing: KXSBADS-25-T
[KXSBADS-25-T] threshold=0.25
  result      : NO (payoff_yes=0.0)
  entry_time  : 2025-02-06 03:20:44.005234+00:00
  entry_side  : YES
  entry_price : 0.5700
  entry_cost  : 0.5700
  payoff      : 0.0000
  pnl         : -0.5700
Testing: KXSBADS-25-PE
[KXSBADS-25-PE] threshold=0.25
  result      : YES (payoff_yes=1.0)
  entry_time  : 2025-01-25 23:35:09.903742+00:00
  entry_side  : YES
  entry_price : 0.9500
  entry_cost  : 0.9500
  payoff      : 1.0000
  pnl         : 0.0500


[{'market_ticker': 'KXSBADS-25-CB',
  'threshold': 0.25,
  'had_trade': True,
  'entry_time': Timestamp('2025-02-10 00:10:43.217835+0000', tz='UTC'),
  'entry_side': 1,
  'entry_price_yes': 0.66,
  'pnl': -0.66,
  'result': 'no'},
 {'market_ticker': 'KXSBADS-25-T',
  'threshold': 0.25,
  'had_trade': True,
  'entry_time': Timestamp('2025-02-06 03:20:44.005234+0000', tz='UTC'),
  'entry_side': 1,
  'entry_price_yes': 0.57,
  'pnl': -0.57,
  'result': 'no'},
 {'market_ticker': 'KXSBADS-25-PE',
  'threshold': 0.25,
  'had_trade': True,
  'entry_time': Timestamp('2025-01-25 23:35:09.903742+0000', tz='UTC'),
  'entry_side': 1,
  'entry_price_yes': 0.95,
  'pnl': 0.050000000000000044,
  'result': 'yes'}]

In [43]:
# Cell 23 â€” Backtest ALL markets at Î”p threshold = 0.25

threshold = 0.4

all_results = []

for t in unique_tickers:
    res = backtest_single_market_threshold(t, threshold=threshold, verbose=False)
    if res is None:
        # e.g. missing file â€” you can also log/print here if you want
        continue
    all_results.append(res)

len(all_results)


168

In [44]:
# Cell 24 â€” Results DataFrame + basic stats

results_df = pd.DataFrame(all_results)
print("Rows in results_df:", len(results_df))
results_df.head()


Rows in results_df: 168


Unnamed: 0,market_ticker,threshold,had_trade,entry_time,entry_side,entry_price_yes,pnl,result
0,KXSBADS-25-CB,0.4,True,2025-02-10 00:10:43.217835+00:00,1.0,0.66,-0.66,no
1,KXSBADS-25-T,0.4,False,NaT,,,0.0,
2,KXSBADS-25-PE,0.4,False,NaT,,,0.0,
3,KXSBADS-25-CC,0.4,True,2025-01-31 16:35:08.905157+00:00,-1.0,0.13,0.13,no
4,KXSBADS-25-H,0.4,True,2025-02-10 01:22:34.775518+00:00,1.0,0.8,-0.8,no


In [45]:
# How many markets had an actual trade?
results_df["had_trade"].value_counts()


had_trade
False    135
True      33
Name: count, dtype: int64

In [46]:
# PnL stats for markets where we actually traded
trade_pnls = results_df[results_df["had_trade"]]["pnl"]

summary = {
    "num_markets": len(results_df),
    "num_trades": len(trade_pnls),
    "mean_pnl": trade_pnls.mean(),
    "median_pnl": trade_pnls.median(),
    "min_pnl": trade_pnls.min(),
    "max_pnl": trade_pnls.max(),
}
summary


{'num_markets': 168,
 'num_trades': 33,
 'mean_pnl': np.float64(-0.3721212121212121),
 'median_pnl': -0.66,
 'min_pnl': -0.99,
 'max_pnl': 0.55}

In [47]:
# Cell 25 â€” Backtest all markets across multiple Î”p thresholds

thresholds = [0.05, 0.08, 0.10, 0.12, 0.15, 0.20, 0.25]

all_results_multi = []

for thr in thresholds:
    print("=" * 70)
    print(f"Running backtest for threshold = {thr:.2f}")
    
    for t in unique_tickers:
        res = backtest_single_market_threshold(
            t,
            threshold=thr,
            verbose=False,
        )
        if res is None:
            continue
        all_results_multi.append(res)

len(all_results_multi)


Running backtest for threshold = 0.05
Running backtest for threshold = 0.08
Running backtest for threshold = 0.10
Running backtest for threshold = 0.12
Running backtest for threshold = 0.15
Running backtest for threshold = 0.20
Running backtest for threshold = 0.25


1176

In [48]:
# Cell 26 â€” Build DataFrame and summarize by threshold

multi_df = pd.DataFrame(all_results_multi)
print("Rows in multi_df:", len(multi_df))
multi_df.head()


Rows in multi_df: 1176


Unnamed: 0,market_ticker,threshold,had_trade,entry_time,entry_side,entry_price_yes,pnl,result
0,KXSBADS-25-CB,0.05,True,2025-02-04 02:38:46.296169+00:00,1.0,0.48,-0.48,no
1,KXSBADS-25-T,0.05,True,2025-01-25 21:34:36.985058+00:00,-1.0,0.25,0.25,no
2,KXSBADS-25-PE,0.05,True,2025-01-25 23:35:09.903742+00:00,1.0,0.95,0.05,yes
3,KXSBADS-25-CC,0.05,True,2025-01-26 02:12:30.923499+00:00,-1.0,0.7,0.7,no
4,KXSBADS-25-H,0.05,True,2025-01-25 22:17:42.142293+00:00,-1.0,0.25,0.25,no


In [49]:
# Keep only rows where a trade actually happened
trades_only = multi_df[multi_df["had_trade"]]

summary_rows = []

for thr in thresholds:
    sub = trades_only[trades_only["threshold"] == thr]
    pnls = sub["pnl"]
    
    summary_rows.append({
        "threshold": thr,
        "num_markets": sub["market_ticker"].nunique(),
        "num_trades": len(sub),
        "mean_pnl": pnls.mean() if len(pnls) > 0 else None,
        "median_pnl": pnls.median() if len(pnls) > 0 else None,
        "min_pnl": pnls.min() if len(pnls) > 0 else None,
        "max_pnl": pnls.max() if len(pnls) > 0 else None,
    })

threshold_summary_df = pd.DataFrame(summary_rows)
threshold_summary_df


Unnamed: 0,threshold,num_markets,num_trades,mean_pnl,median_pnl,min_pnl,max_pnl
0,0.05,140,140,-0.017929,0.055,-0.97,0.86
1,0.08,129,129,-0.01969,0.05,-0.97,0.86
2,0.1,121,121,-0.042149,0.01,-0.97,0.86
3,0.12,114,114,-0.051754,0.015,-0.99,0.86
4,0.15,104,104,-0.035192,0.055,-0.99,0.79
5,0.2,86,86,-0.186977,-0.29,-0.99,0.77
6,0.25,67,67,-0.23806,-0.36,-0.99,0.55


In [50]:
# Cell 27 â€” Always-short-from-open backtest for a single market

def backtest_open_short_no(ticker: str, verbose: bool = False):
    """
    Strategy:
      - At the first trade in the market, short YES (i.e. go long NO).
      - Hold to settlement.
    """
    # 1. Load trades
    path = TRADES_DIR / f"{ticker}.csv"
    if not path.exists():
        if verbose:
            print(f"[{ticker}] Trades file does not exist at {path}")
        return None
    
    trades_df = pd.read_csv(path)
    if trades_df.empty:
        if verbose:
            print(f"[{ticker}] Trades file is empty.")
        return None

    # 2. Get earliest trade
    df = trades_df.copy()
    df["created_time"] = pd.to_datetime(df["created_time"], errors="coerce", utc=True)
    df = df.sort_values("created_time").reset_index(drop=True)
    first = df.iloc[0]

    entry_time = first["created_time"]
    entry_price_yes = float(first["yes_price_dollars"])  # in [0,1]

    # 3. Look up result from metadata
    meta_row = series_markets_df.loc[
        series_markets_df["market_ticker"] == ticker
    ]
    if meta_row.empty:
        raise ValueError(f"No metadata found for {ticker}")
    meta_row = meta_row.iloc[0]
    result_str = str(meta_row["result"]).lower()  # "yes" or "no"

    if result_str == "yes":
        payoff_yes = 1.0
    elif result_str == "no":
        payoff_yes = 0.0
    else:
        raise ValueError(f"Unexpected result value for {ticker}: {result_str}")

    # 4. Position is long NO (short YES)
    price_yes = entry_price_yes
    price_no = 1.0 - entry_price_yes

    payoff_no = 1.0 - payoff_yes
    entry_cost_no = price_no

    pnl = payoff_no - entry_cost_no

    if verbose:
        print(f"[{ticker}] Open-short-NO strategy")
        print(f"  result        : {result_str.upper()} (payoff_yes={payoff_yes})")
        print(f"  entry_time    : {entry_time}")
        print(f"  entry_price_Y : {entry_price_yes:.4f}")
        print(f"  entry_cost_NO : {entry_cost_no:.4f}")
        print(f"  payoff_NO     : {payoff_no:.4f}")
        print(f"  pnl           : {pnl:.4f}")

    return {
        "market_ticker": ticker,
        "strategy": "open_short_no",
        "entry_time": entry_time,
        "entry_price_yes": entry_price_yes,
        "entry_cost_no": entry_cost_no,
        "result": result_str,
        "pnl": pnl,
    }


In [51]:
# Cell 28 â€” Test open-short-NO on one market

target_ticker = "KXSBADS-25-CB"

open_short_result = backtest_open_short_no(target_ticker, verbose=True)
open_short_result


[KXSBADS-25-CB] Open-short-NO strategy
  result        : NO (payoff_yes=0.0)
  entry_time    : 2025-02-04 02:20:41.675941+00:00
  entry_price_Y : 0.2300
  entry_cost_NO : 0.7700
  payoff_NO     : 1.0000
  pnl           : 0.2300


{'market_ticker': 'KXSBADS-25-CB',
 'strategy': 'open_short_no',
 'entry_time': Timestamp('2025-02-04 02:20:41.675941+0000', tz='UTC'),
 'entry_price_yes': 0.23,
 'entry_cost_no': 0.77,
 'result': 'no',
 'pnl': 0.22999999999999998}

In [52]:
# Cell 29 â€” Backtest open-short-NO on ALL markets

open_short_results = []

for t in unique_tickers:
    res = backtest_open_short_no(t, verbose=False)
    if res is None:
        continue
    open_short_results.append(res)

len(open_short_results)


168

In [53]:
# Cell 30 â€” DataFrame + stats for open-short-NO strategy

open_short_df = pd.DataFrame(open_short_results)
print("Rows in open_short_df:", len(open_short_df))
open_short_df.head()


Rows in open_short_df: 168


Unnamed: 0,market_ticker,strategy,entry_time,entry_price_yes,entry_cost_no,result,pnl
0,KXSBADS-25-CB,open_short_no,2025-02-04 02:20:41.675941+00:00,0.23,0.77,no,0.23
1,KXSBADS-25-T,open_short_no,2025-01-25 21:34:36.985058+00:00,0.33,0.67,no,0.33
2,KXSBADS-25-PE,open_short_no,2025-01-25 22:33:56.175338+00:00,0.69,0.31,yes,-0.31
3,KXSBADS-25-CC,open_short_no,2025-01-26 00:58:01.562666+00:00,0.89,0.11,no,0.89
4,KXSBADS-25-H,open_short_no,2025-01-25 21:43:24.025139+00:00,0.33,0.67,no,0.33


In [54]:
# PnL stats
pnls = open_short_df["pnl"]

open_short_summary = {
    "num_markets": len(open_short_df),
    "mean_pnl": pnls.mean(),
    "median_pnl": pnls.median(),
    "min_pnl": pnls.min(),
    "max_pnl": pnls.max(),
}
open_short_summary


{'num_markets': 168,
 'mean_pnl': np.float64(0.09125000000000001),
 'median_pnl': 0.010000000000000009,
 'min_pnl': -0.99,
 'max_pnl': 0.99}

In [None]:
# StrategyConfig-driven backtest helpers
import itertools
import json
from dataclasses import asdict
from datetime import timedelta

sys.path.append(str(PROJECT_ROOT))
from trading_engine.strategy_config import StrategyConfig

In [None]:
# Helpers: load trades, simulate NO position, summarize backtest

def load_trades_with_result(ticker: str):
    df_path = TRADES_DIR / f"{ticker}.csv"
    if not df_path.exists():
        return None, None

    df = pd.read_csv(df_path)
    if df.empty:
        return None, None

    df["created_time"] = pd.to_datetime(df["created_time"], errors="coerce", utc=True)
    df = df.sort_values("created_time").reset_index(drop=True)
    df["price_yes"] = df["yes_price_dollars"]
    df["price_no"] = 1.0 - df["price_yes"]

    meta_row = series_markets_df.loc[series_markets_df["market_ticker"] == ticker]
    if meta_row.empty:
        return df, None

    result_str = str(meta_row.iloc[0]["result"]).lower()
    return df, result_str


def simulate_no_position_with_config(ticker: str, cfg: StrategyConfig, qty: int = 1, verbose: bool = False):
    trades_df, result_str = load_trades_with_result(ticker)
    if trades_df is None:
        if verbose:
            print(f"[{ticker}] no trades loaded")
        return {
            "market_ticker": ticker,
            "had_trade": False,
            "pnl": 0.0,
            "exit_reason": "no_trades",
            "entry_time": None,
            "exit_time": None,
        }

    payoff_yes = None
    if result_str == "yes":
        payoff_yes = 1.0
    elif result_str == "no":
        payoff_yes = 0.0

    candidates = trades_df
    if cfg.max_no_entry_price is not None:
        candidates = candidates[candidates["price_no"] <= cfg.max_no_entry_price]

    if candidates.empty:
        if verbose:
            print(f"[{ticker}] skipped: no price_no <= {cfg.max_no_entry_price}")
        return {
            "market_ticker": ticker,
            "had_trade": False,
            "pnl": 0.0,
            "exit_reason": "entry_filtered",
            "entry_time": None,
            "exit_time": None,
        }

    entry = candidates.iloc[0]
    entry_time = entry["created_time"]
    entry_price_no = float(entry["price_no"])
    entry_value = entry_price_no * qty

    take_profit_val = None if cfg.take_profit_pct is None else cfg.take_profit_pct * entry_value
    stop_loss_val = None if cfg.stop_loss_pct is None else cfg.stop_loss_pct * entry_value

    exit_price_no = None
    exit_time = None
    exit_reason = None

    for _, row in trades_df[trades_df["created_time"] >= entry_time].iterrows():
        curr_no = float(row["price_no"])
        pnl_running = (entry_price_no - curr_no) * qty

        if take_profit_val is not None and pnl_running >= take_profit_val:
            exit_price_no = curr_no
            exit_time = row["created_time"]
            exit_reason = "take_profit"
            break

        if stop_loss_val is not None and pnl_running <= stop_loss_val:
            exit_price_no = curr_no
            exit_time = row["created_time"]
            exit_reason = "stop_loss"
            break

        if cfg.max_hold_seconds is not None:
            age = (row["created_time"] - entry_time).total_seconds()
            if age >= cfg.max_hold_seconds:
                exit_price_no = curr_no
                exit_time = row["created_time"]
                exit_reason = "time_expired"
                break

    if exit_price_no is None:
        payoff_no = (1.0 - payoff_yes) if payoff_yes is not None else float(trades_df.iloc[-1]["price_no"])
        exit_price_no = payoff_no
        exit_time = trades_df.iloc[-1]["created_time"]
        exit_reason = "settlement"

    final_pnl = (entry_price_no - exit_price_no) * qty

    return {
        "market_ticker": ticker,
        "had_trade": True,
        "pnl": final_pnl,
        "exit_reason": exit_reason,
        "entry_time": entry_time,
        "exit_time": exit_time,
        "entry_price_no": entry_price_no,
        "exit_price_no": exit_price_no,
        "result": result_str,
    }


def backtest_config_over_markets(cfg: StrategyConfig, tickers: list[str]) -> pd.DataFrame:
    rows = []
    for t in tickers:
        res = simulate_no_position_with_config(t, cfg)
        res["max_no_entry_price"] = cfg.max_no_entry_price
        res["take_profit_pct"] = cfg.take_profit_pct
        res["stop_loss_pct"] = cfg.stop_loss_pct
        res["max_hold_seconds"] = cfg.max_hold_seconds
        rows.append(res)
    return pd.DataFrame(rows)


def summarize_backtest(df: pd.DataFrame) -> dict[str, float]:
    trades = df[df["had_trade"]]
    if trades.empty:
        return {
            "num_markets": len(df),
            "num_trades": 0,
            "mean_pnl": 0.0,
            "median_pnl": 0.0,
            "min_pnl": 0.0,
            "max_pnl": 0.0,
            "win_rate": 0.0,
        }

    pnls = trades["pnl"]
    return {
        "num_markets": len(df),
        "num_trades": len(trades),
        "mean_pnl": pnls.mean(),
        "median_pnl": pnls.median(),
        "min_pnl": pnls.min(),
        "max_pnl": pnls.max(),
        "win_rate": (pnls > 0).mean(),
    }

In [None]:
# Grid search StrategyConfig space against historical trades
search_space = {
    "max_no_entry_price": [0.25, 0.35, 0.45, 0.55, 0.65, None],
    "take_profit_pct": [None, 0.05, 0.10, 0.20],
    "stop_loss_pct": [None, -0.05, -0.10, -0.20],
    "max_hold_seconds": [6 * 3600, 24 * 3600, 3 * 24 * 3600, None],
}

grid_rows = []
for max_no, tp, sl, hold in itertools.product(
    search_space["max_no_entry_price"],
    search_space["take_profit_pct"],
    search_space["stop_loss_pct"],
    search_space["max_hold_seconds"],
):
    cfg = StrategyConfig(
        max_no_entry_price=max_no,
        take_profit_pct=tp,
        stop_loss_pct=sl,
        max_hold_seconds=hold,
    )

    cfg_df = backtest_config_over_markets(cfg, unique_tickers)
    summary = summarize_backtest(cfg_df)
    summary.update({
        "max_no_entry_price": max_no,
        "take_profit_pct": tp,
        "stop_loss_pct": sl,
        "max_hold_seconds": hold,
    })
    grid_rows.append(summary)

grid_df = pd.DataFrame(grid_rows)

grid_df = grid_df.sort_values(["mean_pnl", "win_rate"], ascending=[False, False]).reset_index(drop=True)

grid_df.head()

In [None]:
# Extract best config, persist, and summarize
if grid_df.empty:
    raise ValueError("Grid search returned no configs")

best_row = grid_df.iloc[0].to_dict()

best_config = StrategyConfig(
    max_no_entry_price=best_row["max_no_entry_price"],
    take_profit_pct=best_row["take_profit_pct"],
    stop_loss_pct=best_row["stop_loss_pct"],
    max_hold_seconds=best_row["max_hold_seconds"],
)

best_config_df = backtest_config_over_markets(best_config, unique_tickers)
best_summary = summarize_backtest(best_config_df)

best_config_dict = asdict(best_config)
best_config_dict.update({
    "mean_pnl": best_summary["mean_pnl"],
    "win_rate": best_summary["win_rate"],
    "num_trades": best_summary["num_trades"],
})

best_config_path = DATA_DIR / "best_strategy_config.json"
best_config_path.write_text(json.dumps(best_config_dict, indent=2))
print("Saved best config to", best_config_path)

pd.DataFrame([best_summary])

In [None]:
#STRATEGY CONFIG

In [6]:
# StrategyConfig-driven backtest helpers
import itertools
import json
from dataclasses import asdict
from datetime import timedelta
import sys

sys.path.append(str(PROJECT_ROOT))
from trading_engine.strategy_config import StrategyConfig


In [7]:
# Helpers: load trades, simulate NO position, summarize backtest

def load_trades_with_result(ticker: str):
    df_path = TRADES_DIR / f"{ticker}.csv"
    if not df_path.exists():
        return None, None

    df = pd.read_csv(df_path)
    if df.empty:
        return None, None

    df["created_time"] = pd.to_datetime(df["created_time"], errors="coerce", utc=True)
    df = df.sort_values("created_time").reset_index(drop=True)
    df["price_yes"] = df["yes_price_dollars"]
    df["price_no"] = 1.0 - df["price_yes"]

    meta_row = series_markets_df.loc[series_markets_df["market_ticker"] == ticker]
    if meta_row.empty:
        return df, None

    result_str = str(meta_row.iloc[0]["result"]).lower()
    return df, result_str


def simulate_no_position_with_config(ticker: str, cfg: StrategyConfig, qty: int = 1, verbose: bool = False):
    trades_df, result_str = load_trades_with_result(ticker)
    if trades_df is None:
        if verbose:
            print(f"[{ticker}] no trades loaded")
        return {
            "market_ticker": ticker,
            "had_trade": False,
            "pnl": 0.0,
            "exit_reason": "no_trades",
            "entry_time": None,
            "exit_time": None,
        }

    payoff_yes = None
    if result_str == "yes":
        payoff_yes = 1.0
    elif result_str == "no":
        payoff_yes = 0.0

    candidates = trades_df
    if cfg.max_no_entry_price is not None:
        candidates = candidates[candidates["price_no"] <= cfg.max_no_entry_price]

    if candidates.empty:
        if verbose:
            print(f"[{ticker}] skipped: no price_no <= {cfg.max_no_entry_price}")
        return {
            "market_ticker": ticker,
            "had_trade": False,
            "pnl": 0.0,
            "exit_reason": "entry_filtered",
            "entry_time": None,
            "exit_time": None,
        }

    entry = candidates.iloc[0]
    entry_time = entry["created_time"]
    entry_price_no = float(entry["price_no"])
    entry_value = entry_price_no * qty

    take_profit_val = None if cfg.take_profit_pct is None else cfg.take_profit_pct * entry_value
    stop_loss_val = None if cfg.stop_loss_pct is None else cfg.stop_loss_pct * entry_value

    exit_price_no = None
    exit_time = None
    exit_reason = None

    for _, row in trades_df[trades_df["created_time"] >= entry_time].iterrows():
        curr_no = float(row["price_no"])
        pnl_running = (entry_price_no - curr_no) * qty

        if take_profit_val is not None and pnl_running >= take_profit_val:
            exit_price_no = curr_no
            exit_time = row["created_time"]
            exit_reason = "take_profit"
            break

        if stop_loss_val is not None and pnl_running <= stop_loss_val:
            exit_price_no = curr_no
            exit_time = row["created_time"]
            exit_reason = "stop_loss"
            break

        if cfg.max_hold_seconds is not None:
            age = (row["created_time"] - entry_time).total_seconds()
            if age >= cfg.max_hold_seconds:
                exit_price_no = curr_no
                exit_time = row["created_time"]
                exit_reason = "time_expired"
                break

    if exit_price_no is None:
        payoff_no = (1.0 - payoff_yes) if payoff_yes is not None else float(trades_df.iloc[-1]["price_no"])
        exit_price_no = payoff_no
        exit_time = trades_df.iloc[-1]["created_time"]
        exit_reason = "settlement"

    final_pnl = (entry_price_no - exit_price_no) * qty

    return {
        "market_ticker": ticker,
        "had_trade": True,
        "pnl": final_pnl,
        "exit_reason": exit_reason,
        "entry_time": entry_time,
        "exit_time": exit_time,
        "entry_price_no": entry_price_no,
        "exit_price_no": exit_price_no,
        "result": result_str,
    }


def backtest_config_over_markets(cfg: StrategyConfig, tickers: list[str]) -> pd.DataFrame:
    rows = []
    for t in tickers:
        res = simulate_no_position_with_config(t, cfg)
        res["max_no_entry_price"] = cfg.max_no_entry_price
        res["take_profit_pct"] = cfg.take_profit_pct
        res["stop_loss_pct"] = cfg.stop_loss_pct
        res["max_hold_seconds"] = cfg.max_hold_seconds
        rows.append(res)
    return pd.DataFrame(rows)


def summarize_backtest(df: pd.DataFrame) -> dict[str, float]:
    trades = df[df["had_trade"]]
    if trades.empty:
        return {
            "num_markets": len(df),
            "num_trades": 0,
            "mean_pnl": 0.0,
            "median_pnl": 0.0,
            "min_pnl": 0.0,
            "max_pnl": 0.0,
            "win_rate": 0.0,
        }

    pnls = trades["pnl"]
    return {
        "num_markets": len(df),
        "num_trades": len(trades),
        "mean_pnl": pnls.mean(),
        "median_pnl": pnls.median(),
        "min_pnl": pnls.min(),
        "max_pnl": pnls.max(),
        "win_rate": (pnls > 0).mean(),
    }


In [8]:
# Grid search StrategyConfig space against historical trades
search_space = {
    "max_no_entry_price": [0.25, 0.35, 0.45, 0.55, 0.65, None],
    "take_profit_pct": [None, 0.05, 0.10, 0.20],
    "stop_loss_pct": [None, -0.05, -0.10, -0.20],
    "max_hold_seconds": [6 * 3600, 24 * 3600, 3 * 24 * 3600, None],
}

grid_rows = []
for max_no, tp, sl, hold in itertools.product(
    search_space["max_no_entry_price"],
    search_space["take_profit_pct"],
    search_space["stop_loss_pct"],
    search_space["max_hold_seconds"],
):
    cfg = StrategyConfig(
        max_no_entry_price=max_no,
        take_profit_pct=tp,
        stop_loss_pct=sl,
        max_hold_seconds=hold,
    )

    cfg_df = backtest_config_over_markets(cfg, unique_tickers)
    summary = summarize_backtest(cfg_df)
    summary.update({
        "max_no_entry_price": max_no,
        "take_profit_pct": tp,
        "stop_loss_pct": sl,
        "max_hold_seconds": hold,
    })
    grid_rows.append(summary)

grid_df = pd.DataFrame(grid_rows)
grid_df = grid_df.sort_values(["mean_pnl", "win_rate"], ascending=[False, False]).reset_index(drop=True)

grid_df.head()


NameError: name 'unique_tickers' is not defined