# NBA High-Confidence Underdog Mispricing (T-6h)
Research-only summary tables and diagnostics.

In [4]:
import json
from pathlib import Path

import numpy as np
import pandas as pd


In [5]:
# Paths
# The repository root is taken to be the parent of the current working
# directory; the input dataset and both output artifacts sit in its `data`
# folder.
repo_root = Path.cwd().resolve().parent
data_path = repo_root.joinpath('data', 'nba_low_conf_dataset.csv')
calibration_path = repo_root.joinpath('data', 'calibration_table.csv')
runup_path = repo_root.joinpath('data', 'runup_diagnostics.json')

# Bare expression so the resolved dataset location is echoed as cell output.
data_path


WindowsPath('C:/Users/Adam Nhan/prediction-market-money-printer/data/nba_low_conf_dataset.csv')

In [6]:
# Read the full dataset from `data_path` and report how many rows were loaded.
df = pd.read_csv(data_path)
print(f'total_rows={df.shape[0]}')


total_rows=1347


In [7]:
# Cohort definition
# Force the outcome and price columns to numeric; values that cannot be parsed
# become NaN and are removed by the dropna below.
df['settle_yes'] = pd.to_numeric(df['settle_yes'], errors='coerce')
df['p_t6h'] = pd.to_numeric(df['p_t6h'], errors='coerce')

# Keep only rows flagged both high-confidence and underdog.
cohort = df.loc[(df['high_conf'] == 1) & (df['is_underdog'] == 1)].copy()
print(f'cohort_rows_raw={len(cohort)}')

# Remove rows missing either the price or the settlement, and report how many
# were lost so the final sample size is traceable.
before = len(cohort)
cohort = cohort.dropna(subset=['p_t6h', 'settle_yes']).copy()
dropped = before - len(cohort)
print(
    f'cohort_rows_dropped_missing={dropped}',
    f'cohort_rows_final={len(cohort)}',
    sep='\n',
)


cohort_rows_raw=95
cohort_rows_dropped_missing=0
cohort_rows_final=95


In [8]:
# Deliverable A: Calibration table
# Half-open price buckets [lo, hi); `labels` lists one name per bucket.
bins = [0.05, 0.10, 0.15, 0.20, 0.25]
labels = ['[0.05,0.10)', '[0.10,0.15)', '[0.15,0.20)', '[0.20,0.25)']

cohort['bin'] = pd.cut(cohort['p_t6h'], bins=bins, right=False, labels=labels)

# Prices outside every bucket get a missing `bin`; they are left out of the
# table but still counted in the aggregate line further down.
in_bin = cohort.loc[cohort['bin'].notna()].copy()
bin_counts = in_bin['bin'].value_counts().reindex(labels, fill_value=0)
outside_bins = len(cohort) - bin_counts.sum()
print(
    f'cohort_in_bins={int(bin_counts.sum())}',
    f'cohort_outside_bins={int(outside_bins)}',
    sep='\n',
)

# Per bucket: sample size, mean implied probability, realized win rate.
calib = in_bin.groupby('bin', observed=True).agg(
    N=('p_t6h', 'size'),
    mean_p=('p_t6h', 'mean'),
    realized_win_rate=('settle_yes', 'mean'),
)
calib = calib.reset_index()
# edge = realized win rate minus mean implied probability.
calib = calib.assign(edge=calib['realized_win_rate'] - calib['mean_p'])

print('\nCalibration table:', calib.to_string(index=False), sep='\n')

calib.to_csv(calibration_path, index=False)
print(f'Wrote {calibration_path}')

# Aggregate line
# Computed over the whole cohort, including rows outside the buckets.
N_total = len(cohort)
avg_implied_p = cohort['p_t6h'].mean()
avg_realized = cohort['settle_yes'].mean()
raw_ev_per_contract = avg_realized - avg_implied_p

print(
    '\nAggregate cohort:',
    f'N_total={N_total}',
    f'avg_implied_p={avg_implied_p:.6f}',
    f'avg_realized={avg_realized:.6f}',
    f'raw_ev_per_contract={raw_ev_per_contract:.6f}',
    sep='\n',
)


cohort_in_bins=95
cohort_outside_bins=0

Calibration table:
        bin  N   mean_p  realized_win_rate      edge
[0.05,0.10)  2 0.090000           0.000000 -0.090000
[0.10,0.15) 30 0.125667           0.066667 -0.059000
[0.15,0.20) 49 0.173265           0.163265 -0.010000
[0.20,0.25) 14 0.200000           0.357143  0.157143
Wrote C:\Users\Adam Nhan\prediction-market-money-printer\data\calibration_table.csv

Aggregate cohort:
N_total=95
avg_implied_p=0.160421
avg_realized=0.157895
raw_ev_per_contract=-0.002526


In [9]:
# Deliverable B: Convexity diagnostics (runups, no exits)
def pct_ge(series: pd.Series, thresh: float) -> float:
    """Return the fraction of entries in ``series`` that are >= ``thresh``.

    An empty series yields NaN. The denominator is the full length of
    ``series``.
    """
    share = (series >= thresh).mean() if len(series) else float('nan')
    return float(share)

def runup_stats(series: pd.Series) -> dict:
    """Summarise ``series`` as mean, median, 90th and 95th percentile.

    Returns a dict with keys ``mean``, ``median``, ``p90`` and ``p95`` holding
    plain Python floats. Every value is NaN when ``series`` is empty.
    """
    if not len(series):
        return {key: float('nan') for key in ('mean', 'median', 'p90', 'p95')}
    return {
        'mean': float(series.mean()),
        'median': float(series.median()),
        'p90': float(series.quantile(0.90)),
        'p95': float(series.quantile(0.95)),
    }

# Coerce run-ups to numeric and drop rows without a usable value. The filtered
# frame is bound back to `cohort`, so overall, winners and losers all share the
# same population.
runup = pd.to_numeric(cohort['max_runup'], errors='coerce')
rows_before_runup = len(cohort)
cohort = cohort.assign(max_runup=runup).dropna(subset=['max_runup'])
rows_missing_runup = rows_before_runup - len(cohort)
if rows_missing_runup:
    # Surface the loss instead of shrinking the sample silently.
    print(f'cohort_rows_dropped_missing_runup={rows_missing_runup}')


def _runup_diagnostics(frame: pd.DataFrame) -> dict:
    """Threshold hit-rates and summary stats of `max_runup` for one subset."""
    values = frame['max_runup']
    return {
        'pct_runup_ge_0.10': pct_ge(values, 0.10),
        'pct_runup_ge_0.20': pct_ge(values, 0.20),
        'stats': runup_stats(values),
    }


def _json_safe(obj):
    """Return a copy of `obj` with non-finite floats replaced by None.

    json.dumps would otherwise emit bare NaN/Infinity tokens, which are not
    valid JSON. An empty winners or losers subset produces exactly such NaNs.
    """
    if isinstance(obj, dict):
        return {key: _json_safe(val) for key, val in obj.items()}
    if isinstance(obj, float) and not np.isfinite(obj):
        return None
    return obj


overall = _runup_diagnostics(cohort)

# Split by settlement outcome: 1 = settled YES (winner), 0 = settled NO (loser).
winners = cohort[cohort['settle_yes'] == 1]
losers = cohort[cohort['settle_yes'] == 0]

winners_diag = _runup_diagnostics(winners)
losers_diag = _runup_diagnostics(losers)

print('Overall runup diagnostics:')
print(overall)
print('\nWinners runup diagnostics:')
print(winners_diag)
print('\nLosers runup diagnostics:')
print(losers_diag)

diagnostics = {
    'overall': overall,
    'winners': winners_diag,
    'losers': losers_diag,
}

# allow_nan=False makes the dump fail loudly if a non-finite value ever slips
# past _json_safe, rather than writing an unparseable file.
runup_path.write_text(
    json.dumps(_json_safe(diagnostics), indent=2, sort_keys=True, allow_nan=False),
    encoding='utf-8',
)
print(f'Wrote {runup_path}')


Overall runup diagnostics:
{'pct_runup_ge_0.10': 0.12631578947368421, 'pct_runup_ge_0.20': 0.042105263157894736, 'stats': {'mean': 0.05347368421052628, 'median': 0.02, 'p90': 0.12600000000000008, 'p95': 0.1759999999999999}}

Winners runup diagnostics:
{'pct_runup_ge_0.10': 0.06666666666666667, 'pct_runup_ge_0.20': 0.06666666666666667, 'stats': {'mean': 0.07466666666666665, 'median': 0.03, 'p90': 0.046, 'p95': 0.2749999999999992}}

Losers runup diagnostics:
{'pct_runup_ge_0.10': 0.1375, 'pct_runup_ge_0.20': 0.0375, 'stats': {'mean': 0.04949999999999997, 'median': 0.01999999999999995, 'p90': 0.13, 'p95': 0.17099999999999996}}
Wrote C:\Users\Adam Nhan\prediction-market-money-printer\data\runup_diagnostics.json
