In [32]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [40]:
odds_df = pd.read_csv("../data_collection/updated_scripts/oddsapi_outputs/2025-11-14/cbb_odds_2025-11-14.csv")
odds_df.drop(columns=['league'], inplace=True)
odds_df.rename(columns={'price': 'odds'}, inplace=True)
odds_df['vig_prob'] = 1 / odds_df['odds']

def remove_vig_probs(df):
    df = df.copy()
    df['fair_prb'] = pd.NA

    grouped = df.groupby(['game_id', 'bookmaker', 'market'])

    for _, group in grouped:
        if len(group) < 2:
            continue
        probs = group['vig_prob']
        total = probs.sum()
        if total == 0:
            continue
        fair_probs = (probs / total).round(4)
        df.loc[group.index, 'fair_prb'] = fair_probs

    return df

odds_df = remove_vig_probs(odds_df)

# Average per-team fair probabilities across DraftKings/FanDuel/Pinnacle
odds_df = odds_df.copy()
mask = odds_df['fair_prb'].notna()
avg_by_team = (
    odds_df.loc[mask]
    .groupby(['game_id', 'market', 'team'])['fair_prb']
    .transform('mean')
    .round(4)
)
odds_df.loc[mask, 'avg_fair_prb'] = avg_by_team
odds_df.loc[~mask, 'avg_fair_prb'] = pd.NA
odds_df.head()


Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb,avg_fair_prb
0,CBB,ef3972b9d28e04dd11342475593c9d1f,2025-11-14 17:29:18 CST,DraftKings,h2h,Charleston Southern Buccaneers,1.43,,Lindenwood Lions,Charleston Southern Buccaneers,0.699301,0.6579,0.64845
1,CBB,ef3972b9d28e04dd11342475593c9d1f,2025-11-14 17:29:18 CST,DraftKings,h2h,Lindenwood Lions,2.75,,Lindenwood Lions,Charleston Southern Buccaneers,0.363636,0.3421,0.35155
2,CBB,ef3972b9d28e04dd11342475593c9d1f,2025-11-14 17:29:18 CST,DraftKings,spreads,Charleston Southern Buccaneers,1.77,-2.5,Lindenwood Lions,Charleston Southern Buccaneers,0.564972,0.5305,0.5219
3,CBB,ef3972b9d28e04dd11342475593c9d1f,2025-11-14 17:29:18 CST,DraftKings,spreads,Lindenwood Lions,2.0,2.5,Lindenwood Lions,Charleston Southern Buccaneers,0.5,0.4695,0.4781
4,CBB,ef3972b9d28e04dd11342475593c9d1f,2025-11-14 17:29:18 CST,DraftKings,totals,Over,1.83,150.5,Lindenwood Lions,Charleston Southern Buccaneers,0.546448,0.5107,0.50535


In [54]:
kalshi_winners_df = pd.read_csv("../data_collection/updated_scripts/kalshi_data_logs/2025-11-14/ncaab_winners.csv")
kalshi_totals_df = pd.read_csv("../data_collection/updated_scripts/kalshi_data_logs/2025-11-14/ncaab_totals.csv")
kalshi_spreads_df = pd.read_csv("../data_collection/updated_scripts/kalshi_data_logs/2025-11-14/ncaab_spreads.csv")

columns_to_drop = ['timestamp', 'market_type', 'yes_bid2', 'yes_ask2', 'no_bid2', 'no_ask2', 'yes_depth_bids', 'yes_depth_asks', 'no_depth_bids', 'no_depth_asks']
kalshi_winners_df.drop(columns=columns_to_drop, inplace=True)
kalshi_spreads_df.drop(columns=columns_to_drop, inplace=True)
kalshi_totals_df.drop(columns=columns_to_drop, inplace=True)

In [55]:
kalshi_winners_df.head()

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h
0,KXNCAAMBGAME-25NOV14HPUAB-UAB,High Point at UAB Winner?,active,2025-11-28T19:30:00-05:00,0.48,0.49,0.51,0.52,0.01,0.01,1135455.44,19630.0
1,KXNCAAMBGAME-25NOV14HPUAB-HP,High Point at UAB Winner?,active,2025-11-28T19:30:00-05:00,0.49,0.59,0.41,0.51,0.1,0.1,1134652.62,44166.0
2,KXNCAAMBGAME-25NOV14WASHWSU-WSU,Washington at Washington St. Winner?,active,2025-11-28T23:00:00-05:00,0.21,0.23,0.77,0.79,0.02,0.02,1250346.58,8432.0
3,KXNCAAMBGAME-25NOV14WASHWSU-WASH,Washington at Washington St. Winner?,active,2025-11-28T23:00:00-05:00,0.78,0.79,0.21,0.22,0.01,0.01,1265073.06,10269.0
4,KXNCAAMBGAME-25NOV14GONZASU-GONZ,Gonzaga at Arizona St. Winner?,active,2025-11-28T23:00:00-05:00,0.9,0.91,0.09,0.1,0.01,0.01,1237788.39,8118.0
