In [96]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import regex as re
from collections import defaultdict

things to look at: how accurate is the prematch probability when it ends (if a team has over 50% chance to win, how often do they actually win the game?)

In [97]:
date = '2025-11-22'

In [98]:
odds_df = pd.read_csv(f"../data_collection/updated_scripts/oddsapi_outputs/{date}/cbb_odds.csv")
odds_df.drop(columns=['league'], inplace=True)
odds_df.rename(columns={'price': 'odds'}, inplace=True)
odds_df['vig_prob'] = 1 / odds_df['odds']

def remove_vig_probs(df):
    df = df.copy()
    df['fair_prb'] = pd.NA

    grouped = df.groupby(['game_id', 'bookmaker', 'market'])

    for _, group in grouped:
        if len(group) < 2:
            continue
        probs = group['vig_prob']
        total = probs.sum()
        if total == 0:
            continue
        fair_probs = (probs / total).round(4)
        df.loc[group.index, 'fair_prb'] = fair_probs

    return df

odds_df = remove_vig_probs(odds_df)


odds_winners_df = odds_df[odds_df['market'] == 'h2h'].copy()
odds_spreads_df = odds_df[odds_df['market'] == 'spreads'].copy()
odds_spreads_df = odds_spreads_df.loc[(odds_spreads_df['point'].notna()) & (odds_spreads_df['point'] > 0)]
odds_totals_df  = odds_df[odds_df['market'] == 'totals'].copy()

# Average per-team fair probabilities across DraftKings/FanDuel/Pinnacle for winners_df
mask = odds_winners_df['fair_prb'].notna()
avg_by_team = (
    odds_winners_df.loc[mask]
    .groupby(['game_id', 'team'])['fair_prb']
    .transform('mean')
    .round(4)
)
odds_winners_df.loc[mask, 'avg_fair_prb'] = avg_by_team
odds_winners_df.loc[~mask, 'avg_fair_prb'] = pd.NA

mask = odds_spreads_df['fair_prb'].notna()
avg_by_point = (
    odds_spreads_df.loc[mask]
    .groupby(['game_id', 'point', 'team'])['fair_prb']
    .transform('mean')
    .round(4)
)
odds_spreads_df['avg_fair_prb'] = avg_by_point

In [99]:
kalshi_winners_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/ncaab_winners.csv")
kalshi_totals_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/ncaab_totals.csv")
kalshi_spreads_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/ncaab_spreads.csv")

kalshi_spreads_df['points'] = kalshi_spreads_df['title'].str.extract(r'over ([\d.]+) Points\?').astype(float)

columns_to_drop = ['timestamp', 'market_type', 'yes_bid2', 'yes_ask2', 'no_bid2', 'no_ask2', 'yes_depth_bids', 'yes_depth_asks', 'no_depth_bids', 'no_depth_asks']
kalshi_winners_df.drop(columns=columns_to_drop, inplace=True)
kalshi_spreads_df.drop(columns=columns_to_drop, inplace=True)
kalshi_totals_df.drop(columns=columns_to_drop, inplace=True)

In [100]:
kalshi_spreads_df.head()

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h,points
0,KXNCAAMBSPREAD-25NOV22IONAORST-ORST9,Oregon St. wins by over 9.5 Points?,active,2025-12-06T17:30:00-05:00,0.21,0.38,0.62,0.79,0.17,0.17,5847.99,0.0,9.5
1,KXNCAAMBSPREAD-25NOV22IONAORST-ORST6,Oregon St. wins by over 6.5 Points?,active,2025-12-06T17:30:00-05:00,0.36,0.41,0.59,0.64,0.05,0.05,6301.07,13.0,6.5
2,KXNCAAMBSPREAD-25NOV22IONAORST-ORST3,Oregon St. wins by over 3.5 Points?,active,2025-12-06T17:30:00-05:00,0.51,0.53,0.47,0.49,0.02,0.02,18096.51,695.0,3.5
3,KXNCAAMBSPREAD-25NOV22IONAORST-ORST18,Oregon St. wins by over 18.5 Points?,active,2025-12-06T17:30:00-05:00,0.03,0.97,0.03,0.97,0.94,0.94,1506.76,0.0,18.5
4,KXNCAAMBSPREAD-25NOV22IONAORST-ORST15,Oregon St. wins by over 15.5 Points?,active,2025-12-06T17:30:00-05:00,0.09,0.28,0.72,0.91,0.19,0.19,5411.32,0.0,15.5


In [101]:
#get names from kalshi_winners_df
def extract_teams_from_winners(title):
    title = title.replace(" Winner?", "")
    if " at " in title:
        right, left = title.split(" at ", 1)
    else:
        return pd.Series([None, None])  
    left = re.sub(r'\bSt\.$', 'St', left.strip())
    right = re.sub(r'\bSt\.$', 'St', right.strip())
    return pd.Series([left, right])

kalshi_winners_df[['home_team', 'away_team']] = kalshi_winners_df['title'].apply(extract_teams_from_winners)
unique_rows = kalshi_winners_df.drop_duplicates(subset=['home_team', 'away_team'])
flat_teams = pd.unique(unique_rows[['home_team', 'away_team']].values.ravel())
kalshi_winners_teams = flat_teams.tolist()

#get names from kalshi_totals_df
def extract_team_from_totals(title):
    title = title.replace(": Total Points", "")
    if " at " in title:
        left = title.split(" at ", 1)[0].strip()
        left = re.sub(r'\bSt\.$', 'St', left)
        return left
    return None

kalshi_totals_df['away_team'] = kalshi_totals_df['title'].apply(extract_team_from_totals)
kalshi_totals_teams = kalshi_totals_df['away_team'].dropna().drop_duplicates().tolist()

#get names from kalshi_spreads_df
def extract_team_from_spreads(title):
    if " wins by " in title:
        team = title.split(" wins by ", 1)[0].strip()
        team = re.sub(r'\bSt\.$', 'St', team)
        return team
    return None

kalshi_spreads_df['team'] = kalshi_spreads_df['title'].apply(extract_team_from_spreads)
unique_teams_spread = kalshi_spreads_df['team'].drop_duplicates()
kalshi_spreads_teams = unique_teams_spread.tolist()

In [102]:
kalshi_spreads_df.head()

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h,points,team
0,KXNCAAMBSPREAD-25NOV22IONAORST-ORST9,Oregon St. wins by over 9.5 Points?,active,2025-12-06T17:30:00-05:00,0.21,0.38,0.62,0.79,0.17,0.17,5847.99,0.0,9.5,Oregon St
1,KXNCAAMBSPREAD-25NOV22IONAORST-ORST6,Oregon St. wins by over 6.5 Points?,active,2025-12-06T17:30:00-05:00,0.36,0.41,0.59,0.64,0.05,0.05,6301.07,13.0,6.5,Oregon St
2,KXNCAAMBSPREAD-25NOV22IONAORST-ORST3,Oregon St. wins by over 3.5 Points?,active,2025-12-06T17:30:00-05:00,0.51,0.53,0.47,0.49,0.02,0.02,18096.51,695.0,3.5,Oregon St
3,KXNCAAMBSPREAD-25NOV22IONAORST-ORST18,Oregon St. wins by over 18.5 Points?,active,2025-12-06T17:30:00-05:00,0.03,0.97,0.03,0.97,0.94,0.94,1506.76,0.0,18.5,Oregon St
4,KXNCAAMBSPREAD-25NOV22IONAORST-ORST15,Oregon St. wins by over 15.5 Points?,active,2025-12-06T17:30:00-05:00,0.09,0.28,0.72,0.91,0.19,0.19,5411.32,0.0,15.5,Oregon St


In [103]:
odds_df.head()

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb
0,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,h2h,Saint Peter's Peacocks,1.25,,Saint Peter's Peacocks,UMass Lowell River Hawks,0.8,0.7525
1,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,h2h,UMass Lowell River Hawks,3.8,,Saint Peter's Peacocks,UMass Lowell River Hawks,0.263158,0.2475
2,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,spreads,Saint Peter's Peacocks,1.87,-2.5,Saint Peter's Peacocks,UMass Lowell River Hawks,0.534759,0.5
3,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,spreads,UMass Lowell River Hawks,1.87,2.5,Saint Peter's Peacocks,UMass Lowell River Hawks,0.534759,0.5
4,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,FanDuel,h2h,Saint Peter's Peacocks,1.25,,Saint Peter's Peacocks,UMass Lowell River Hawks,0.8,0.7549


In [104]:
odds_teams_by_market = odds_df.groupby('market')['team'].unique().to_dict()

def fuzzy_match_kalshi_to_odds(kalshi_teams, odds_team_names):
    matched_kalshi = []
    matched_odds = []

    kalshi_sorted = sorted(kalshi_teams, key=lambda x: x[0] if x else '')
    remaining_odds = sorted(odds_team_names.tolist().copy(), reverse=True)

    for kalshi_name in kalshi_sorted:
        candidates = []
        for odds_name in remaining_odds:
            if kalshi_name in odds_name:
                candidates.append(odds_name)
        if len(candidates) == 1:
            matched_kalshi.append(kalshi_name)
            matched_odds.append(candidates[0])

    return matched_kalshi, matched_odds

matched_data = {}

# Winners / h2h
matched_kalshi_h2h, matched_odds_h2h = fuzzy_match_kalshi_to_odds(
    kalshi_winners_teams,
    odds_teams_by_market.get('h2h', [])
)

# Spreads
matched_kalshi_spreads, matched_odds_spreads = fuzzy_match_kalshi_to_odds(
    kalshi_spreads_teams,
    odds_teams_by_market.get('spreads', [])
)

# Totals (match only Over/Under)
totals_odds_df = odds_df[odds_df['market'] == 'totals']
odds_totals_teams = pd.unique(totals_odds_df[['home_team', 'away_team']].values.ravel())
matched_kalshi_totals, matched_odds_totals = fuzzy_match_kalshi_to_odds(
    kalshi_totals_teams,
    odds_totals_teams
)

matched_names = {
    'h2h': {
        'kalshi': matched_kalshi_h2h,
        'odds': matched_odds_h2h
    },
    'spreads': {
        'kalshi': matched_kalshi_spreads,
        'odds': matched_odds_spreads
    },
    'totals': {
        'kalshi': matched_kalshi_totals,
        'odds': matched_odds_totals
    }
}


In [105]:
assert(len(matched_names['h2h']['kalshi']) == len(matched_names['h2h']['odds']))
assert(len(matched_names['spreads']['kalshi']) == len(matched_names['spreads']['odds']))
assert(len(matched_names['totals']['kalshi']) == len(matched_names['totals']['odds']))

In [106]:
odds_df[odds_df['market'] == 'spreads']

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb
2,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,spreads,Saint Peter's Peacocks,1.87,-2.5,Saint Peter's Peacocks,UMass Lowell River Hawks,0.534759,0.5
3,CBB,86b646e4454bb759c93c31c46d156c2a,2025-11-22 12:47:00 CST,DraftKings,spreads,UMass Lowell River Hawks,1.87,2.5,Saint Peter's Peacocks,UMass Lowell River Hawks,0.534759,0.5
8,CBB,42de858124bbb1f919284da58a0f8b7f,2025-11-22 13:00:00 CST,FanDuel,spreads,Central Michigan Chippewas,1.85,8.5,Marquette Golden Eagles,Central Michigan Chippewas,0.540541,0.5053
9,CBB,42de858124bbb1f919284da58a0f8b7f,2025-11-22 13:00:00 CST,FanDuel,spreads,Marquette Golden Eagles,1.89,-8.5,Marquette Golden Eagles,Central Michigan Chippewas,0.529101,0.4947
14,CBB,42de858124bbb1f919284da58a0f8b7f,2025-11-22 13:00:00 CST,DraftKings,spreads,Central Michigan Chippewas,1.95,8.5,Marquette Golden Eagles,Central Michigan Chippewas,0.512821,0.48
...,...,...,...,...,...,...,...,...,...,...,...,...
483,CBB,e4039c727ed3f9b6e78f7cc5b76059fd,2025-11-22 21:30:07 CST,FanDuel,spreads,UC Irvine Anteaters,1.87,-1.5,UC Irvine Anteaters,Northern Iowa Panthers,0.534759,0.5105
488,CBB,e4039c727ed3f9b6e78f7cc5b76059fd,2025-11-22 21:30:07 CST,DraftKings,spreads,Northern Iowa Panthers,2.00,1.5,UC Irvine Anteaters,Northern Iowa Panthers,0.500000,0.4778
489,CBB,e4039c727ed3f9b6e78f7cc5b76059fd,2025-11-22 21:30:07 CST,DraftKings,spreads,UC Irvine Anteaters,1.83,-1.5,UC Irvine Anteaters,Northern Iowa Panthers,0.546448,0.5222
494,CBB,e4039c727ed3f9b6e78f7cc5b76059fd,2025-11-22 21:30:07 CST,Pinnacle,spreads,Northern Iowa Panthers,1.92,2.0,UC Irvine Anteaters,Northern Iowa Panthers,0.520833,0.5013


In [107]:
odds_winners_df = odds_winners_df[
    odds_winners_df['home_team'].isin(matched_names['h2h']['odds']) |
    odds_winners_df['away_team'].isin(matched_names['h2h']['odds'])
].drop_duplicates(subset='team').reset_index(drop=True)

kalshi_winners_df = kalshi_winners_df[
    kalshi_winners_df['home_team'].isin(matched_names['h2h']['kalshi']) |
    kalshi_winners_df['away_team'].isin(matched_names['h2h']['kalshi'])
].reset_index(drop=True)

odds_spreads_df = odds_spreads_df[odds_spreads_df['team'].isin(matched_names['spreads']['odds'])].reset_index(drop=True)
kalshi_spreads_df = kalshi_spreads_df[kalshi_spreads_df['team'].isin(matched_names['spreads']['kalshi'])].reset_index(drop=True)

odds_totals_df = odds_totals_df[
    odds_totals_df['home_team'].isin(matched_names['totals']['odds']) |
    odds_totals_df['away_team'].isin(matched_names['totals']['odds'])
].reset_index(drop=True)
kalshi_totals_df = kalshi_totals_df[kalshi_totals_df['away_team'].isin(matched_names['totals']['kalshi'])].reset_index(drop=True)


In [108]:
kalshi_winners_df

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h,home_team,away_team
0,KXNCAAMBGAME-25NOV22IONAORST-ORST,Iona at Oregon St. Winner?,active,2025-12-06T17:30:00-05:00,0.61,0.64,0.36,0.39,0.03,0.03,16758.13,2524.0,Oregon St,Iona
1,KXNCAAMBGAME-25NOV22IONAORST-IONA,Iona at Oregon St. Winner?,active,2025-12-06T17:30:00-05:00,0.36,0.38,0.62,0.64,0.02,0.02,19041.61,5842.0,Oregon St,Iona
2,KXNCAAMBGAME-25NOV22UNCOPORT-UNCO,Northern Colorado at Portland Winner?,active,2025-12-06T20:00:00-05:00,0.59,0.61,0.39,0.41,0.02,0.02,1248727.1,5183.0,Portland,Northern Colorado
3,KXNCAAMBGAME-25NOV22UNCOPORT-PORT,Northern Colorado at Portland Winner?,active,2025-12-06T20:00:00-05:00,0.39,0.41,0.59,0.61,0.02,0.02,1239398.31,4635.0,Portland,Northern Colorado
4,KXNCAAMBGAME-25NOV22MILWWICH-WICH,Milwaukee at Wichita St. Winner?,active,2025-12-06T19:00:00-05:00,0.84,0.85,0.15,0.16,0.01,0.01,1127889.28,10769.0,Wichita St,Milwaukee
5,KXNCAAMBGAME-25NOV22MILWWICH-MILW,Milwaukee at Wichita St. Winner?,active,2025-12-06T19:00:00-05:00,0.16,0.18,0.82,0.84,0.02,0.02,1138653.97,3164.0,Wichita St,Milwaukee
6,KXNCAAMBGAME-25NOV22WEBBRICH-WEBB,Gardner-Webb at Richmond Winner?,active,2025-12-06T19:00:00-05:00,0.04,0.06,0.94,0.96,0.02,0.02,1096378.39,2833.0,Richmond,Gardner-Webb
7,KXNCAAMBGAME-25NOV22WEBBRICH-RICH,Gardner-Webb at Richmond Winner?,active,2025-12-06T19:00:00-05:00,0.94,0.96,0.04,0.06,0.02,0.02,1098024.6,807.0,Richmond,Gardner-Webb
8,KXNCAAMBGAME-25NOV22SFMINN-SF,San Francisco at Minnesota Winner?,active,2025-12-06T17:30:00-05:00,0.51,0.53,0.47,0.49,0.02,0.02,1615832.65,13872.0,Minnesota,San Francisco
9,KXNCAAMBGAME-25NOV22SFMINN-MINN,San Francisco at Minnesota Winner?,active,2025-12-06T17:30:00-05:00,0.47,0.49,0.51,0.53,0.02,0.02,1616272.84,7813.0,Minnesota,San Francisco


In [109]:
odds_winners_df

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb,avg_fair_prb
0,CBB,85ea954d315642c472486a5a94975776,2025-11-22 15:00:00 CST,FanDuel,h2h,Duquesne Dukes,1.25,,Duquesne Dukes,Northeastern Huskies,0.8,0.7619,0.757967
1,CBB,85ea954d315642c472486a5a94975776,2025-11-22 15:00:00 CST,FanDuel,h2h,Northeastern Huskies,4.0,,Duquesne Dukes,Northeastern Huskies,0.25,0.2381,0.242033
2,CBB,d97a9e03cf58a3576404a506bb0c34f8,2025-11-22 15:00:00 CST,FanDuel,h2h,Penn State Nittany Lions,2.52,,Penn State Nittany Lions,Providence Friars,0.396825,0.3793,0.380333
3,CBB,d97a9e03cf58a3576404a506bb0c34f8,2025-11-22 15:00:00 CST,FanDuel,h2h,Providence Friars,1.54,,Penn State Nittany Lions,Providence Friars,0.649351,0.6207,0.619667
4,CBB,8b5a99d70215524f8e73004ff79b9b9d,2025-11-22 16:00:00 CST,FanDuel,h2h,Nevada Wolf Pack,1.44,,Nevada Wolf Pack,UC Santa Barbara Gauchos,0.694444,0.6636,0.6628
5,CBB,8b5a99d70215524f8e73004ff79b9b9d,2025-11-22 16:00:00 CST,FanDuel,h2h,UC Santa Barbara Gauchos,2.84,,Nevada Wolf Pack,UC Santa Barbara Gauchos,0.352113,0.3364,0.3372
6,CBB,e4c3a0e6724252afd55ec72e1e1212bc,2025-11-22 16:30:00 CST,FanDuel,h2h,Iona Gaels,2.5,,Oregon St Beavers,Iona Gaels,0.4,0.3827,0.3823
7,CBB,e4c3a0e6724252afd55ec72e1e1212bc,2025-11-22 16:30:00 CST,FanDuel,h2h,Oregon St Beavers,1.55,,Oregon St Beavers,Iona Gaels,0.645161,0.6173,0.6177
8,CBB,1b4ec87bb928f533ff03259322157af3,2025-11-22 16:30:00 CST,FanDuel,h2h,Minnesota Golden Gophers,1.91,,Minnesota Golden Gophers,San Francisco Dons,0.52356,0.5,0.492167
9,CBB,1b4ec87bb928f533ff03259322157af3,2025-11-22 16:30:00 CST,FanDuel,h2h,San Francisco Dons,1.91,,Minnesota Golden Gophers,San Francisco Dons,0.52356,0.5,0.507833


In [110]:
# Specify the columns to extract
kalshi_cols = ['ticker', 'event_start_time', 'yes_bid', 'yes_ask', 'no_bid', 'no_ask', 'home_team', 'away_team']
odds_cols = ['market', 'team', 'home_team', 'away_team', 'avg_fair_prb']

# Rename overlapping columns in odds to prevent clashes
odds_subset = odds_winners_df[odds_cols].rename(columns={
    'home_team': 'odds_home_team',
    'away_team': 'odds_away_team'
})

kalshi_subset = kalshi_winners_df[kalshi_cols]

combined_rows = []

# Loop through Kalshi rows
for _, kalshi_row in kalshi_subset.iterrows():
    kalshi_home = kalshi_row['home_team']
    for _, odds_row in odds_subset.iterrows():
        odds_home = odds_row['odds_home_team']
        if kalshi_home in odds_home:
            combined_row = pd.concat([kalshi_row, odds_row])
            combined_rows.append(combined_row)

# Create final DataFrame
combined_df = pd.DataFrame(combined_rows)
combined_df = combined_df.reset_index(drop=True)
filtered_winners_df = combined_df[(combined_df.index % 4) .isin([1, 2])].reset_index(drop=True)


In [111]:
combined_df

Unnamed: 0,ticker,event_start_time,yes_bid,yes_ask,no_bid,no_ask,home_team,away_team,market,team,odds_home_team,odds_away_team,avg_fair_prb
0,KXNCAAMBGAME-25NOV22IONAORST-ORST,2025-12-06T17:30:00-05:00,0.61,0.64,0.36,0.39,Oregon St,Iona,h2h,Iona Gaels,Oregon St Beavers,Iona Gaels,0.3823
1,KXNCAAMBGAME-25NOV22IONAORST-ORST,2025-12-06T17:30:00-05:00,0.61,0.64,0.36,0.39,Oregon St,Iona,h2h,Oregon St Beavers,Oregon St Beavers,Iona Gaels,0.6177
2,KXNCAAMBGAME-25NOV22IONAORST-IONA,2025-12-06T17:30:00-05:00,0.36,0.38,0.62,0.64,Oregon St,Iona,h2h,Iona Gaels,Oregon St Beavers,Iona Gaels,0.3823
3,KXNCAAMBGAME-25NOV22IONAORST-IONA,2025-12-06T17:30:00-05:00,0.36,0.38,0.62,0.64,Oregon St,Iona,h2h,Oregon St Beavers,Oregon St Beavers,Iona Gaels,0.6177
4,KXNCAAMBGAME-25NOV22UNCOPORT-UNCO,2025-12-06T20:00:00-05:00,0.59,0.61,0.39,0.41,Portland,Northern Colorado,h2h,N Colorado Bears,Portland Pilots,N Colorado Bears,0.604967
5,KXNCAAMBGAME-25NOV22UNCOPORT-UNCO,2025-12-06T20:00:00-05:00,0.59,0.61,0.39,0.41,Portland,Northern Colorado,h2h,Portland Pilots,Portland Pilots,N Colorado Bears,0.395033
6,KXNCAAMBGAME-25NOV22UNCOPORT-PORT,2025-12-06T20:00:00-05:00,0.39,0.41,0.59,0.61,Portland,Northern Colorado,h2h,N Colorado Bears,Portland Pilots,N Colorado Bears,0.604967
7,KXNCAAMBGAME-25NOV22UNCOPORT-PORT,2025-12-06T20:00:00-05:00,0.39,0.41,0.59,0.61,Portland,Northern Colorado,h2h,Portland Pilots,Portland Pilots,N Colorado Bears,0.395033
8,KXNCAAMBGAME-25NOV22MILWWICH-WICH,2025-12-06T19:00:00-05:00,0.84,0.85,0.15,0.16,Wichita St,Milwaukee,h2h,Milwaukee Panthers,Wichita St Shockers,Milwaukee Panthers,0.166567
9,KXNCAAMBGAME-25NOV22MILWWICH-WICH,2025-12-06T19:00:00-05:00,0.84,0.85,0.15,0.16,Wichita St,Milwaukee,h2h,Wichita St Shockers,Wichita St Shockers,Milwaukee Panthers,0.833433


In [112]:
filtered_winners_df.loc[(filtered_winners_df['avg_fair_prb'] > filtered_winners_df['yes_ask'] + 0.02) |
                        (filtered_winners_df['avg_fair_prb'] < filtered_winners_df['yes_bid'] - 0.02)]

Unnamed: 0,ticker,event_start_time,yes_bid,yes_ask,no_bid,no_ask,home_team,away_team,market,team,odds_home_team,odds_away_team,avg_fair_prb
2,KXNCAAMBGAME-25NOV22UNCOPORT-UNCO,2025-12-06T20:00:00-05:00,0.59,0.61,0.39,0.41,Portland,Northern Colorado,h2h,Portland Pilots,Portland Pilots,N Colorado Bears,0.395033
3,KXNCAAMBGAME-25NOV22UNCOPORT-PORT,2025-12-06T20:00:00-05:00,0.39,0.41,0.59,0.61,Portland,Northern Colorado,h2h,N Colorado Bears,Portland Pilots,N Colorado Bears,0.604967
6,KXNCAAMBGAME-25NOV22WEBBRICH-WEBB,2025-12-06T19:00:00-05:00,0.04,0.06,0.94,0.96,Richmond,Gardner-Webb,h2h,Richmond Spiders,Richmond Spiders,Gardner-Webb Bulldogs,0.9427
7,KXNCAAMBGAME-25NOV22WEBBRICH-RICH,2025-12-06T19:00:00-05:00,0.94,0.96,0.04,0.06,Richmond,Gardner-Webb,h2h,Gardner-Webb Bulldogs,Richmond Spiders,Gardner-Webb Bulldogs,0.0573
12,KXNCAAMBGAME-25NOV22PROVPSU-PSU,2025-12-06T16:00:00-05:00,0.4,0.41,0.59,0.6,Penn St,Providence,h2h,Providence Friars,Penn State Nittany Lions,Providence Friars,0.619667
13,KXNCAAMBGAME-25NOV22PROVPSU-PROV,2025-12-06T16:00:00-05:00,0.59,0.6,0.4,0.41,Penn St,Providence,h2h,Penn State Nittany Lions,Penn State Nittany Lions,Providence Friars,0.380333


In [113]:
# Specify the columns to extract
kalshi_cols = ['ticker', 'event_start_time', 'yes_bid', 'yes_ask', 'no_bid', 'no_ask', 'team', 'points']
odds_cols = ['market', 'team', 'home_team', 'away_team', 'avg_fair_prb', 'point']

# Rename overlapping columns in odds to prevent clashes
odds_subset = odds_spreads_df[odds_cols].rename(columns={
    'team': 'odds_team'
})

kalshi_subset = kalshi_spreads_df[kalshi_cols]

combined_rows = []

# Loop through Kalshi rows
for _, kalshi_row in kalshi_subset.iterrows():
    kalshi_home = kalshi_row['team']
    for _, odds_row in odds_subset.iterrows():
        odds_home = odds_row['odds_team']
        if (kalshi_home in odds_home) and (kalshi_row['points'] == odds_row['point']): #the point spread should be the same
            combined_row = pd.concat([kalshi_row, odds_row])
            combined_rows.append(combined_row)

# Create final DataFrame
combined_df = pd.DataFrame(combined_rows)
combined_df = combined_df.reset_index(drop=True)
filtered_spreads_df = combined_df[(combined_df.index % 4) .isin([1, 2])]


In [114]:
filtered_spreads_df

Unnamed: 0,ticker,event_start_time,yes_bid,yes_ask,no_bid,no_ask,team,points,market,odds_team,home_team,away_team,avg_fair_prb,point
1,KXNCAAMBSPREAD-25NOV22IONAORST-IONA3,2025-12-06T17:30:00-05:00,0.22,0.41,0.59,0.78,Iona,3.5,spreads,Iona Gaels,Oregon St Beavers,Iona Gaels,0.497833,3.5
2,KXNCAAMBSPREAD-25NOV22IONAORST-IONA3,2025-12-06T17:30:00-05:00,0.22,0.41,0.59,0.78,Iona,3.5,spreads,Iona Gaels,Oregon St Beavers,Iona Gaels,0.497833,3.5
5,KXNCAAMBSPREAD-25NOV22UNCOPORT-PORT3,2025-12-06T20:00:00-05:00,0.22,0.32,0.68,0.78,Portland,3.5,spreads,Portland Pilots,Portland Pilots,N Colorado Bears,0.502167,3.5
6,KXNCAAMBSPREAD-25NOV22SFMINN-MINN1,2025-12-06T17:30:00-05:00,0.43,0.47,0.53,0.57,Minnesota,1.5,spreads,Minnesota Golden Gophers,Minnesota Golden Gophers,San Francisco Dons,0.52095,1.5
