In [23]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import regex as re
import math
from collections import defaultdict
import pytz
import shin

from scipy.stats import norm
from scipy.optimize import brentq
from scipy.special import expit, logit

from rapidfuzz.fuzz import ratio

things to look at: how accurate is the prematch probability when it ends (if a team has over 50% chance to win, how often do they actually win the game?)

Todo:

bets for totals markets

api for event outcome to backtest and look at distributions of prematch probabilities to outcome

bet again during halftime break

automating daily report

automate bet placements

rerunning fetchers and jupyter for non-filled orders periodically

In [24]:
date = '2025-12-05'
odds_sport = 'nba' #cbb, cfb, nba, nfl
kalshi_sport = 'nba' #ncaab, ncaaf, nba, nfl

In [25]:
#betus good for nba, pinnacle, betonline best for everything, fanduel pretty good

odds_df = pd.read_csv(f"../data_collection/updated_scripts/oddsapi_outputs/{date}/{odds_sport}_odds.csv")
odds_df.drop(columns=['league'], inplace=True)
odds_df.rename(columns={'price': 'odds'}, inplace=True)

odds_df['vig_prob'] = 1 / odds_df['odds']

def remove_vig_probs_add(df):
    df = df.copy()
    df['fair_prb'] = np.nan

    grouped = df.groupby(['game_id', 'bookmaker', 'market'])

    for _, group in grouped:
        if len(group) < 2:
            continue
        probs = group['vig_prob']
        total = probs.sum()
        if total == 0:
            continue
        fair_probs = (probs / total).round(4)
        df.loc[group.index, 'fair_prb'] = fair_probs

    return df

def remove_vig_probs_shin(df):
    df = df.copy()
    df['fair_prb'] = np.nan

    grouped = df.groupby(['game_id', 'bookmaker', 'market'])

    for _, group in grouped:
        if len(group) < 2:
            continue
        odds = group['odds'].values
        fair_probs = shin.calculate_implied_probabilities(odds)
        df.loc[group.index, 'fair_prb'] = fair_probs

    return df

def devig_probit(p1, p2):
    """if p1 <= 0 or p2 <= 0 or p1 >= 1 or p2 >= 1:
        total = p1 + p2
        return p1 / total, p2 / total
    if p1 + p2 <= 1:
        total = p1 + p2
        return p1 / total, p2 / total"""
  
    z1 = norm.ppf(p1)
    z2 = norm.ppf(p2)
    f = lambda lam: norm.cdf(z1 - lam) + norm.cdf(z2 - lam) - 1
    lam = brentq(f, -15, 15)
    
    q1 = norm.cdf(z1 - lam)
    q2 = norm.cdf(z2 - lam)
    return q1, q2

def remove_vig_probs_probit(df):
    df = df.copy()
    df['fair_prb'] = np.nan
    grouped = df.groupby(['game_id', 'bookmaker', 'market'])
    for _, group in grouped:
        if len(group) != 2:
            continue
        p1, p2 = group['vig_prob'].values
        q1, q2 = devig_probit(p1, p2)
        df.loc[group.index, 'fair_prb'] = [q1, q2]
    return df

def devig_logit(p1, p2):
    z1 = logit(p1)
    z2 = logit(p2)
    # Solve for λ such that logistic(z1 - λ) + logistic(z2 - λ) = 1
    f = lambda lam: expit(z1 - lam) + expit(z2 - lam) - 1
    lam = brentq(f, -50, 50)
    q1 = expit(z1 - lam)
    q2 = expit(z2 - lam)
    return q1, q2

def remove_vig_probs_logit(df):
    df = df.copy()
    df['fair_prb'] = np.nan
    grouped = df.groupby(['game_id', 'bookmaker', 'market'])
    for _, group in grouped:
        if len(group) != 2:
            continue
        p1, p2 = group['vig_prob'].values
        q1, q2 = devig_logit(p1, p2)
        df.loc[group.index, 'fair_prb'] = [q1, q2]
    return df

odds_df = remove_vig_probs_logit(odds_df)


odds_winners_df = odds_df[odds_df['market'] == 'h2h'].copy()
odds_spreads_df = odds_df[odds_df['market'] == 'spreads'].copy()
odds_spreads_df = odds_spreads_df.loc[(odds_spreads_df['point'].notna()) & (odds_spreads_df['point'] < 0)]
odds_totals_df  = odds_df[odds_df['market'] == 'totals'].copy()

# Average per-team fair probabilities across DraftKings/FanDuel/Pinnacle for winners_df
WEIGHTS = {
    "Pinnacle": 0.3,
    "BetOnline.ag": 0.3,
    "BetUS": 0.2,
    "FanDuel": 0.2
}

def wavg(x, df):
    idx = x.index
    bookmakers = df.loc[idx, 'bookmaker']
    w = np.array([WEIGHTS[b] for b in bookmakers])
    return np.average(x.values, weights=w)

mask = odds_winners_df['fair_prb'].notna()
avg_by_team = (
    odds_winners_df.loc[mask]
    .groupby(['game_id', 'team'])['fair_prb']
    .transform(lambda x: wavg(x, odds_winners_df))
    .round(4)
)
odds_winners_df.loc[mask, 'avg_fair_prb'] = avg_by_team
odds_winners_df.loc[~mask, 'avg_fair_prb'] = pd.NA

#Average fair probabilities for spreads for same game, point spread, and team
mask = odds_spreads_df['fair_prb'].notna()
avg_by_point = (
    odds_spreads_df.loc[mask]
    .groupby(['game_id', 'point', 'team'])['fair_prb']
    .transform(lambda x: wavg(x, odds_spreads_df))
    .round(4)
)
odds_spreads_df['avg_fair_prb'] = avg_by_point

#Average fair probabilities for totals for same game, point spread, direction (Over/Under)
mask = odds_totals_df['fair_prb'].notna()
avg_by_tot_point = (
    odds_totals_df.loc[mask]
    .groupby(['game_id', 'point', 'team'])['fair_prb']
    .transform(lambda x: wavg(x, odds_totals_df))
    .round(4)
)
odds_totals_df['avg_fair_prb'] = avg_by_tot_point

In [26]:
kalshi_winners_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_winners.csv")
kalshi_totals_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_totals.csv")
kalshi_spreads_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_spreads.csv")

if (kalshi_sport == 'ncaaf') | (kalshi_sport == 'nfl'):
    kalshi_spreads_df['points'] = kalshi_spreads_df['title'].str.extract(r'over ([\d.]+) points\?').astype(float)
    kalshi_totals_df["points"] = kalshi_totals_df["ticker"].str.extract(r"-([0-9.]+)$").astype(float)
elif (kalshi_sport == 'ncaab') | (kalshi_sport == 'ncaabm') | (kalshi_sport == 'ncaabw') | (kalshi_sport == 'nba'):
    kalshi_spreads_df['points'] = kalshi_spreads_df['title'].str.extract(r'over ([\d.]+) Points\?').astype(float)
    kalshi_totals_df["points"] = kalshi_totals_df["ticker"].str.extract(r"-([0-9.]+)$").astype(float)

columns_to_drop = ['timestamp', 'market_type', 'yes_bid2', 'yes_ask2', 'no_bid2', 'no_ask2', 'yes_depth_bids', 'yes_depth_asks', 'no_depth_bids', 'no_depth_asks']
kalshi_winners_df.drop(columns=columns_to_drop, inplace=True)
kalshi_spreads_df.drop(columns=columns_to_drop, inplace=True)
kalshi_totals_df.drop(columns=columns_to_drop, inplace=True)

In [27]:
odds_totals_df.head()

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb,avg_fair_prb
4,NBA,61feabe0b59a9dba9be076c59ecd125d,2025-12-05 18:10:00 CST,FanDuel,totals,Over,1.93,225.5,Boston Celtics,Los Angeles Lakers,0.518135,0.494507,0.4978
5,NBA,61feabe0b59a9dba9be076c59ecd125d,2025-12-05 18:10:00 CST,FanDuel,totals,Under,1.89,225.5,Boston Celtics,Los Angeles Lakers,0.529101,0.505493,0.5022
10,NBA,61feabe0b59a9dba9be076c59ecd125d,2025-12-05 18:10:00 CST,BetOnline.ag,totals,Over,1.91,225.5,Boston Celtics,Los Angeles Lakers,0.52356,0.5,0.4978
11,NBA,61feabe0b59a9dba9be076c59ecd125d,2025-12-05 18:10:00 CST,BetOnline.ag,totals,Under,1.91,225.5,Boston Celtics,Los Angeles Lakers,0.52356,0.5,0.5022
16,NBA,61feabe0b59a9dba9be076c59ecd125d,2025-12-05 18:10:00 CST,BetUS,totals,Over,1.91,225.0,Boston Celtics,Los Angeles Lakers,0.52356,0.5,0.5


In [28]:
#get names from kalshi_winners_df
def extract_teams_from_winners(title):
    title = title.replace(" Winner?", "")
    if " at " in title:
        right, left = title.split(" at ", 1)
    elif " vs " in title:
        right, left = title.split(" vs ", 1)
    else:
        return pd.Series([None, None])  
    left = re.sub(r'\bSt\.$', 'St', left.strip())
    right = re.sub(r'\bSt\.$', 'St', right.strip())
    return pd.Series([left, right])

kalshi_winners_df[['home_team', 'away_team']] = kalshi_winners_df['title'].apply(extract_teams_from_winners)
unique_rows = kalshi_winners_df.drop_duplicates(subset=['home_team', 'away_team'])
flat_teams = pd.unique(unique_rows[['home_team', 'away_team']].values.ravel())
kalshi_winners_teams = flat_teams.tolist()

#get names from kalshi_totals_df
def extract_teams_from_totals(title):
    title = title.replace(": Total Points", "")
    if " at " in title:
        right, left = title.split(" at ", 1)
        left = re.sub(r'\bSt\.$', 'St', left.strip())
        right = re.sub(r'\bSt\.$', 'St', right.strip())
        return pd.Series([left, right])
    return None

kalshi_totals_df[['home_team', 'away_team']] = kalshi_totals_df['title'].apply(extract_teams_from_totals)
unique_rows = kalshi_winners_df.drop_duplicates(subset=['home_team', 'away_team'])
flat_teams = pd.unique(unique_rows[['home_team', 'away_team']].values.ravel())
kalshi_totals_teams = flat_teams.tolist()

#get names from kalshi_spreads_df
def extract_team_from_spreads(title):
    if " wins by " in title:
        team = title.split(" wins by ", 1)[0].strip()
        team = re.sub(r'\bSt\.$', 'St', team)
        return team
    return None

kalshi_spreads_df['team'] = kalshi_spreads_df['title'].apply(extract_team_from_spreads)
unique_teams_spread = kalshi_spreads_df['team'].drop_duplicates()
kalshi_spreads_teams = unique_teams_spread.tolist()

In [29]:
odds_spreads_df.head()

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb,avg_fair_prb
59,NBA,ffa09b2ad58a00b66f7d0aa16322155c,2025-12-05 18:40:00 CST,BetOnline.ag,totals,Under,1.87,238.5,Atlanta Hawks,Denver Nuggets,0.534759,0.510989,0.5047
71,NBA,ffa09b2ad58a00b66f7d0aa16322155c,2025-12-05 18:40:00 CST,BetUS,totals,Under,1.91,238.5,Atlanta Hawks,Denver Nuggets,0.52356,0.5,0.5047
70,NBA,ffa09b2ad58a00b66f7d0aa16322155c,2025-12-05 18:40:00 CST,BetUS,totals,Over,1.91,238.5,Atlanta Hawks,Denver Nuggets,0.52356,0.5,0.4953
65,NBA,ffa09b2ad58a00b66f7d0aa16322155c,2025-12-05 18:40:00 CST,Pinnacle,totals,Under,1.88,238.0,Atlanta Hawks,Denver Nuggets,0.531915,0.51597,0.516
64,NBA,ffa09b2ad58a00b66f7d0aa16322155c,2025-12-05 18:40:00 CST,Pinnacle,totals,Over,2.0,238.0,Atlanta Hawks,Denver Nuggets,0.5,0.48403,0.484


In [30]:
kalshi_spreads_df.head()

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h,points,team
0,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR1,Akron wins by over 1.5 Points?,active,2025-12-20T16:00:00-05:00,0.04,0.96,0.04,0.96,0.92,0.92,791.56,0.0,1.5,Akron
1,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR10,Akron wins by over 10.5 Points?,active,2025-12-20T16:00:00-05:00,0.04,0.96,0.04,0.96,0.92,0.92,787.72,0.0,10.5,Akron
2,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR13,Akron wins by over 13.5 Points?,active,2025-12-20T16:00:00-05:00,0.04,0.96,0.04,0.96,0.92,0.92,773.32,0.0,13.5,Akron
3,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR4,Akron wins by over 4.5 Points?,active,2025-12-20T16:00:00-05:00,0.56,0.59,0.41,0.44,0.03,0.03,7168.43,0.0,4.5,Akron
4,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR22,Akron wins by over 22.5 Points?,active,2025-12-20T16:00:00-05:00,0.03,0.55,0.45,0.97,0.52,0.52,2056.85,0.0,22.5,Akron


In [31]:
assert(len(matched_names['h2h']['kalshi']) == len(matched_names['h2h']['odds']))
assert(len(matched_names['spreads']['kalshi']) == len(matched_names['spreads']['odds']))
assert(len(matched_names['totals']['kalshi']) == len(matched_names['totals']['odds']))

In [32]:
odds_winners_df = odds_winners_df[
    odds_winners_df['home_team'].isin(matched_names['h2h']['odds']) |
    odds_winners_df['away_team'].isin(matched_names['h2h']['odds'])
].drop_duplicates(subset='team').sort_values(by='home_team').reset_index(drop=True)

kalshi_winners_df = kalshi_winners_df[
    kalshi_winners_df['home_team'].isin(matched_names['h2h']['kalshi']) |
    kalshi_winners_df['away_team'].isin(matched_names['h2h']['kalshi'])
].sort_values(by='home_team').reset_index(drop=True)

odds_spreads_df = odds_spreads_df[odds_spreads_df['team'].isin(matched_names['spreads']['odds'])].reset_index(drop=True)
kalshi_spreads_df = kalshi_spreads_df[kalshi_spreads_df['team'].isin(matched_names['spreads']['kalshi'])].reset_index(drop=True)

odds_totals_df = odds_totals_df[
    odds_totals_df['home_team'].isin(matched_names['totals']['odds']) |
    odds_totals_df['away_team'].isin(matched_names['totals']['odds'])
].sort_values(by='home_team').reset_index(drop=True)
kalshi_totals_df = kalshi_totals_df[
    (kalshi_totals_df['home_team'].isin(matched_names['totals']['kalshi'])) | 
    (kalshi_totals_df['away_team'].isin(matched_names['totals']['kalshi']))
    ].sort_values(by='home_team').reset_index(drop=True)


In [33]:
# Concatenate winners df

# Specify the columns to extract
kalshi_cols = ['ticker', 'yes_bid', 'yes_ask', 'no_bid', 'no_ask', 'home_team', 'away_team']
odds_cols = ['market', 'start_time', 'team', 'home_team', 'away_team', 'avg_fair_prb']

# Rename overlapping columns in odds to prevent clashes
odds_subset = odds_winners_df[odds_cols].rename(columns={
    'home_team': 'odds_home_team',
    'away_team': 'odds_away_team'
})

kalshi_subset = kalshi_winners_df[kalshi_cols].rename(columns={
    'home_team': 'kalshi_home_team',
    'away_team': 'kalshi_away_team'
})

combined_rows = []
len_matched = len(matched_names['h2h']['kalshi'])
matched_names_h2h = matched_names['h2h']

for i in range(len_matched):
    odds_name = matched_names_h2h['odds'][i]
    kalshi_name = matched_names_h2h['kalshi'][i]

    # Find the corresponding odds row
    odds_row = odds_subset.loc[odds_subset['team'] == odds_name]
    assert len(odds_row) == 1, f"Expected one row for {odds_name}, got {len(odds_row)}"

    # Find the two matching Kalshi rows
    kalshi_rows = kalshi_subset.loc[
        (kalshi_subset['kalshi_home_team'] == kalshi_name) |
        (kalshi_subset['kalshi_away_team'] == kalshi_name)
    ]
    assert len(kalshi_rows) == 2, f"Expected two rows for {kalshi_name}, got {len(kalshi_rows)}"

    # Extract rows
    k1 = kalshi_rows.iloc[0]
    k2 = kalshi_rows.iloc[1]
    midprice1 = (k1['yes_bid'] + k1['yes_ask']) / 2
    midprice2 = (k2['yes_bid'] + k2['yes_ask']) / 2

    # Extract scalar fair probability
    prb = odds_row['avg_fair_prb'].astype(float).item()

    # Choose the row closer to the odds probability
    if ((midprice1 - prb) ** 2) < ((midprice2 - prb) ** 2):
        combined_row = pd.concat([k1, odds_row.iloc[0]])
    else:
        combined_row = pd.concat([k2, odds_row.iloc[0]])

    combined_rows.append(combined_row)

combined_winners_df = pd.DataFrame(combined_rows).sort_values(by='odds_home_team')
combined_winners_df = combined_winners_df.reset_index(drop=True)

In [34]:
combined_winners_df.head()

Unnamed: 0,ticker,yes_bid,yes_ask,no_bid,no_ask,kalshi_home_team,kalshi_away_team,market,start_time,team,odds_home_team,odds_away_team,avg_fair_prb
0,KXNBAGAME-25DEC05DENATL-ATL,0.31,0.32,0.68,0.69,Atlanta,Denver,h2h,2025-12-05 18:40:00 CST,Atlanta Hawks,Atlanta Hawks,Denver Nuggets,0.3134
1,KXNBAGAME-25DEC05DENATL-DEN,0.68,0.69,0.31,0.32,Atlanta,Denver,h2h,2025-12-05 18:40:00 CST,Denver Nuggets,Atlanta Hawks,Denver Nuggets,0.6866
2,KXNBAGAME-25DEC05LALBOS-LAL,0.31,0.34,0.66,0.69,Boston,Los Angeles L,h2h,2025-12-05 18:10:00 CST,Los Angeles Lakers,Boston Celtics,Los Angeles Lakers,0.31
3,KXNBAGAME-25DEC05LALBOS-BOS,0.67,0.69,0.31,0.33,Boston,Los Angeles L,h2h,2025-12-05 18:10:00 CST,Boston Celtics,Boston Celtics,Los Angeles Lakers,0.69
4,KXNBAGAME-25DEC05INDCHI-IND,0.36,0.37,0.63,0.64,Chicago,Indiana,h2h,2025-12-05 19:10:00 CST,Indiana Pacers,Chicago Bulls,Indiana Pacers,0.3696


In [35]:
EDGE = 0.01
KELLY_UPPERBOUND = 1
BANKROLL = 300.00
Q1_WEIGHT = 1.00
Q2_WEIGHT = 1.00
Q3_WEIGHT = 1.00
Q4_WEIGHT = 1.00

midprice = (combined_winners_df['yes_bid'] + combined_winners_df['yes_ask']) / 2

edge_winners_df = combined_winners_df.loc[
    (combined_winners_df['avg_fair_prb'] >= midprice + EDGE) |
    (combined_winners_df['avg_fair_prb'] <= midprice - EDGE)
].reset_index(drop=True)

edge_winners_df = edge_winners_df.loc[((edge_winners_df['avg_fair_prb'] > 0.15) & (edge_winners_df['avg_fair_prb'] < 0.49)) |
                                      ((edge_winners_df['avg_fair_prb'] > 0.51) & (edge_winners_df['avg_fair_prb'] < 0.85)) ]

midprice_yes = (edge_winners_df['yes_bid'] + edge_winners_df['yes_ask']) / 2
midprice_no = (edge_winners_df['no_bid'] + edge_winners_df['no_ask']) / 2

q_yes = edge_winners_df['avg_fair_prb']
q_no = 1 - edge_winners_df['avg_fair_prb'] 

edge_winners_df['edge'] = np.where(q_yes > midprice_yes, q_yes - midprice_yes, q_no - midprice_no)

edge_winners_df['buy_direction'] = np.where(q_yes > midprice_yes, "yes", "no")
edge_winners_df['raw_kelly'] = np.where(q_yes > midprice_yes, edge_winners_df['edge'] / (1 - midprice_yes),
                                        edge_winners_df['edge'] / (1 - midprice_no))

total_kelly = edge_winners_df['raw_kelly'].sum() 
if total_kelly >= 1: 
    edge_winners_df['real_kelly'] = pd.DataFrame({
        'original': edge_winners_df['raw_kelly'],
        'normalized': (edge_winners_df['raw_kelly'] / total_kelly)
    }).min(axis=1)

# Define the real_kelly logic
def scale_kelly(row):
    k = row['raw_kelly']
    p = row['avg_fair_prb']
    
    if k == 0 or pd.isna(k):
        return 0
    if 0.05 <= p < 0.25:
        return min(Q1_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.25 <= p < 0.5:
        return min(Q2_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.5 <= p < 0.75:
        return min(Q3_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.75 <= p < 0.95:
        return min(Q4_WEIGHT * k, KELLY_UPPERBOUND)
    else:
        return 0 

# Apply to the DataFrame
edge_winners_df['real_kelly'] = edge_winners_df.apply(scale_kelly, axis=1)
edge_winners_df['optimal_bet'] = edge_winners_df['real_kelly'] * BANKROLL

q = edge_winners_df['avg_fair_prb']
p = midprice_yes

num_contracts = np.where(q > p, edge_winners_df['optimal_bet'] // edge_winners_df['yes_bid'], edge_winners_df['optimal_bet'] // edge_winners_df['no_bid'])
edge_winners_df['num_contracts'] = num_contracts
trading_cost = np.where(q > p, np.ceil(100*(0.0175 * num_contracts * edge_winners_df['yes_bid'] * (1 - edge_winners_df['yes_bid']))) / 100,
                        np.ceil(100*(0.0175 * num_contracts * edge_winners_df['no_bid'] * (1 - edge_winners_df['no_bid']))) / 100)
edge_winners_df['trading_cost'] = trading_cost
profit = np.where(q > p, ((1 - edge_winners_df['yes_bid']) * num_contracts - trading_cost), ((1 - edge_winners_df['no_bid']) *  num_contracts - trading_cost))
edge_winners_df['profit'] = profit
edge_winners_df['ev'] = np.where(q > p, (profit * q_yes - (edge_winners_df['optimal_bet'] + trading_cost) * (1 - q_yes)).round(2), 
                                 (profit * q_no - (edge_winners_df['optimal_bet'] + trading_cost) * (1 - q_no)).round(2))
filtered_winners_df = edge_winners_df.loc[edge_winners_df['ev'] > 0.1].reset_index(drop=True)

s = filtered_winners_df['start_time'].astype(str)
s = s.str.replace(r'\s+[A-Z]{3}$', '', regex=True)
dt = pd.to_datetime(s, errors='coerce')
filtered_winners_df['start_time'] = dt.dt.tz_localize('America/Chicago')

now = datetime.now(pytz.timezone('America/Chicago'))
#filtered_winners_df = filtered_winners_df.loc[filtered_winners_df['start_time'] > now].sort_values('odds_home_team').reset_index(drop=True)

dupe_mask = filtered_winners_df['kalshi_home_team'].duplicated(keep=False)
dupes = filtered_winners_df[dupe_mask]
uniques = filtered_winners_df[~dupe_mask]
best_dupes = dupes.loc[dupes.groupby('kalshi_home_team')['ev'].idxmax()]
filtered_winners_df = pd.concat([uniques, best_dupes], ignore_index=True)

In [36]:
team_cols = ['kalshi_home_team', 'kalshi_away_team',
             'odds_home_team', 'odds_away_team']

teams_df = filtered_winners_df[team_cols].copy()

filtered_winners_df = filtered_winners_df.drop(columns=['kalshi_home_team', 'kalshi_away_team']).reset_index(drop=True)
filtered_winners_df[['edge', 'raw_kelly', 'real_kelly']] = filtered_winners_df[['edge', 'raw_kelly', 'real_kelly']].round(4) * 100

In [37]:
filtered_winners_df

Unnamed: 0,ticker,yes_bid,yes_ask,no_bid,no_ask,market,start_time,team,odds_home_team,odds_away_team,avg_fair_prb,edge,buy_direction,raw_kelly,real_kelly,optimal_bet,num_contracts,trading_cost,profit,ev
0,KXNBAGAME-25DEC05LALBOS-LAL,0.31,0.34,0.66,0.69,h2h,2025-12-05 18:10:00-06:00,Los Angeles Lakers,Boston Celtics,Los Angeles Lakers,0.31,1.5,no,4.62,4.62,13.846154,20.0,0.08,6.72,0.32
1,KXNBAGAME-25DEC05PHXHOU-PHX,0.18,0.2,0.8,0.82,h2h,2025-12-05 19:10:00-06:00,Phoenix Suns,Houston Rockets,Phoenix Suns,0.179,1.1,no,5.79,5.79,17.368421,21.0,0.06,4.14,0.28
2,KXNBAGAME-25DEC05MIAORL-MIA,0.35,0.36,0.64,0.65,h2h,2025-12-05 18:10:00-06:00,Miami Heat,Orlando Magic,Miami Heat,0.3436,1.14,no,3.21,3.21,9.633803,15.0,0.07,5.33,0.16


In [38]:
total_loss = np.sum(filtered_winners_df['optimal_bet'])
total_profit = np.sum(filtered_winners_df['profit'])
total_ev = np.sum(filtered_winners_df['ev'])
print(f"{odds_sport} h2h portfolio summary:\n")
print(f"Max Loss: -{total_loss:.2f}")
print(f"Max Profit: {total_profit:.2f}")
print(f"Portfolio EV: {total_ev:.2f}")


nba h2h portfolio summary:

Max Loss: -40.85
Max Profit: 16.19
Portfolio EV: 0.76


In [39]:
kalshi_cols = ['ticker', 'yes_bid', 'yes_ask', 'team', 'points']
odds_cols = ['market', 'start_time', 'team', 'home_team', 'away_team', 'avg_fair_prb', 'point']

odds_subset = odds_spreads_df[odds_cols].rename(columns={
    'home_team': 'odds_home_team',
    'away_team': 'odds_away_team',
    'team': 'odds_team'
})

kalshi_subset = kalshi_spreads_df[kalshi_cols]

combined_rows = []

for _, kalshi_row in kalshi_subset.iterrows():
    kalshi_home = kalshi_row['team']
    for _, odds_row in odds_subset.iterrows():
        odds_home = odds_row['odds_team']
        if (kalshi_home in odds_home) and (kalshi_row['points'] == odds_row['point']): 
            combined_row = pd.concat([kalshi_row, odds_row])
            combined_rows.append(combined_row)

combined_spreads_df = pd.DataFrame(combined_rows).drop_duplicates(subset='ticker') #only works because oddsapi only pulls odds 
combined_spreads_df = combined_spreads_df.reset_index(drop=True)                    #for only one point line for each bookmaker 



In [40]:
kalshi_spreads_df.head()

Unnamed: 0,ticker,title,status,event_start_time,yes_bid,yes_ask,no_bid,no_ask,yes_spread,no_spread,liquidity_dollars,volume_24h,points,team
0,KXNBASPREAD-25DEC05CHATOR-CHA7,0.11,0.14,Charlotte,7.5,spreads,2025-12-05 18:40:00 CST,Charlotte Hornets,Toronto Raptors,Charlotte Hornets,0.5127,7.5,,
1,KXNBASPREAD-25DEC05DENATL-ATL6,0.15,0.19,Atlanta,6.5,spreads,2025-12-05 18:40:00 CST,Atlanta Hawks,Atlanta Hawks,Denver Nuggets,0.5082,6.5,,
2,KXNBASPREAD-25DEC05SASCLE-SAS4,0.26,0.3,San Antonio,4.5,spreads,2025-12-05 18:40:00 CST,San Antonio Spurs,Cleveland Cavaliers,San Antonio Spurs,0.5109,4.5,,


In [41]:
EDGE = 0.02
KELLY_UPPERBOUND = 1
BANKROLL = 150.00
Q1_WEIGHT = 1.00
Q2_WEIGHT = 1.00
Q3_WEIGHT = 1.00
Q4_WEIGHT = 1.00

midprice = (combined_spreads_df['yes_bid'] + combined_spreads_df['yes_ask']) / 2

edge_spreads_df = combined_spreads_df.loc[
    (combined_spreads_df['avg_fair_prb'] >= midprice + EDGE) |
    (combined_spreads_df['avg_fair_prb'] <= midprice - EDGE)
].reset_index(drop=True)

midprice_yes = (edge_spreads_df['yes_bid'] + edge_spreads_df['yes_ask']) / 2
midprice_no = (edge_spreads_df['no_bid'] + edge_spreads_df['no_ask']) / 2

q_yes = edge_spreads_df['avg_fair_prb']
q_no = 1 - edge_spreads_df['avg_fair_prb'] 

edge_spreads_df['edge'] = np.where(q_yes > midprice_yes, q_yes - midprice_yes, q_no - midprice_no)

edge_spreads_df['raw_kelly'] = np.where(q_yes > midprice_yes, edge_spreads_df['edge'] / (1 - midprice_yes),
                                        edge_spreads_df['edge'] / (1 - midprice_no))

total_kelly = edge_spreads_df['raw_kelly'].sum() 
if total_kelly >= 1: 
    edge_spreads_df['real_kelly'] = pd.DataFrame({
        'original': edge_spreads_df['raw_kelly'],
        'normalized': (edge_spreads_df['raw_kelly'] / total_kelly)
    }).min(axis=1)

# Define the real_kelly logic
def scale_kelly(row):
    k = row['raw_kelly']
    p = row['avg_fair_prb']
    
    if k == 0 or pd.isna(k):
        return 0
    if 0.05 <= p < 0.25:
        return min(Q1_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.25 <= p < 0.5:
        return min(Q2_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.5 <= p < 0.75:
        return min(Q3_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.75 <= p < 0.95:
        return min(Q4_WEIGHT * k, KELLY_UPPERBOUND)
    else:
        return 0 

# Apply to the DataFrame
edge_spreads_df['real_kelly'] = edge_spreads_df.apply(scale_kelly, axis=1)
edge_spreads_df['optimal_bet'] = edge_spreads_df['real_kelly'] * BANKROLL

q = edge_spreads_df['avg_fair_prb']
p = midprice_yes

num_contracts = np.where(q > p, edge_spreads_df['optimal_bet'] // edge_spreads_df['yes_bid'], edge_spreads_df['optimal_bet'] // edge_spreads_df['no_bid'])
edge_spreads_df['num_contracts'] = num_contracts
trading_cost = np.where(q > p, np.ceil(100*(0.0175 * num_contracts * edge_spreads_df['yes_bid'] * (1 - edge_spreads_df['yes_bid']))) / 100,
                        np.ceil(100*(0.0175 * num_contracts * edge_spreads_df['no_bid'] * (1 - edge_spreads_df['no_bid']))) / 100)
edge_spreads_df['trading_cost'] = trading_cost
profit = np.where(q > p, ((1 - edge_spreads_df['yes_bid']) * num_contracts - trading_cost), ((1 - edge_spreads_df['no_bid']) *  num_contracts - trading_cost))
edge_spreads_df['profit'] = profit
edge_spreads_df['ev'] = np.where(q > p, (profit * q_yes - (edge_spreads_df['optimal_bet'] + trading_cost) * (1 - q_yes)).round(2), 
                                 (profit * q_no - (edge_spreads_df['optimal_bet'] + trading_cost) * (1 - q_no)).round(2))
filtered_spreads_df = edge_spreads_df.loc[edge_spreads_df['ev'] > 0.10].reset_index(drop=True)

s = filtered_spreads_df['start_time'].astype(str)
s = s.str.replace(r'\s+[A-Z]{3}$', '', regex=True)
dt = pd.to_datetime(s, errors='coerce')
filtered_spreads_df['start_time'] = dt.dt.tz_localize('America/Chicago')

now = datetime.now(pytz.timezone('America/Chicago'))
#filtered_winners_df = filtered_winners_df.loc[filtered_winners_df['start_time'] > now].sort_values('odds_home_team').reset_index(drop=True)
filtered_spreads_df = filtered_spreads_df.drop(columns=['start_time', 'odds_home_team', 'odds_away_team'])
filtered_spreads_df[['edge', 'raw_kelly', 'real_kelly']] = filtered_spreads_df[['edge', 'raw_kelly', 'real_kelly']].round(4) * 100

In [42]:
# First compute midprice in combined_winners_df
combined_winners_df['midprice'] = (
    combined_winners_df['yes_bid'] + combined_winners_df['yes_ask']
) / 2

# Merge spreads df with winner probabilities
merged = filtered_spreads_df.merge(
    combined_winners_df[['team', 'midprice']],
    left_on='odds_team',      # column in filtered_spreads_df
    right_on='team',          # column in combined_winners_df
    how='left'
)

# Filter out teams with win prob < 50% OR missing (NaN)
filtered_spreads_df = merged.loc[
    merged['midprice'] >= 0.50
].reset_index(drop=True)


In [43]:
filtered_spreads_df

Unnamed: 0,ticker,yes_bid,yes_ask,no_bid,no_ask,kalshi_pts,midprice,team,avg_fair_prb,odds_pts,buy_direction,edge,raw_kelly,real_kelly,optimal_bet,num_contracts,trading_cost,profit,ev
0,KXNCAAMBSPREAD-25DEC06AKRTULN-AKR7,0.44,0.47,0.53,0.56,7.5,0.455,Akron Zips,0.4878,-7.5,yes,3.28,6.02,6.02,9.027523,20.0,0.09,11.11,0.75
1,KXNCAAMBSPREAD-25DEC06FRESARK-ARK15,0.04,0.96,0.04,0.96,15.5,0.5,Arkansas Razorbacks,0.5208,-19.5,yes,2.08,4.16,4.16,6.24,156.0,0.11,149.65,74.89
2,KXNCAAMBSPREAD-25DEC06FRESARK-ARK12,0.03,0.97,0.03,0.97,12.5,0.5,Arkansas Razorbacks,0.5208,-19.5,yes,2.08,4.16,4.16,6.24,208.0,0.11,201.65,101.98
3,KXNCAAMBSPREAD-25DEC06FRESARK-ARK6,0.03,0.97,0.03,0.97,6.5,0.5,Arkansas Razorbacks,0.5208,-19.5,yes,2.08,4.16,4.16,6.24,208.0,0.11,201.65,101.98
4,KXNCAAMBSPREAD-25DEC06FRESARK-ARK9,0.03,0.97,0.03,0.97,9.5,0.5,Arkansas Razorbacks,0.5208,-19.5,yes,2.08,4.16,4.16,6.24,208.0,0.11,201.65,101.98
5,KXNCAAMBSPREAD-25DEC06ARPBDEP-DEP23,0.5,0.53,0.47,0.5,23.5,0.515,DePaul Blue Demons,0.4945,-23.5,no,2.05,3.98,3.98,5.970874,12.0,0.06,6.3,0.2
6,KXNCAAMBSPREAD-25DEC06ILLTENN-ILL7,0.03,0.61,0.39,0.97,7.5,0.32,Illinois St Redbirds,0.511,-21.0,yes,19.1,28.09,28.09,42.132353,1404.0,0.72,1361.16,674.6
7,KXNCAAMBSPREAD-25DEC06ILLTENN-ILL7,0.03,0.61,0.39,0.97,7.5,0.32,Illinois St Redbirds,0.4873,-21.5,yes,16.73,24.6,24.6,36.904412,1230.0,0.63,1192.47,561.85
8,KXNCAAMBSPREAD-25DEC06ILLTENN-ILL1,0.4,0.44,0.56,0.6,1.5,0.42,Illinois St Redbirds,0.511,-21.0,yes,9.1,15.69,15.69,23.534483,58.0,0.25,34.55,6.02
9,KXNCAAMBSPREAD-25DEC06ILLTENN-ILL1,0.4,0.44,0.56,0.6,1.5,0.42,Illinois St Redbirds,0.4873,-21.5,yes,6.73,11.6,11.6,17.405172,43.0,0.19,25.61,3.46


In [44]:
total_loss = np.sum(filtered_spreads_df['optimal_bet'])
total_profit = np.sum(filtered_spreads_df['profit'])
total_ev = np.sum(filtered_spreads_df['ev'])
print(f"{odds_sport} spreads portfolio summary:\n")
print(f"Max Loss: -{total_loss:.2f}")
print(f"Max Profit: {total_profit:.2f}")
print(f"Portfolio EV: {total_ev:.2f}")


nba spreads portfolio summary:

Max Loss: -1239.14
Max Profit: 27979.37
Portfolio EV: 13625.62
