In [1050]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import regex as re
import math
from collections import defaultdict
import pytz

from rapidfuzz.fuzz import ratio

things to look at: how accurate is the prematch probability when it ends (if a team has over 50% chance to win, how often do they actually win the game?)

if my fair probability is wildely different than the midprice, should I weigh it less or find a new fair probability? How often is my direction right?

can i still find edge in a very wide market?

In [1051]:
date = '2025-11-26'
odds_sport = 'cbb' #cbb, cfb, nba, nfl
kalshi_sport = 'ncaab' #ncaab, ncaaf, nba, nfl

In [1052]:
odds_df = pd.read_csv(f"../data_collection/updated_scripts/oddsapi_outputs/{date}/{odds_sport}_odds.csv")
odds_df.drop(columns=['league'], inplace=True)
odds_df.rename(columns={'price': 'odds'}, inplace=True)
odds_df['vig_prob'] = 1 / odds_df['odds']

def remove_vig_probs(df):
    df = df.copy()
    df['fair_prb'] = pd.NA

    grouped = df.groupby(['game_id', 'bookmaker', 'market'])

    for _, group in grouped:
        if len(group) < 2:
            continue
        probs = group['vig_prob']
        total = probs.sum()
        if total == 0:
            continue
        fair_probs = (probs / total).round(4)
        df.loc[group.index, 'fair_prb'] = fair_probs

    return df

odds_df = remove_vig_probs(odds_df)


odds_winners_df = odds_df[odds_df['market'] == 'h2h'].copy()
odds_spreads_df = odds_df[odds_df['market'] == 'spreads'].copy()
odds_spreads_df = odds_spreads_df.loc[(odds_spreads_df['point'].notna()) & (odds_spreads_df['point'] > 0)]
odds_totals_df  = odds_df[odds_df['market'] == 'totals'].copy()

# Average per-team fair probabilities across DraftKings/FanDuel/Pinnacle for winners_df
mask = odds_winners_df['fair_prb'].notna()
avg_by_team = (
    odds_winners_df.loc[mask]
    .groupby(['game_id', 'team'])['fair_prb']
    .transform('median')
    .round(4)
)
odds_winners_df.loc[mask, 'avg_fair_prb'] = avg_by_team
odds_winners_df.loc[~mask, 'avg_fair_prb'] = pd.NA

#Average fair probabilities for spreads for same game, point spread, and team
mask = odds_spreads_df['fair_prb'].notna()
avg_by_point = (
    odds_spreads_df.loc[mask]
    .groupby(['game_id', 'point', 'team'])['fair_prb']
    .transform('mean')
    .round(4)
)
odds_spreads_df['avg_fair_prb'] = avg_by_point

#Average fair probabilities for totals for same game, point spread, direction (Over/Under)
mask = odds_totals_df['fair_prb'].notna()
avg_by_tot_point = (
    odds_totals_df.loc[mask]
    .groupby(['game_id', 'point', 'team'])['fair_prb']
    .transform('mean')
    .round(4)
)
odds_totals_df['avg_fair_prb'] = avg_by_tot_point

In [1053]:
kalshi_winners_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_winners.csv")
kalshi_totals_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_totals.csv")
kalshi_spreads_df = pd.read_csv(f"../data_collection/updated_scripts/kalshi_data_logs/{date}/{kalshi_sport}_spreads.csv")

if kalshi_sport == 'ncaaf':
    kalshi_spreads_df['points'] = kalshi_spreads_df['title'].str.extract(r'over ([\d.]+) points\?').astype(float)
    kalshi_totals_df["points"] = kalshi_totals_df["ticker"].str.extract(r"-([0-9.]+)$").astype(float)
elif (kalshi_sport == 'ncaab') | (kalshi_sport == 'nba'):
    kalshi_spreads_df['points'] = kalshi_spreads_df['title'].str.extract(r'over ([\d.]+) Points\?').astype(float)
    kalshi_totals_df["points"] = kalshi_totals_df["ticker"].str.extract(r"-([0-9.]+)$").astype(float)
    



columns_to_drop = ['timestamp', 'market_type', 'yes_bid2', 'yes_ask2', 'no_bid2', 'no_ask2', 'yes_depth_bids', 'yes_depth_asks', 'no_depth_bids', 'no_depth_asks']
kalshi_winners_df.drop(columns=columns_to_drop, inplace=True)
kalshi_spreads_df.drop(columns=columns_to_drop, inplace=True)
kalshi_totals_df.drop(columns=columns_to_drop, inplace=True)

In [1054]:
odds_totals_df.head()

Unnamed: 0,sport,game_id,start_time,bookmaker,market,team,odds,point,home_team,away_team,vig_prob,fair_prb,avg_fair_prb
2,CBB,175f8b0297ed85ce8296d2f04320acbb,2025-11-26 13:05:58 CST,BetMGM,totals,Over,1.87,155.5,Fairleigh Dickinson Knights,Army Knights,0.534759,0.5,0.5
3,CBB,175f8b0297ed85ce8296d2f04320acbb,2025-11-26 13:05:58 CST,BetMGM,totals,Under,1.87,155.5,Fairleigh Dickinson Knights,Army Knights,0.534759,0.5,0.5
10,CBB,33aee93b94479da44421c92cd8886ee5,2025-11-26 13:30:00 CST,Pinnacle,totals,Over,1.91,134.0,Bradley Braves,Liberty Flames,0.52356,0.4893,0.4893
11,CBB,33aee93b94479da44421c92cd8886ee5,2025-11-26 13:30:00 CST,Pinnacle,totals,Under,1.83,134.0,Bradley Braves,Liberty Flames,0.546448,0.5107,0.5107
14,CBB,33aee93b94479da44421c92cd8886ee5,2025-11-26 13:30:00 CST,BetMGM,totals,Over,1.98,136.5,Bradley Braves,Liberty Flames,0.505051,0.4762,0.4762


In [1055]:
#get names from kalshi_winners_df
def extract_teams_from_winners(title):
    title = title.replace(" Winner?", "")
    if " at " in title:
        right, left = title.split(" at ", 1)
    elif " vs " in title:
        right, left = title.split(" vs ", 1)
    else:
        return pd.Series([None, None])  
    left = re.sub(r'\bSt\.$', 'St', left.strip())
    right = re.sub(r'\bSt\.$', 'St', right.strip())
    return pd.Series([left, right])

kalshi_winners_df[['home_team', 'away_team']] = kalshi_winners_df['title'].apply(extract_teams_from_winners)
unique_rows = kalshi_winners_df.drop_duplicates(subset=['home_team', 'away_team'])
flat_teams = pd.unique(unique_rows[['home_team', 'away_team']].values.ravel())
kalshi_winners_teams = flat_teams.tolist()

#get names from kalshi_totals_df
def extract_teams_from_totals(title):
    title = title.replace(": Total Points", "")
    if " at " in title:
        right, left = title.split(" at ", 1)
        left = re.sub(r'\bSt\.$', 'St', left.strip())
        right = re.sub(r'\bSt\.$', 'St', right.strip())
        return pd.Series([left, right])
    return None

kalshi_totals_df[['home_team', 'away_team']] = kalshi_totals_df['title'].apply(extract_teams_from_totals)
unique_rows = kalshi_winners_df.drop_duplicates(subset=['home_team', 'away_team'])
flat_teams = pd.unique(unique_rows[['home_team', 'away_team']].values.ravel())
kalshi_totals_teams = flat_teams.tolist()

#get names from kalshi_spreads_df
def extract_team_from_spreads(title):
    if " wins by " in title:
        team = title.split(" wins by ", 1)[0].strip()
        team = re.sub(r'\bSt\.$', 'St', team)
        return team
    return None

kalshi_spreads_df['team'] = kalshi_spreads_df['title'].apply(extract_team_from_spreads)
unique_teams_spread = kalshi_spreads_df['team'].drop_duplicates()
kalshi_spreads_teams = unique_teams_spread.tolist()

In [1056]:
odds_teams_by_market = odds_df.groupby('market')['team'].unique().to_dict()

def fuzzy_match_kalshi_to_odds(kalshi_teams, odds_team_names):
    matched_kalshi = []
    matched_odds = []
    candidates_dict = defaultdict(list)

    kalshi_sorted = sorted(kalshi_teams, key=lambda x: x[0] if x else '')
    remaining_odds = sorted(odds_team_names.tolist().copy())

    for kalshi_name in kalshi_sorted:
        candidates = []
        for odds_name in remaining_odds:
            if kalshi_name in odds_name:
                candidates.append(odds_name)
        if len(candidates) == 1:
            candidates_dict[candidates[0]].append(kalshi_name)
        elif len(candidates) > 1:
            best_fit = candidates[0]
            best_ratio = ratio(best_fit, kalshi_name)
            for name in candidates:
                curr_ratio = ratio(name, kalshi_name)
                if curr_ratio > best_ratio:
                    best_fit = name
                    best_ratio = curr_ratio
            candidates_dict[best_fit].append(kalshi_name)
    
    for odd, kalsh in candidates_dict.items():
        best_fit = kalsh[0]
        best_ratio = ratio(best_fit, odd)
        if len(kalsh) > 1:
            for name in kalsh:
                curr_ratio = ratio(name, odd)
                if curr_ratio > best_ratio:
                    best_fit = name
                    best_ratio = curr_ratio
        matched_odds.append(odd)
        matched_kalshi.append(best_fit)


    return matched_kalshi, matched_odds


# Winners / h2h
matched_kalshi_h2h, matched_odds_h2h = fuzzy_match_kalshi_to_odds(
    kalshi_winners_teams,
    odds_teams_by_market.get('h2h', [])
)

# Spreads
matched_kalshi_spreads, matched_odds_spreads = fuzzy_match_kalshi_to_odds(
    kalshi_spreads_teams,
    odds_teams_by_market.get('spreads', [])
)

# Totals (match only Over/Under)
totals_odds_df = odds_df[odds_df['market'] == 'totals']
odds_totals_teams = pd.unique(totals_odds_df[['home_team', 'away_team']].values.ravel())
matched_kalshi_totals, matched_odds_totals = fuzzy_match_kalshi_to_odds(
    kalshi_totals_teams,
    odds_totals_teams
)

matched_names = {
    'h2h': {
        'kalshi': matched_kalshi_h2h,
        'odds': matched_odds_h2h
    },
    'spreads': {
        'kalshi': matched_kalshi_spreads,
        'odds': matched_odds_spreads
    },
    'totals': {
        'kalshi': matched_kalshi_totals,
        'odds': matched_odds_totals
    }
}


In [1057]:
assert(len(matched_names['h2h']['kalshi']) == len(matched_names['h2h']['odds']))
assert(len(matched_names['spreads']['kalshi']) == len(matched_names['spreads']['odds']))
assert(len(matched_names['totals']['kalshi']) == len(matched_names['totals']['odds']))

In [1058]:
odds_winners_df = odds_winners_df[
    odds_winners_df['home_team'].isin(matched_names['h2h']['odds']) |
    odds_winners_df['away_team'].isin(matched_names['h2h']['odds'])
].drop_duplicates(subset='team').sort_values(by='home_team').reset_index(drop=True)

kalshi_winners_df = kalshi_winners_df[
    kalshi_winners_df['home_team'].isin(matched_names['h2h']['kalshi']) |
    kalshi_winners_df['away_team'].isin(matched_names['h2h']['kalshi'])
].sort_values(by='home_team').reset_index(drop=True)

odds_spreads_df = odds_spreads_df[odds_spreads_df['team'].isin(matched_names['spreads']['odds'])].reset_index(drop=True)
kalshi_spreads_df = kalshi_spreads_df[kalshi_spreads_df['team'].isin(matched_names['spreads']['kalshi'])].reset_index(drop=True)

odds_totals_df = odds_totals_df[
    odds_totals_df['home_team'].isin(matched_names['totals']['odds']) |
    odds_totals_df['away_team'].isin(matched_names['totals']['odds'])
].sort_values(by='home_team').reset_index(drop=True)
kalshi_totals_df = kalshi_totals_df[
    (kalshi_totals_df['home_team'].isin(matched_names['totals']['kalshi'])) | 
    (kalshi_totals_df['away_team'].isin(matched_names['totals']['kalshi']))
    ].sort_values(by='home_team').reset_index(drop=True)


In [1059]:
# Concatenate winners df

# Specify the columns to extract
kalshi_cols = ['ticker', 'yes_bid', 'yes_ask', 'home_team', 'away_team']
odds_cols = ['market', 'start_time', 'team', 'home_team', 'away_team', 'avg_fair_prb']

# Rename overlapping columns in odds to prevent clashes
odds_subset = odds_winners_df[odds_cols].rename(columns={
    'home_team': 'odds_home_team',
    'away_team': 'odds_away_team'
})

kalshi_subset = kalshi_winners_df[kalshi_cols].rename(columns={
    'home_team': 'kalshi_home_team',
    'away_team': 'kalshi_away_team'
})

combined_rows = []
len_matched = matched_names['h2h']['kalshi']
matched_names_h2h = matched_names['h2h']

for i in range(len(len_matched)):
    odds_name = matched_names_h2h['odds'][i]
    kalshi_name = matched_names_h2h['kalshi'][i]

    # Find the corresponding odds row
    odds_row = odds_subset.loc[odds_subset['team'] == odds_name]
    assert len(odds_row) == 1, f"Expected one row for {odds_name}, got {len(odds_row)}"

    # Find the two matching Kalshi rows
    kalshi_rows = kalshi_subset.loc[
        (kalshi_subset['kalshi_home_team'] == kalshi_name) |
        (kalshi_subset['kalshi_away_team'] == kalshi_name)
    ]
    assert len(kalshi_rows) == 2, f"Expected two rows for {kalshi_name}, got {len(kalshi_rows)}"

    # Extract rows
    k1 = kalshi_rows.iloc[0]
    k2 = kalshi_rows.iloc[1]
    midprice1 = (k1['yes_bid'] + k1['yes_ask']) / 2
    midprice2 = (k2['yes_bid'] + k2['yes_ask']) / 2

    # Extract scalar fair probability
    prb = odds_row['avg_fair_prb'].astype(float).item()

    # Choose the row closer to the odds probability
    if ((midprice1 - prb) ** 2) < ((midprice2 - prb) ** 2):
        combined_row = pd.concat([k1, odds_row.iloc[0]])
    else:
        combined_row = pd.concat([k2, odds_row.iloc[0]])

    combined_rows.append(combined_row)

combined_winners_df = pd.DataFrame(combined_rows)
combined_winners_df = combined_winners_df.reset_index(drop=True)

In [1060]:
EDGE = 0.01
KELLY_UPPERBOUND = 1
BANKROLL = 200.00
Q1_WEIGHT = 1.00
Q2_WEIGHT = 1.00
Q3_WEIGHT = 1.00
Q4_WEIGHT = 1.00

edge_winners_df = combined_winners_df.loc[
    (combined_winners_df['avg_fair_prb'] >= combined_winners_df['yes_bid'] + EDGE) |
    (combined_winners_df['avg_fair_prb'] <= combined_winners_df['yes_ask'] - EDGE)
].reset_index(drop=True)

midprice = (edge_winners_df['yes_bid'] + edge_winners_df['yes_ask']) / 2

q = edge_winners_df['avg_fair_prb']
p = midprice

edge_winners_df['raw_kelly'] = np.where(
    q > p,
    (q - p) / (1 - p),      # long position
    (p - q) / p           # short position
)

total_kelly = edge_winners_df['raw_kelly'].sum()
"""if total_kelly >= 1:
    edge_winners_df['raw_kelly'] = pd.DataFrame({
        'original': edge_winners_df['raw_kelly'],
        'normalized': (edge_winners_df['raw_kelly'] / total_kelly)
    }).min(axis=1)"""

# Define the real_kelly logic
def scale_kelly(row):
    k = row['raw_kelly']
    p = row['avg_fair_prb']
    
    if k == 0 or pd.isna(k):
        return 0
    if 0.05 <= p < 0.25:
        return min(Q1_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.25 <= p < 0.5:
        return min(Q2_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.5 <= p < 0.75:
        return min(Q3_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.75 <= p < 0.95:
        return min(Q4_WEIGHT * k, KELLY_UPPERBOUND)
    else:
        return 0  # fallback if out of range

# Apply to the DataFrame
edge_winners_df['real_kelly'] = edge_winners_df.apply(scale_kelly, axis=1)
edge_winners_df['optimal_bet'] = edge_winners_df['real_kelly'] * BANKROLL

#fix this logic so that instead of shorting, should be buying 'No'. So probs should add back no_bid, no_ask columns.
q = edge_winners_df['avg_fair_prb']
p = midprice

num_contracts = np.where(q > p, edge_winners_df['optimal_bet'] // edge_winners_df['yes_bid'], -1 * edge_winners_df['optimal_bet'] // edge_winners_df['yes_ask'])
edge_winners_df['num_contracts'] = num_contracts
trading_cost = np.where(q > p, np.ceil(100*(0.0175 * abs(num_contracts) * edge_winners_df['yes_bid'] * (1 - edge_winners_df['yes_bid']))) / 100,
                        np.ceil(100*(0.0175 * abs(num_contracts) * edge_winners_df['yes_ask'] * (1 - edge_winners_df['yes_ask']))) / 100)
edge_winners_df['trading_cost'] = trading_cost
profit = np.where(q > p, ((1 - edge_winners_df['yes_bid']) *  num_contracts - trading_cost), (edge_winners_df['yes_ask'] *  abs(num_contracts) - trading_cost))
edge_winners_df['profit'] = profit
edge_winners_df['ev'] = np.where(q > p, (profit * edge_winners_df['avg_fair_prb'] - edge_winners_df['optimal_bet'] * (1 - edge_winners_df['avg_fair_prb'])).round(2), 
                                 (profit * (1 - edge_winners_df['avg_fair_prb']) - edge_winners_df['optimal_bet'] * edge_winners_df['avg_fair_prb']).round(2))
filtered_winners_df = edge_winners_df.loc[edge_winners_df['ev'] > 0.15].reset_index(drop=True)

filtered_winners_df['start_time'] = pd.to_datetime(filtered_winners_df['start_time'])
filtered_winners_df['start_time'] = (
    filtered_winners_df['start_time']
    .dt.tz_localize('America/Chicago', ambiguous='NaT', nonexistent='NaT')
)
now = datetime.now(pytz.timezone('America/Chicago'))
filtered_winners_df = filtered_winners_df.loc[filtered_winners_df['start_time'] > now].sort_values('odds_home_team').reset_index(drop=True)

  filtered_winners_df['start_time'] = pd.to_datetime(filtered_winners_df['start_time'])


In [1061]:
filtered_winners_df

Unnamed: 0,ticker,yes_bid,yes_ask,kalshi_home_team,kalshi_away_team,market,start_time,team,odds_home_team,odds_away_team,avg_fair_prb,raw_kelly,real_kelly,optimal_bet,num_contracts,trading_cost,profit,ev
0,KXNCAAMBGAME-25NOV26UNCOAFA-AFA,0.33,0.34,Air Force,Northern Colorado,h2h,2025-11-26 17:00:00-06:00,Air Force Falcons,Air Force Falcons,N Colorado Bears,0.3556,0.030977,0.030977,6.195489,18.0,0.07,11.99,0.27
1,KXNCAAMBGAME-25NOV26SIUMEM-SIU,0.15,0.16,Memphis,Southern Illinois,h2h,2025-11-26 19:00:00-06:00,Southern Illinois Salukis,Memphis Tigers,Southern Illinois Salukis,0.1706,0.018462,0.018462,3.692308,24.0,0.06,20.34,0.41
2,KXNCAAMBGAME-25NOV26ALSTUNM-ALST,0.04,0.05,New Mexico,Alabama St,h2h,2025-11-26 20:00:00-06:00,Alabama St Hornets,New Mexico Lobos,Alabama St Hornets,0.0566,0.012147,0.012147,2.429319,60.0,0.05,57.55,0.97
3,KXNCAAMBGAME-25NOV26BAYSDSU-SDSU,0.45,0.47,San Diego St,Baylor,h2h,2025-11-26 21:30:00-06:00,San Diego St Aztecs,San Diego St Aztecs,Baylor Bears,0.4531,0.015,0.015,3.0,-7.0,0.04,3.25,0.42
4,KXNCAAMBGAME-25NOV26DENWYO-DEN,0.22,0.23,Wyoming,Denver,h2h,2025-11-26 19:30:00-06:00,Denver Pioneers,Wyoming Cowboys,Denver Pioneers,0.2395,0.01871,0.01871,3.741935,17.0,0.06,13.2,0.32


In [1062]:
total_loss = np.sum(filtered_winners_df['optimal_bet'])
total_profit = np.sum(filtered_winners_df['profit'])
total_ev = np.sum(filtered_winners_df['ev'])
print(f"{odds_sport} h2h portfolio summary:\n")
print(f"Max Loss: -{total_loss:.2f}")
print(f"Max Profit: {total_profit:.2f}")
print(f"Portfolio EV: {total_ev:.2f}")


cbb h2h portfolio summary:

Max Loss: -19.06
Max Profit: 106.33
Portfolio EV: 2.39


In [1063]:
kalshi_cols = ['ticker', 'yes_bid', 'yes_ask', 'team', 'points']
odds_cols = ['market', 'start_time', 'team', 'home_team', 'away_team', 'avg_fair_prb', 'point']

odds_subset = odds_spreads_df[odds_cols].rename(columns={
    'home_team': 'odds_home_team',
    'away_team': 'odds_away_team',
    'team': 'odds_team'
})

kalshi_subset = kalshi_spreads_df[kalshi_cols]

combined_rows = []

for _, kalshi_row in kalshi_subset.iterrows():
    kalshi_home = kalshi_row['team']
    for _, odds_row in odds_subset.iterrows():
        odds_home = odds_row['odds_team']
        if (kalshi_home in odds_home) and (kalshi_row['points'] == odds_row['point']): 
            combined_row = pd.concat([kalshi_row, odds_row])
            combined_rows.append(combined_row)

combined_spreads_df = pd.DataFrame(combined_rows).drop_duplicates(subset='ticker') #only works because oddsapi only pulls odds 
combined_spreads_df = combined_spreads_df.reset_index(drop=True)                    #for only one point line for each bookmaker 



In [1064]:
EDGE = 0.01
KELLY_UPPERBOUND = 1
BANKROLL = 100.00
Q1_WEIGHT = 1.00
Q2_WEIGHT = 1.00
Q3_WEIGHT = 1.00
Q4_WEIGHT = 1.00

edge_spreads_df = combined_spreads_df.loc[(combined_spreads_df['avg_fair_prb'] >= combined_spreads_df['yes_ask'] + EDGE) |
                        (combined_spreads_df['avg_fair_prb'] <= combined_spreads_df['yes_bid'] - EDGE)].reset_index(drop=True)

midprice = (edge_spreads_df['yes_bid'] + edge_spreads_df['yes_ask']) / 2

q = edge_spreads_df['avg_fair_prb']
p = midprice

edge_spreads_df['raw_kelly'] = np.where(
    q > p,
    (q - p) / (1 - p),      # long position
    (p - q) / p           # short position
)

total_kelly = edge_spreads_df['raw_kelly'].sum()
edge_spreads_df['raw_kelly'] = pd.DataFrame({
    'original': edge_spreads_df['raw_kelly'],
    'normalized': (edge_spreads_df['raw_kelly'] / total_kelly)
}).min(axis=1)

# Define the real_kelly logic
def scale_kelly(row):
    k = row['raw_kelly']
    p = row['avg_fair_prb']
    
    if k == 0 or pd.isna(k):
        return 0
    if 0.1 <= p < 0.25:
        return min(Q1_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.25 <= p < 0.5:
        return min(Q2_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.5 <= p < 0.75:
        return min(Q3_WEIGHT * k, KELLY_UPPERBOUND)
    elif 0.75 <= p < 0.9:
        return min(Q4_WEIGHT * k, KELLY_UPPERBOUND)
    else:
        return 0  # fallback if out of range

# Apply to the DataFrame
edge_spreads_df['real_kelly'] = edge_spreads_df.apply(scale_kelly, axis=1)
edge_spreads_df['optimal_bet'] = edge_spreads_df['real_kelly'] * BANKROLL

num_contracts = edge_spreads_df['optimal_bet'] // edge_spreads_df['yes_bid']
edge_spreads_df['num_contracts'] = num_contracts
trading_cost = np.ceil(100 * (0.0175 * num_contracts * edge_spreads_df['yes_bid'] * (1 - edge_spreads_df['yes_bid']))) / 100
edge_spreads_df['trading_cost'] = trading_cost
profit = (1 - edge_spreads_df['yes_bid']) *  num_contracts - trading_cost
edge_spreads_df['profit'] = profit
edge_spreads_df['ev'] = (profit * edge_spreads_df['avg_fair_prb'] - edge_spreads_df['optimal_bet'] * (1 - edge_spreads_df['avg_fair_prb'])).round(2)
filtered_spreads_df = edge_spreads_df.loc[edge_spreads_df['ev'] > 0].reset_index(drop=True)

In [1065]:
edge_spreads_df

Unnamed: 0,ticker,yes_bid,yes_ask,team,points,market,start_time,odds_team,odds_home_team,odds_away_team,avg_fair_prb,point,raw_kelly,real_kelly,optimal_bet,num_contracts,trading_cost,profit,ev
0,KXNCAAMBSPREAD-25NOV26UNITLSA-TLSA1,0.4,0.44,Tulsa,1.5,spreads,2025-11-26 15:30:08 CST,Tulsa Golden Hurricane,Tulsa Golden Hurricane,Northern Iowa Panthers,0.4824,1.5,0.107586,0.107586,10.758621,26.0,0.11,15.49,1.9
1,KXNCAAMBSPREAD-25NOV26USCASU-ASU7,0.0,0.2,Arizona St,7.5,spreads,2025-11-26 13:32:43 CST,Arizona St Sun Devils,Arizona St Sun Devils,USC Trojans,0.472,7.5,0.413333,0.413333,41.333333,inf,,,
2,KXNCAAMBSPREAD-25NOV26BAYSDSU-SDSU1,0.41,0.45,San Diego St,1.5,spreads,2025-11-26 21:30:00 CST,San Diego St Aztecs,San Diego St Aztecs,Baylor Bears,0.48625,1.5,0.098684,0.098684,9.868421,24.0,0.11,14.05,1.76


In [1066]:
# First compute midprice in combined_winners_df
combined_winners_df['midprice'] = (
    combined_winners_df['yes_bid'] + combined_winners_df['yes_ask']
) / 2

# Merge spreads df with winner probabilities
merged = filtered_spreads_df.merge(
    combined_winners_df[['team', 'midprice']],
    left_on='odds_team',      # column in filtered_spreads_df
    right_on='team',          # column in combined_winners_df
    how='left'
)

# Filter out teams with win prob < 50% OR missing (NaN)
filtered_spreads_df = merged.loc[
    merged['midprice'] >= 0.50
].reset_index(drop=True)


In [1067]:
filtered_spreads_df

Unnamed: 0,ticker,yes_bid,yes_ask,team_x,points,market,start_time,odds_team,odds_home_team,odds_away_team,...,point,raw_kelly,real_kelly,optimal_bet,num_contracts,trading_cost,profit,ev,team_y,midprice


In [1068]:
total_loss = np.sum(filtered_spreads_df['optimal_bet'])
total_profit = np.sum(filtered_spreads_df['profit'])
total_ev = np.sum(filtered_spreads_df['ev'])
print(f"{odds_sport} spreads portfolio summary:\n")
print(f"Max Loss: -{total_loss:.2f}")
print(f"Max Profit: {total_profit:.2f}")
print(f"Portfolio EV: {total_ev:.2f}")


cbb spreads portfolio summary:

Max Loss: -0.00
Max Profit: 0.00
Portfolio EV: 0.00
