# 003. Daily Bets
- This identifies the day's top bets 
- Type: Evaluation
- Run Frequency: Irregular
- Sources:
    - Sportsbook Review
- Created: 3/30/2024
- Updated: 7/15/2025

### Imports

In [None]:
%run "C:\Users\james\Documents\MLB\Code\U01. Imports.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U02. Functions.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U03. Classes.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U04. Datasets.ipynb"
%run "C:\Users\james\Documents\MLB\Code\U05. Models.ipynb"

In [None]:
# Render floats in fixed-point form with six decimals rather than scientific notation
pd.options.display.float_format = '{:.6f}'.format

### Settings

In [None]:
# Evaluate a single day: the window opens and closes on today's date
start_date = end_date = todaysdate

### Data

##### Games

Read in games from MLB API

In [None]:
%%time
# Refresh the stored game log: rebuild yesterday's and today's games with
# create_games and swap them in for whatever the CSV held for those two dates
game_csv_path = os.path.join(baseball_path, "game_df.csv")
historic_game_df = pd.read_csv(game_csv_path)
recent_game_df = create_games(yesterdaysdate, todaysdate, team_dict)

# Drop the stale rows for the refreshed dates before appending the new pull
is_refreshed_date = historic_game_df['date'].astype(str).isin([yesterdaysdate, todaysdate])
historic_game_df = historic_game_df[~is_refreshed_date]

all_game_df = pd.concat([historic_game_df, recent_game_df], axis=0)
all_game_df.to_csv(game_csv_path, index=False)

In [None]:
# Keep games dated within [start_date, end_date], both ends inclusive (string comparison)
game_date_str = all_game_df['date'].astype(str)
game_df = all_game_df[game_date_str.between(start_date, end_date)].reset_index(drop=True)

In [None]:
# Cast the date key to int: it is used below to build the per-date odds file names and
# as a merge key (presumably the odds CSVs store 'date' as an integer — TODO confirm)
game_df['date'] = game_df['date'].astype(int)

##### Odds

Read in game odds from Sportsbook Review

In [None]:
# Load one Sportsbook Review odds file per game date and stack them into a single frame
odds_dir = os.path.join(baseball_path, "A08. Odds Sportsbook Review")

odds_df_list = []
for date in game_df['date'].unique():
    odds_df_list.append(pd.read_csv(os.path.join(odds_dir, f"Odds {date}.csv")))

odds_df = pd.concat(odds_df_list, axis=0, ignore_index=True)

Determine game number (in case of double headers)

In [None]:
# Number each home team's games within a date in row order (1, 2, ...). This assumes the
# odds file lists doubleheader games in the order they are played — TODO confirm
odds_df['game_num'] = odds_df.groupby(['HomeTeamShort', 'date']).cumcount()+1

Use standardized team abbreviations

In [None]:
# Translate the sportsbook's short team names into the standard abbreviations in team_dict
for standard_col, sportsbook_col in (('away_team', 'VisitorTeamShort'),
                                     ('home_team', 'HomeTeamShort')):
    odds_df[standard_col] = odds_df[sportsbook_col].map(team_dict)

##### Merge

Merge gambling data onto game data

In [None]:
# Game-level columns carried into the odds merge
game_df_column_list = [
    # identifiers and scheduling
    'game_id', 'game_datetime', 'date', 'game_type', 'status',
    # venue
    'venue_id', 'venue_name',
    # matchup (with 'date', these are the merge keys)
    'away_team', 'home_team', 'game_num',
    # final scores
    'away_score', 'home_score',
]

In [None]:
# Left-join so every game is kept even when no odds row matches it
merge_keys = ['date', 'game_num', 'away_team', 'home_team']
odds_df = game_df[game_df_column_list].merge(odds_df, on=merge_keys, how='left')

##### Sims

Calculate model gambling expectations

In [None]:
def process_sims(game_id, date, Spread, OU):
    """Summarise one game's simulated scores against the posted spread and total.

    Parameters
    ----------
    game_id, date
        Locate the simulation file ``Matchups {date}/game_{game_id}.csv`` inside
        the "B02. Simulations/1. Game Sims" folder under ``baseball_path``.
    Spread
        Run line, applied to the home score: the away side covers when
        ``away_score > home_score + Spread``.
    OU
        Total-runs line.

    Returns
    -------
    pandas.Series
        Means across simulations of the scores and cover indicators
        (ModelScore1/2, SpreadCover1-3, OuCover1-3, MLCover1/2; suffix 1 = away or
        over, 2 = home or under, 3 = push), followed by frequency dicts of the
        simulated away score (MLCounts1), home score (MLCounts2), total
        (MLCounts3) and absolute margin (MarginCounts).
        An empty Series is returned when the simulation file does not exist.
    """
    sim_file = os.path.join(baseball_path, "B02. Simulations", "1. Game Sims",
                            f"Matchups {date}", f"game_{game_id}.csv")
    try:
        sims = pd.read_csv(sim_file)
    except FileNotFoundError:
        return pd.Series()  # empty to signal skip

    away = sims['away_score']
    home = sims['home_score']
    home_with_line = home + Spread
    total = away + home

    # One 0/1 indicator per simulation; the column means are the simulated probabilities
    summary = pd.DataFrame({
        'ModelScore1': away,
        'ModelScore2': home,
        'SpreadCover1': (away > home_with_line).astype(int),
        'SpreadCover2': (away < home_with_line).astype(int),
        'SpreadCover3': (away == home_with_line).astype(int),
        'OuCover1': (total > OU).astype(int),
        'OuCover2': (total < OU).astype(int),
        'OuCover3': (total == OU).astype(int),
        'MLCover1': (away > home).astype(int),
        'MLCover2': (away < home).astype(int),
    })
    means = summary.mean()

    # Full score distributions, stored as plain dicts
    counts = pd.Series({
        'MLCounts1': dict(Counter(away)),
        'MLCounts2': dict(Counter(home)),
        'MLCounts3': dict(Counter(total)),
        'MarginCounts': dict(Counter((away - home).abs())),
    })

    return pd.concat([means, counts])

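Wrap `process_sims` so it can be applied safely row by row (this runs sequentially via `DataFrame.apply`, not in parallel)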

In [None]:
def safe_process_row(row):
    """Run ``process_sims`` for one odds row, returning a placeholder if no sims exist.

    Parameters
    ----------
    row : pandas.Series
        Must provide 'game_id', 'date', 'Spread' and 'OU'.

    Returns
    -------
    pandas.Series
        The ``process_sims`` summary, or a Series with the same labels holding NaN
        for the numeric fields and empty dicts for the count fields when the
        simulation file is missing.
    """
    placeholder = pd.Series({
        'ModelScore1': np.nan,
        'ModelScore2': np.nan,
        'SpreadCover1': np.nan,
        'SpreadCover2': np.nan,
        'SpreadCover3': np.nan,
        'OuCover1': np.nan,
        'OuCover2': np.nan,
        'OuCover3': np.nan,
        'MLCover1': np.nan,
        'MLCover2': np.nan,
        'MLCounts1': {},
        'MLCounts2': {},
        'MLCounts3': {},
        'MarginCounts': {}
    })

    try:
        result = process_sims(row['game_id'], row['date'], row['Spread'], row['OU'])
    except FileNotFoundError:
        return placeholder

    # process_sims reports a missing file by returning an empty Series rather than
    # raising, so that case must be caught here too. Otherwise a day with no
    # simulation files produces a result frame with no columns at all.
    if result.empty:
        return placeholder

    return result

Run

In [None]:
# Summarise the simulations for every game row (row-wise apply, executed sequentially),
# then place the summary columns alongside the odds columns
results_df = odds_df.apply(safe_process_row, axis=1)
odds_df = pd.concat([odds_df, results_df], axis=1)

### Clean

Convert American odds to payout multipliers

In [None]:
def multipliers(odds):
    """Convert American odds into a payout multiplier that includes the stake.

    Positive odds pay ``odds / 100`` per unit staked and negative odds pay
    ``100 / |odds|``; in both cases 1 is added for the returned stake.
    Odds of 0 are treated as missing and give NaN.
    """
    if odds == 0:
        return np.nan

    if odds > 0:
        return odds / 100 + 1

    return 100 / odds * -1 + 1

Determine odds

In [None]:
# For each bet type, turn both money lines into payout multipliers and derive a single
# implied probability per side
for bet in ['Spread', 'Ou', 'ML']:
    for side_num in ('1', '2'):
        odds_df[f'{bet}Multiplier{side_num}'] = odds_df[f'{bet}Money{side_num}'].apply(multipliers)

    # Two estimates of side 1's probability: its own implied probability, and the
    # complement of side 2's implied probability
    implied_from_side1 = 1 / odds_df[f'{bet}Multiplier1']
    implied_from_side2 = 1 - (1 / odds_df[f'{bet}Multiplier2'])
    odds_df[f'{bet}VegasOdds1TempA'] = implied_from_side1
    odds_df[f'{bet}VegasOdds1TempB'] = implied_from_side2

    # Average the two estimates; side 2 gets the remainder so the pair sums to 1
    estimate_cols = [f'{bet}VegasOdds1TempA', f'{bet}VegasOdds1TempB']
    odds_df[f'{bet}VegasOdds1'] = odds_df[estimate_cols].mean(axis=1)
    odds_df[f'{bet}VegasOdds2'] = 1 - odds_df[f'{bet}VegasOdds1']

Determine Vegas Dog

In [None]:
# Label each side from the sportsbook's implied win probability:
# above 50% is the favorite, below is the underdog, anything else is "Neither"
for team_slot in ('1', '2'):
    vegas_prob = odds_df[f'MLVegasOdds{team_slot}']
    odds_df[f'MLVegasDog{team_slot}'] = np.select(
        [vegas_prob > 0.5, vegas_prob < 0.5],
        ["Favorite", "Underdog"],
        default="Neither"
    )

Determine Model Dog

In [None]:
# Label each side from the model's simulated win probability:
# above 50% is the favorite, below is the underdog, anything else is "Neither"
for team_slot in ('1', '2'):
    model_prob = odds_df[f'MLCover{team_slot}']
    odds_df[f'MLModelDog{team_slot}'] = np.select(
        [model_prob > 0.5, model_prob < 0.5],
        ["Favorite", "Underdog"],
        default="Neither"
    )

Overwrite the Spread favorite labels to reflect who is favored to win the game, not who is favored against the spread

In [None]:
# Spread rows reuse the moneyline favorite/underdog labels, for both Vegas and the model
for label_source in ('Vegas', 'Model'):
    for team_slot in ('1', '2'):
        odds_df[f'Spread{label_source}Dog{team_slot}'] = odds_df[f'ML{label_source}Dog{team_slot}']

Impute Vegas runs

Methodology:
- Calculate win probabilities using ML payouts
- Assign runs from O/U using win probability

Limitations:
- Using win probability to assign runs is imperfect
- Ignores different payouts for overs and unders

In [None]:
# Split the posted total between the teams using the Vegas-implied win probability.
# Dividing by 0.5 means an even matchup (probability 0.5) gives the away side 9/17.5 of the
# total and the home side 8.5/17.5. The tilt toward the away side is presumably because the
# away team always bats in the 9th while the home team sometimes does not — TODO confirm
odds_df['VegasScore1'] = odds_df['OU'] * odds_df['MLVegasOdds1'] * (9/17.5/0.5)
odds_df['VegasScore2'] = odds_df['OU'] * odds_df['MLVegasOdds2'] * (8.5/17.5/0.5)

Convert to long

In [None]:
# Bet-level columns are named <BetType><field><side>, e.g. 'SpreadMoney1' or 'MLCover2'
prefixes = ['Spread', 'Ou', 'ML']
suffixes = ['1', '2']
pattern = re.compile(rf"^({'|'.join(prefixes)})([A-Za-z0-9_]+)({'|'.join(suffixes)})$")

# Columns matching the pattern get melted; every other column is kept as a game-level id.
# NOTE: 'MLCounts1' and 'MLCounts2' match the pattern too, so they end up in bet_df as a
# per-side 'Counts' column rather than under their original names
bet_columns = [col for col in odds_df.columns if pattern.match(col)]
game_columns = [col for col in odds_df.columns if col not in bet_columns]

# List of known dict-type columns
unhashable_cols = ['MLCounts1', 'MLCounts2', 'MLCounts3', 'MarginCounts']

# Convert dicts to strings only if they are actually dicts
# (game-level columns become part of the pivot index below, which requires hashable values)
for col in unhashable_cols:
    if col in odds_df.columns:
        odds_df[col] = odds_df[col].apply(lambda x: str(x) if isinstance(x, dict) else x)

# Melt the DataFrame
df_long = odds_df.melt(id_vars=game_columns, value_vars=bet_columns,
                       var_name='original_column', value_name='value')

# Split each melted column name into its three regex groups
df_long[['BetType', 'field', 'side']] = df_long['original_column'].str.extract(pattern)

# Pivot back to one row per bet
bet_df = df_long.pivot(index=game_columns + ['BetType', 'side'], columns='field', values='value').reset_index()
bet_df.columns.name = None

# Convert strings back to dicts safely
def safe_literal_eval(val):
    """Parse a string that looks like a dict literal; return anything else unchanged.

    Only strings whose first non-blank character is ``{`` are parsed. If parsing
    fails with ``ValueError`` or ``SyntaxError`` the original string is returned.
    """
    looks_like_dict = isinstance(val, str) and val.strip().startswith('{')
    if not looks_like_dict:
        return val

    try:
        parsed = ast.literal_eval(val)
    except (ValueError, SyntaxError):
        return val
    return parsed

# 'MLCounts1'/'MLCounts2' match the bet-column pattern, so after the pivot their
# stringified dicts sit in the per-side 'Counts' column. Restore that column as well
# as the game-level ones; names absent from bet_df are skipped.
for col in unhashable_cols + ['Counts']:
    if col in bet_df.columns:
        bet_df[col] = bet_df[col].apply(safe_literal_eval)

Determine bet side (away/home, over/under)

In [None]:
# Define conditions
cond_ou = bet_df['BetType'] == 'Ou'
cond_1 = bet_df['side'] == '1'
cond_2 = bet_df['side'] == '2'

# Apply logic
bet_df['BetSide'] = np.select(
    [cond_ou & cond_1, cond_ou & cond_2, cond_1, cond_2],
    ['Over', 'Under', 'Away', 'Home'],
    default=None
)

bet_df.drop(columns={'side'}, inplace=True)

Determine scores

In [None]:
# For Away/Home bets, pick that side's actual, Vegas-imputed and model score.
# Over/Under rows match neither condition and get NaN.
is_away_bet = bet_df['BetSide'] == "Away"
is_home_bet = bet_df['BetSide'] == "Home"

score_sources = {
    'Score': ('away_score', 'home_score'),
    'VegasScore': ('VegasScore1', 'VegasScore2'),
    'ModelScore': ('ModelScore1', 'ModelScore2'),
}

for target_col, (away_col, home_col) in score_sources.items():
    bet_df[target_col] = np.select(
        [is_away_bet, is_home_bet],
        [bet_df[away_col], bet_df[home_col]],
        default=np.nan
    )

Determine expected payout

In [None]:
# Expected return per unit staked from winning: simulated cover rate x payout multiplier
bet_df['ExpectedPayout'] = bet_df['Cover'] * bet_df['Multiplier'] 

# Spread and total bets can push; add the simulated push rate, valued at 1 unit
# (the stake), to those rows only
for bet in ['Spread', 'Ou']:
    mask = bet_df['BetType'] == bet
    bet_df.loc[mask, 'ExpectedPayout'] += bet_df.loc[mask, f'{bet}Cover3']

# Flag bets whose expected return exceeds the 1-unit stake
bet_df['PositiveEV'] = (bet_df['ExpectedPayout'] > 1).astype(int)

Determine actual result

In [None]:
# Pre-calc total and spread difference
total_score = bet_df['away_score'] + bet_df['home_score']
spread_diff = bet_df['away_score'] - bet_df['home_score']

def determine_result(row):
    """Grade a single bet row as 'Win', 'Loss' or 'Push'.

    Parameters
    ----------
    row : pandas.Series or mapping
        Must provide 'BetType' ('ML', 'Ou' or 'Spread'), 'BetSide'
        ('Away'/'Home' or 'Over'/'Under'), 'away_score' and 'home_score',
        plus 'OU' for total bets and 'Spread' for spread bets.

    Returns
    -------
    str or float
        'Win', 'Loss' or 'Push'. ``np.nan`` is returned when the bet cannot be
        graded: a score is missing, the relevant line is missing, or the bet
        type / side combination is not recognised.
    """
    away = row['away_score']
    home = row['home_score']

    # A game without a final score cannot be graded. Comparisons against NaN are
    # always False, which would otherwise report every unplayed game as a 'Loss'.
    if pd.isna(away) or pd.isna(home):
        return np.nan

    bet_type = row['BetType']
    side = row['BetSide']

    if bet_type == 'ML':
        if side == 'Away':
            return 'Win' if away > home else 'Loss'
        if side == 'Home':
            return 'Win' if home > away else 'Loss'

    elif bet_type == 'Ou':
        ou = row['OU']
        if pd.isna(ou):  # no posted total to grade against
            return np.nan
        total = away + home
        if total == ou:
            return 'Push'
        if side == 'Over':
            return 'Win' if total > ou else 'Loss'
        if side == 'Under':
            return 'Win' if total < ou else 'Loss'

    elif bet_type == 'Spread':
        spread = row['Spread']
        if pd.isna(spread):  # no posted run line to grade against
            return np.nan
        # The spread is applied to the home score, so the away side covers when
        # the away-minus-home margin exceeds it and the home side when it is less.
        margin = away - home
        if margin == spread:
            return 'Push'
        if side == 'Away':
            return 'Win' if margin > spread else 'Loss'
        if side == 'Home':
            return 'Win' if margin < spread else 'Loss'

    return np.nan  # if none apply

# Grade every bet row, then derive a 0/1 win flag (pushes and ungraded bets count as 0)
bet_df['BetResult'] = bet_df.apply(determine_result, axis=1)
bet_df['BetWin'] = bet_df['BetResult'].eq("Win").astype(int)

In [None]:
# Sanity check: distribution of graded results for the over/under bets
bet_df.loc[bet_df['BetType'] == "Ou", 'BetResult'].value_counts()

Determine actual payout

In [None]:
# A win returns the payout multiplier, a push returns the 1-unit stake, anything else 0
bet_won = bet_df['BetResult'] == "Win"
bet_pushed = bet_df['BetResult'] == "Push"
bet_df['ActualPayout'] = bet_won.astype(int) * bet_df['Multiplier'] + bet_pushed

Label bet pick

In [None]:
# Totals are labelled Over/Under; every other bet is labelled by the Vegas favorite/underdog tag
is_total_bet = bet_df['BetType'] == "Ou"
bet_df['BetPick'] = np.where(is_total_bet, bet_df['BetSide'], bet_df['VegasDog'])

Determine/Calculate Totals

In [None]:
# 'MLCounts3' holds the frequency dict of simulated combined scores, so give it a clearer name
bet_df.rename(columns={'MLCounts3': 'TotalCounts'}, inplace=True)

In [None]:
# A total only makes sense when both scores are present. min_count=2 yields NaN for
# unplayed games or missing simulations; a plain sum would skip the NaNs and report 0
# (or only one team's score).
bet_df['TotalScore'] = bet_df[['away_score', 'home_score']].sum(axis=1, min_count=2)
bet_df['ModelTotalScore'] = bet_df[['ModelScore1', 'ModelScore2']].sum(axis=1, min_count=2)

### Top Bets

In [None]:
# Show the ten positive-EV bets with the highest expected payout
top_bet_columns = [
    'date', 'game_id', 'away_team', 'home_team',
    'BetType', 'BetSide', 'BetPick',
    'Cover', 'Multiplier', 'OU', 'Spread', 'Money',
    'ExpectedPayout', 'ActualPayout',
]
positive_ev_bets = bet_df[bet_df['PositiveEV'] == 1]
positive_ev_bets.sort_values('ExpectedPayout', ascending=False).head(10)[top_bet_columns]