<a href="https://colab.research.google.com/github/alexcontarino/personal-projects/blob/main/Sports_Betting/NFL_Season_Simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Uses the dataframe created by "NFL Moneyline Scrape" to simulate the NFL season a user-specified number of times to calculate expected wins, distribution of wins, and playoff probabilities for each team.

## Set-Up

In [1]:
import numpy as np
import pandas as pd
import math
from google.colab import drive

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored on Drive can be read by path in later cells.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the per-game lines and implied win probabilities.
# The path is anchored to the absolute Drive mount point so the read does not
# depend on the kernel's current working directory.
gamelines = pd.read_csv("/content/drive/MyDrive/2025_gamelines.csv")
gamelines.head()

Unnamed: 0,away,home,home_spread,away_line,home_line,away_prob,home_prob
0,Cowboys,Eagles,-7.0,+260,−325,0.266458,0.733542
1,Chiefs,Chargers,3.0,−148,+124,0.57206,0.42794
2,Bengals,Browns,5.5,−230,+190,0.669007,0.330993
3,Giants,Commanders,-7.0,+230,−285,0.290456,0.709544
4,Cardinals,Saints,5.5,−238,+195,0.675031,0.324969


## Season Simulation - Helper Functions


In [4]:
# Randomly assigns away team as winner of game based on win probability
def away_Ws(away_probs, rng):
    """Draw a win/loss outcome for the away team in every game.

    Parameters
    ----------
    away_probs : array-like
        Away-team win probability for each game.
    rng : object exposing ``uniform(low, high, size=...)``
        Random source (e.g. a ``numpy.random.Generator``).

    Returns
    -------
    numpy.ndarray
        Integer column vector of shape ``(n_games, 1)`` holding 1 where the
        away team wins and 0 otherwise.
    """
    prob_column = np.array(away_probs).reshape(-1, 1)
    n_games = len(prob_column)

    # One uniform draw per game: the away team wins when the draw falls
    # strictly below its win probability.
    draws = rng.uniform(0, 1, size=(n_games, 1))

    # Multiplying the boolean mask by 1 converts it to integers.
    return np.multiply(draws < prob_column, 1)

In [5]:
# Simulates single season
def simulate_season(gamelines, rng):
    """Simulate one season and return the schedule with win indicators added.

    Parameters
    ----------
    gamelines : pandas.DataFrame
        One row per game; must contain an ``away_prob`` column with the away
        team's win probability.
    rng : random source passed through to ``away_Ws``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``gamelines`` with two extra columns: ``Away_W`` (1 if the
        away team won, else 0) and ``Home_W`` (``1 - Away_W``).
    """
    # Copy first: a plain assignment would alias the caller's frame and every
    # simulated season would overwrite columns on the shared input.
    gamelines_w_results = gamelines.copy()

    # away_Ws returns an (n_games, 1) column vector; flatten it to one value per row.
    away_wins = np.asarray(away_Ws(gamelines_w_results["away_prob"], rng)).ravel()
    gamelines_w_results["Away_W"] = away_wins
    gamelines_w_results["Home_W"] = 1 - gamelines_w_results["Away_W"]

    return gamelines_w_results

In [6]:
# Calculates wins for each team based on simulated season results
def summarize_season(gamelines_w_results):
    """Count wins per team for one simulated season.

    Parameters
    ----------
    gamelines_w_results : pandas.DataFrame
        One row per game with columns ``away``, ``home`` and ``Away_W``
        (1 when the away team won; any other value counts as a home win).

    Returns
    -------
    pandas.DataFrame
        Columns ``team`` and ``wins`` on a default integer index. Teams are
        listed in order of first appearance in the ``away`` column, followed
        by any team that only ever appears in the ``home`` column.
    """
    away = gamelines_w_results["away"]
    home = gamelines_w_results["home"]

    # Winner of each game: the away team where Away_W == 1, the home team otherwise.
    winners = away.where(gamelines_w_results["Away_W"] == 1, home)

    # Include home-only teams as well, so a team with no away games is
    # reported (with its wins) instead of raising a KeyError.
    teams = pd.concat([away, home]).unique()

    # Teams that never won are absent from value_counts; fill them in with 0.
    wins = winners.value_counts().reindex(teams, fill_value=0)

    return pd.DataFrame({"team": teams, "wins": wins.to_numpy()})

In [7]:
def rough_playoff_prob(wins, playoff_probs_by_wins):
  '''
  Estimate a playoff probability by averaging a per-season probability that
  is looked up from each simulated win total.

  input:
  --wins: list of wins (one entry per simulated season)
  --playoff_probs_by_wins: Dictionary matching win values to playoff probabilities.
  ------------------------ Function assumes 100% chance if wins greater than the largest provided key
  ------------------------ Function assumes 0% chance if wins less than the smallest provided key
  output:
  --mean_playoff_prob: estimated playoff probability based on wins.
  -------------------- Returns None (after printing a message) if a win value
  -------------------- lies between the smallest and largest keys but is not itself a key.
  '''

  # Ascending order: win_keys[0] is the smallest key, win_keys[-1] the largest.
  win_keys = sorted(playoff_probs_by_wins)

  playoff_probs = []

  for win in wins:
    if win in playoff_probs_by_wins:
      playoff_probs.append(playoff_probs_by_wins[win])
    elif win > win_keys[-1]:
      # Above every listed win total: treated as a certain playoff berth.
      playoff_probs.append(1)
    elif win < win_keys[0]:
      # Below every listed win total: treated as no chance.
      playoff_probs.append(0)
    else:
      # Inside the covered range but missing from the table; the table has a
      # gap, so no probability can be assigned.
      print("Unexpected value encountered")
      return None

  mean_playoff_prob = np.mean(playoff_probs)

  return mean_playoff_prob

In [8]:
def probs_to_odds(prb, vig=0.00):
  '''
  Convert a probability into an American-odds string.

  input:
  --prb: probability of the outcome
  --vig: fractional margin; the probability is multiplied by (1 + vig) before conversion
  output:
  --odds as a string: negative (e.g. "-300") when the adjusted probability is
  ------------------- above 0.5, positive with a leading "+" (e.g. "+300") when
  ------------------- it is below 0.5, and "+100" at exactly 0.5.
  '''

  prb = prb * (1 + vig)

  if prb == 0.5:

    # Even money; returned as a string so every branch yields the same type.
    return "+100"

  elif prb > 0.5:

    # Cap at 0.999 so the result stays finite when the vig pushes prb to 1 or beyond.
    prb = np.min([0.999, prb])

    return str(math.floor(-100 * prb / (1 - prb)))

  else:

    # Floor at 0.001 so a probability of 0 does not cause a division by zero.
    prb = np.max([0.001, prb])

    return "+" + str(math.floor(100 * (1 - prb) / prb))

## Season Simulation - Main Loop

In [9]:
# User Input
n_sims = 10_000       # number of seasons to simulate
starting_seed = 2025  # base seed; the simulation loop offsets it by the trial number

In [10]:
# Main Loop: simulate n_sims seasons and record every team's win total per season
all_results_dict = {}
for team in gamelines.away.unique():
    all_results_dict[team] = []

for trial in range(n_sims):

    # Each trial gets its own generator, seeded from the base seed plus the
    # trial number, so any single season can be reproduced on its own.
    rng = np.random.default_rng(seed=starting_seed + trial)
    gamelines_w_results = simulate_season(gamelines, rng)
    season_results = summarize_season(gamelines_w_results)

    # First column holds the team name, second its win total for this season.
    season_teams = season_results.iloc[:, 0].to_numpy()
    season_wins = season_results.iloc[:, 1].to_numpy()
    for team, wins in zip(season_teams, season_wins):
        all_results_dict[team].append(wins)

## Season Simulation - Results

### Output: Simulation results summarized in a dataframe, displaying the mean wins, 10th percentile of wins, 90th percentile of wins, and estimated playoff probability and odds for each team.

**Note**: playoff probabilities are *roughly* estimated from the number of wins a team accumulates in a season; they do not account for tiebreakers or for division or conference quality. Playoff probabilities are assigned to each team according to the following rules:

- 100% chance to make the playoffs if a team wins $\geq$ 11 games
- 90% chance if a team wins 10 games
- 67% chance if a team wins 9 games  
- 1% chance if a team wins 8 games
- 0% chance otherwise

Implementing this heuristic works out to ~14 expected playoff teams for an average season. These values can be adjusted in the `playoff_probs_by_wins` dictionary below.

Playoff odds are computed from the playoff probabilities, with an assumed vig of 5% charged by the sportsbook. The `assumed_vig` variable defines this below.

In [11]:
# User Inputs
assumed_vig = 0.05  # fractional margin applied to probabilities before converting to odds

# Heuristic playoff probability for each season win total
playoff_probs_by_wins = {
    11: 1,
    10: 0.9,
    9: 2 / 3,
    8: 0.01,
}

In [12]:
# Create summary dataframe: one row per team with mean wins, 10th/90th
# percentile of wins, and the heuristic playoff probability.
summary_columns = ["team", "mean_wins", "10th_perc", "90th_perc", "playoff_prb"]
summary_records = [
    {
        "team": team,
        "mean_wins": np.mean(team_wins),
        "10th_perc": np.percentile(team_wins, 10),
        "90th_perc": np.percentile(team_wins, 90),
        "playoff_prb": rough_playoff_prob(team_wins, playoff_probs_by_wins),
    }
    for team, team_wins in all_results_dict.items()
]
sim_summary = pd.DataFrame(summary_records, columns=summary_columns)

# Convert each playoff probability to odds with the assumed vig applied.
sim_summary["playoff_odds"] = sim_summary["playoff_prb"].apply(probs_to_odds, vig=assumed_vig)

sim_summary = sim_summary.sort_values("mean_wins", ascending=False)

# Rank teams 1..N; sized from the data rather than assuming exactly 32 teams.
sim_summary.index = pd.RangeIndex(1, len(sim_summary) + 1)
sim_summary

Unnamed: 0,team,mean_wins,10th_perc,90th_perc,playoff_prb,playoff_odds
1,Bills,11.5238,9.0,14.0,0.897513,-1636
2,Ravens,11.2905,9.0,14.0,0.880843,-1232
3,Chiefs,10.7752,8.0,13.0,0.813251,-585
4,Eagles,10.6108,8.0,13.0,0.793205,-499
5,49ers,10.3811,8.0,13.0,0.759064,-393
6,Lions,9.7052,7.0,12.0,0.65123,-217
7,Broncos,9.6792,7.0,12.0,0.645331,-211
8,Buccaneers,9.6455,7.0,12.0,0.638993,-204
9,Rams,9.5875,7.0,12.0,0.627399,-194
10,Commanders,9.5748,7.0,12.0,0.625859,-192
