In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
file_path = '../data/nfl_scores.csv'

data = pd.read_csv(file_path)
nfl_df = pd.DataFrame(data)


In [3]:
# Elo probability function
def calc_expected_res(team_rating, opp_rating):
  return 1 / (1 + 10 ** ((opp_rating - team_rating) / 400))

In [4]:
# Update teams Elo's based on expected result vs actual result
def update_rating(team_rating, opp_rating, actual_result, k=20):
  expected_result = calc_expected_res(team_rating, opp_rating)
  return team_rating + k * (actual_result - expected_result)

In [5]:
def calc_elo_rankings(games):
  # Initialize Elo ratings for each team
  temp = pd.DataFrame({'teams': games['team_home'].unique(),
                       'elo': 1500})

  for i, game in games.iterrows():
    home_team = game['team_home']
    away_team = game['team_away']

    score_home = game['score_home']
    score_away = game['score_away']

    home_elo = temp.loc[temp['teams'] == home_team, 'elo'].iloc[0]
    away_elo = temp.loc[temp['teams'] == away_team, 'elo'].iloc[0]

    res = 1 if score_home > score_away else 0
    new_home_elo = update_rating(home_elo, away_elo, res)
    new_away_elo = update_rating(away_elo, home_elo, 1 - res)

    temp.loc[temp['teams'] == home_team, 'elo'] = int(new_home_elo)
    temp.loc[temp['teams'] == away_team, 'elo'] = int(new_away_elo)

  return temp
  

In [6]:
# Map old team names to current team name since 2002
team_names = {'Washington Redskins': 'Washington Commanders', 
              'Washington Football Team': 'Washington Commanders', 
              'Oakland Raiders': 'Las Vegas Raiders',
              'Houston Oilers': 'Tennessee Titans',
              'Tennessee Oilers': 'Tennessee Titans',
              'St. Louis Rams': 'Los Angeles Rams',
              'St. Louis Cardinals': 'Arizona Cardinals',
              'Phoenix Cardinals': 'Arizona Cardinals',
              'San Diego Chargers': 'Los Angeles Chargers',
              'Baltimore Colts': 'Baltimore Ravens',
              'Boston Patriots': 'New England Patriots',
              'Los Angeles Raiders': 'Las Vegas Raiders'}

# Replace team names
nfl_df['team_home'] = nfl_df['team_home'].replace(team_names)
nfl_df['team_away'] = nfl_df['team_away'].replace(team_names)

In [7]:
nfl_df["result_home"] = nfl_df["score_home"] - nfl_df["score_away"]
nfl_2020_2022 = nfl_df[(nfl_df['schedule_season'] >= 2020) & (nfl_df['schedule_season'] < 2023)]
display(nfl_2020_2022['team_home'].unique())
print(len(nfl_2020_2022['team_home'].unique()))

array(['Kansas City Chiefs', 'Atlanta Falcons', 'Baltimore Ravens',
       'Buffalo Bills', 'Carolina Panthers', 'Cincinnati Bengals',
       'Detroit Lions', 'Jacksonville Jaguars', 'Los Angeles Rams',
       'Minnesota Vikings', 'New England Patriots', 'New Orleans Saints',
       'San Francisco 49ers', 'Washington Commanders', 'Denver Broncos',
       'New York Giants', 'Cleveland Browns', 'Arizona Cardinals',
       'Chicago Bears', 'Dallas Cowboys', 'Green Bay Packers',
       'Houston Texans', 'Indianapolis Colts', 'Los Angeles Chargers',
       'Miami Dolphins', 'New York Jets', 'Philadelphia Eagles',
       'Pittsburgh Steelers', 'Seattle Seahawks', 'Tampa Bay Buccaneers',
       'Tennessee Titans', 'Las Vegas Raiders'], dtype=object)

32


In [8]:
def season_simulation(games, elo_ratings, start_week, num_sim):
    sim_outcome = pd.DataFrame({'Teams': elo_ratings['teams'].unique(),
                                'Wins': 0,
                                'Loss': 0})

    sim_outcome2 = pd.DataFrame({'Teams': elo_ratings['teams'].unique(),
                                'Wins': 0,
                                'Loss': 0})

    for week in range(1, start_week):
        game_week = games[games['week'] == week]
        for i, game in game_week.iterrows():
            home_team = game['home_team']
            away_team = game['away_team']
            result_bool = game['result_home']
            res = 1 if result_bool else 0
            sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Wins'] += res
            sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Loss'] += (1 - res)
            sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Wins'] += (1 - res)
            sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Loss'] += res

            home_elo = elo_ratings.loc[elo_ratings['teams'] == home_team, 'elo'].iloc[0]
            away_elo = elo_ratings.loc[elo_ratings['teams'] == away_team, 'elo'].iloc[0]

            new_home_elo = update_rating(home_elo, away_elo, res, k=40)
            new_away_elo = update_rating(away_elo, home_elo, 1 - res, k=40)

            elo_ratings.loc[elo_ratings['teams'] == home_team, 'elo'] = int(new_home_elo)
            elo_ratings.loc[elo_ratings['teams'] == away_team, 'elo'] = int(new_away_elo)

    # display(sim_outcome.sort_values(by='Wins', ascending=False))
    # display(elo_ratings.sort_values(by='elo', ascending=False))

    for sim in range(num_sim):
        if sim % 10 == 0:
            print("Simulation:", sim)
        elo = elo_ratings.copy()
        for week_index in range(start_week, 19):
            curr = games[games['week'] == week_index]
            for i, game in curr.iterrows():
                home_team = game['home_team']
                away_team = game['away_team']

                home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
                away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

                # Prediction here
                expected = calc_expected_res(home_elo, away_elo)
                res = 1 if random.random() < expected else 0

                sim_outcome2.loc[sim_outcome2['Teams'] == home_team, 'Wins'] += res
                sim_outcome2.loc[sim_outcome2['Teams'] == home_team, 'Loss'] += (1 - res)
                sim_outcome2.loc[sim_outcome2['Teams'] == away_team, 'Wins'] += (1 - res)
                sim_outcome2.loc[sim_outcome2['Teams'] == away_team, 'Loss'] += res
    
                # Update model
                new_home_elo = update_rating(home_elo, away_elo, res)
                new_away_elo = update_rating(away_elo, home_elo, 1 - res)

                elo.loc[elo['teams'] == home_team, 'elo'] = int(new_home_elo)
                elo.loc[elo['teams'] == away_team, 'elo'] = int(new_away_elo)

    win_ratio = (sim_outcome2['Wins'] / num_sim)
    loss_ratio = (sim_outcome2['Loss'] / num_sim)
    sim_outcome['Wins'] += win_ratio
    sim_outcome['Loss'] += loss_ratio
    return sim_outcome

In [9]:
elo_data = calc_elo_rankings(nfl_2020_2022)
# display(elo_data.sort_values(by='elo', ascending=False))
nfl_2023 = pd.read_csv('../data/nfl_scraped_2023.csv')
simulated_season_standings = season_simulation(nfl_2023, elo_data, start_week=14, num_sim=250)

Simulation: 0
Simulation: 10
Simulation: 20
Simulation: 30
Simulation: 40
Simulation: 50
Simulation: 60
Simulation: 70
Simulation: 80
Simulation: 90
Simulation: 100
Simulation: 110
Simulation: 120
Simulation: 130
Simulation: 140
Simulation: 150
Simulation: 160
Simulation: 170
Simulation: 180
Simulation: 190
Simulation: 200
Simulation: 210
Simulation: 220
Simulation: 230
Simulation: 240


In [10]:
# Predicted regular season win/loss rankings
simulated_season_standings.sort_values(by="Wins", ascending=False)

Unnamed: 0,Teams,Wins,Loss
26,Philadelphia Eagles,13.82,3.18
12,San Francisco 49ers,12.712,4.288
7,Jacksonville Jaguars,12.112,4.888
6,Detroit Lions,11.956,5.044
24,Miami Dolphins,11.904,5.096
19,Dallas Cowboys,11.812,5.188
2,Baltimore Ravens,11.632,5.368
0,Kansas City Chiefs,11.536,5.464
16,Cleveland Browns,9.94,7.06
27,Pittsburgh Steelers,9.9,7.1


In [11]:
# Manually create a dataframe with AFC division information
afc_team_data = {
    "Division": {
        "East": ["Buffalo Bills", "Miami Dolphins", "New England Patriots", "New York Jets"],
        "North": ["Baltimore Ravens", "Cincinnati Bengals", "Cleveland Browns", "Pittsburgh Steelers"],
        "South": ["Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars", "Tennessee Titans"],
        "West": ["Denver Broncos", "Kansas City Chiefs", "Las Vegas Raiders", "Los Angeles Chargers"]
    }
}

afc_df = pd.DataFrame()

for division, teams in afc_team_data["Division"].items():
    for team in teams:
        afc_df = pd.concat([afc_df, pd.DataFrame({"Teams": [team], "Conference": ["AFC"], "Division": [division]})])

afc_df.reset_index(drop=True, inplace=True)

In [12]:
# Manually create a dataframe with NFC division information
nfc_team_data = {
    "Division": {
        "East": ["Dallas Cowboys", "New York Giants", "Philadelphia Eagles", "Washington Commanders"],
        "North": ["Chicago Bears", "Detroit Lions", "Green Bay Packers", "Minnesota Vikings"],
        "South": ["Atlanta Falcons", "Carolina Panthers", "New Orleans Saints", "Tampa Bay Buccaneers"],
        "West": ["Arizona Cardinals", "Los Angeles Rams", "San Francisco 49ers", "Seattle Seahawks"]
    }
}

nfc_df = pd.DataFrame()

for division, teams in nfc_team_data["Division"].items():
    for team in teams:
        nfc_df = pd.concat([nfc_df, pd.DataFrame({"Teams": [team], "Conference": ["NFC"], "Division": [division]})])

nfc_df.reset_index(drop=True, inplace=True)

In [13]:
# Merge with predicted win/loss rankings to create conference/division rankings
afc_standings_df = pd.merge(afc_df, simulated_season_standings, on='Teams')
nfc_standings_df = pd.merge(nfc_df, simulated_season_standings, on='Teams')
nfc_standings_df 

Unnamed: 0,Teams,Conference,Division,Wins,Loss
0,Dallas Cowboys,NFC,East,11.812,5.188
1,New York Giants,NFC,East,5.476,11.524
2,Philadelphia Eagles,NFC,East,13.82,3.18
3,Washington Commanders,NFC,East,5.272,11.728
4,Chicago Bears,NFC,North,5.752,11.248
5,Detroit Lions,NFC,North,11.956,5.044
6,Green Bay Packers,NFC,North,9.344,7.656
7,Minnesota Vikings,NFC,North,8.404,8.596
8,Atlanta Falcons,NFC,South,8.604,8.396
9,Carolina Panthers,NFC,South,2.228,14.772


In [14]:
# Implements playoff seeding selection from each conference
# Reference: https://en.wikipedia.org/wiki/NFL_playoffs#Current_playoff_system
def get_playoff_df(conf_standings):
  # Select the team with the best record for each division in the given conference
  division_champs = conf_standings.sort_values(by=['Wins', 'Loss'], ascending=[False, True]) \
                               .groupby('Division').head(1)
  division_champs["Seed"] = [1,2,3,4]
  # Select 3 remaining best records from the overall conference as wildcards
  wild_cards = conf_standings.sort_values(by=['Wins', 'Loss'], ascending=[False, True]) \
                          .groupby('Division').tail(3).head(3)
  wild_cards["Seed"] = [5,6,7]
  return pd.concat([division_champs, wild_cards]).reset_index(drop=True)

In [15]:
# 7 AFC playoff teams
afc_playoff = get_playoff_df(afc_standings_df)
# 7 NFC playoff teams
nfc_playoff = get_playoff_df(nfc_standings_df)

In [16]:
# The 14 predicted postseason playoff teams including their seed
display(afc_playoff)
display(nfc_playoff)

Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Jacksonville Jaguars,AFC,South,12.112,4.888,1
1,Miami Dolphins,AFC,East,11.904,5.096,2
2,Baltimore Ravens,AFC,North,11.632,5.368,3
3,Kansas City Chiefs,AFC,West,11.536,5.464,4
4,Cleveland Browns,AFC,North,9.94,7.06,5
5,Pittsburgh Steelers,AFC,North,9.9,7.1,6
6,Indianapolis Colts,AFC,South,9.548,7.452,7


Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Philadelphia Eagles,NFC,East,13.82,3.18,1
1,San Francisco 49ers,NFC,West,12.712,4.288,2
2,Detroit Lions,NFC,North,11.956,5.044,3
3,Atlanta Falcons,NFC,South,8.604,8.396,4
4,Dallas Cowboys,NFC,East,11.812,5.188,5
5,Green Bay Packers,NFC,North,9.344,7.656,6
6,Los Angeles Rams,NFC,West,8.588,8.412,7


In [17]:
# Merge AFC and NFC teams to create the postseason playoff bracket
nfl_playoff = pd.concat([nfc_playoff, afc_playoff])

In [18]:
# Generate pairings based on a provided team dataframe and pairing order
def generate_matchups(playoff_teams, pairings):
  matchups = pd.DataFrame(pairings)
  matchups = matchups.merge(playoff_teams, left_on="team_home_seed", right_on="Seed", suffixes=('_home', '_away')) \
                     .merge(playoff_teams, left_on="team_away_seed", right_on="Seed", suffixes=('_home', '_away'))

  matchups = matchups[['team_home_seed', 'team_away_seed', 'Teams_home', 'Teams_away']]
  matchups.columns = ['team_home_seed', 'team_away_seed', 'team_home', 'team_away']
  return matchups

In [19]:
# Round 1 pairing order
# - 2nd seed vs 7th seed
# - 3rd seed vs 6th seed
# - 4th seed vs 5th seed
def generate_round_one(playoff_teams):
  pairings = {
      "team_home_seed": [2, 3, 4],
      "team_away_seed": [7, 6, 5]
  }
  return generate_matchups(playoff_teams, pairings)

In [20]:
# Simulate the provided round of the postseason
def simulate_postseason_round(curr_round, teams, elo):
  sim_outcome = pd.DataFrame()

  for _, game in curr_round.iterrows():
      home_team = game['team_home']
      away_team = game['team_away']

      home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
      away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

      # Prediction here
      expected = calc_expected_res(home_elo, away_elo)
      res = 1 if random.random() < expected else 0
      if res:
        row = teams.loc[teams['Teams'] == home_team]
        sim_outcome = pd.concat([sim_outcome, row])
      else:
        row = teams.loc[teams['Teams'] == away_team]
        sim_outcome = pd.concat([sim_outcome, row])

      new_home_elo = update_rating(home_elo, away_elo, res)
      new_away_elo = update_rating(away_elo, home_elo, 1 - res)
    
      # Update model
      elo.loc[elo['teams'] == home_team, 'elo'] = int(new_home_elo)
      elo.loc[elo['teams'] == away_team, 'elo'] = int(new_away_elo)

  return pd.concat([sim_outcome]).reset_index(drop=True)

In [21]:
def round1_simulation(wildcard, teams, elo):
  sim_outcome = pd.DataFrame()

  for _, game in wildcard.iterrows():

      home_team = game['team_home']
      away_team = game['team_away']

      home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
      away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

      expected = calc_expected_res(home_elo, away_elo)
      res = 1 if random.random() < expected else 0

      if res:
        row = teams.loc[teams['Teams'] == home_team]
        sim_outcome = pd.concat([sim_outcome, row])
      else:
        row = teams.loc[teams['Teams'] == away_team]
        sim_outcome = pd.concat([sim_outcome, row])

      new_home_elo = update_rating(home_elo, away_elo, res)
      new_away_elo = update_rating(away_elo, home_elo, 1 - res)
    
      # Update model
      elo.loc[elo['teams'] == home_team, 'elo'] = int(new_home_elo)
      elo.loc[elo['teams'] == away_team, 'elo'] = int(new_away_elo)

  return pd.concat([sim_outcome, teams.head(1)]).reset_index(drop=True)

In [22]:
# Round 2 pairing order
# - 1st seed vs lowest seed
# - 2nd highest vs 2nd lowest seed
def generate_round_two(round1_results):
    order = round1_results["Seed"].sort_values().reset_index(drop=True)
    pairings = {
        "team_home_seed": [order[0], order[1]],
        "team_away_seed": [order[3], order[2]]
    }
    return generate_matchups(round1_results, pairings)

In [23]:
# Division Championship pairing order
# - Remaining 2 teams
def generate_division_champ(results):
    order = results["Seed"].sort_values().reset_index(drop=True)
    pairings = {
        "team_home_seed": [order[0]],
        "team_away_seed": [order[1]]
    }
    return generate_matchups(results, pairings)

In [24]:
# Superbowl pairing order
def generate_superbowl(results):
    matchup = pd.DataFrame({
        "team_home_seed": [str(results["Seed"].iloc[0])],
        "team_away_seed": [str(results["Seed"].iloc[1])],
        "team_home": [results["Teams"].iloc[0]],
        "team_away": [results["Teams"].iloc[1]]
    })
    return matchup

In [25]:
# Simulate the postseason
def simulate_playoffs(conf, elo_data):
  round1 = generate_round_one(conf)
  round2 = round1_simulation(round1, conf, elo_data)
  divisional = generate_round_two(round2)
  round3 = simulate_postseason_round(divisional, conf, elo_data)
  finals = generate_division_champ(round3)
  return simulate_postseason_round(finals, conf, elo_data)

# Predict the Super Bowl winner
def predict_super_bowl_winner(conf1, conf2, elo_data):
  superbowl = pd.concat([simulate_playoffs(nfc_playoff, elo_data), simulate_playoffs(afc_playoff, elo_data)])
  superbowl_matchup = generate_superbowl(superbowl)
  return simulate_postseason_round(superbowl_matchup, pd.concat([conf1, conf2]), elo_data) 

In [26]:
# Proof of concept:
# Just predict the Super Bowl winner
predict_super_bowl_winner(nfc_playoff, afc_playoff, elo_data)

Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Philadelphia Eagles,NFC,East,13.82,3.18,1


In [27]:
# Simulate the entire postseason and keep track of the number of times each team wins each round
def playoff_simulation(conf1, conf2, elo_rankings, teams, num_sim=5000):
  sim_outcome = pd.DataFrame({'Teams': teams['Teams'].unique(),
                              'Round 1': 0,
                              'Round 2': 0,
                              'Conference': 0,
                              'Superbowl': 0})
  
  for i in range(num_sim):
    elo_data = elo_rankings.copy()
    if i % 100 == 0:
      print("Simulation:", i)
    # Simulate Conference 1 Playoff bracket 
    round1_pairings = generate_round_one(conf1)
    round1_results = round1_simulation(round1_pairings, conf1, elo_data)
    for _, result in round1_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 1'] += 1

    round2_pairings = generate_round_two(round1_results)
    round2_results = simulate_postseason_round(round2_pairings, conf1, elo_data)

    for _, result in round2_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 2'] += 1

    conf_pairing = generate_division_champ(round2_results)
    conf_results = simulate_postseason_round(conf_pairing, conf1, elo_data)

    for _, result in conf_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Conference'] += 1
    
    # Simulate conference 2 Playoff bracket 
    round1_pairings_conf2 = generate_round_one(conf2)
    round1_results_conf2 = round1_simulation(round1_pairings_conf2, conf2, elo_data)
    for _, result in round1_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 1'] += 1

    round2_pairings_conf2 = generate_round_two(round1_results_conf2)
    round2_results_conf2 = simulate_postseason_round(round2_pairings_conf2, conf2, elo_data)

    for _, result in round2_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 2'] += 1

    conf_pairing_conf2 = generate_division_champ(round2_results_conf2)
    conf_results_conf2 = simulate_postseason_round(conf_pairing_conf2, conf2, elo_data)

    for _, result in conf_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Conference'] += 1

    # Simulate superbowl
    superbowl_teams = pd.concat([conf_results, conf_results_conf2])
    superbowl_pairing = generate_superbowl(superbowl_teams)
    superbowl_result = simulate_postseason_round(superbowl_pairing, pd.concat([conf1, conf2]), elo_data) 

    for _, result in superbowl_result.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Superbowl'] += 1
  
  sim_outcome['Round 1'] /= num_sim
  sim_outcome['Round 2'] /= num_sim
  sim_outcome['Conference'] /= num_sim
  sim_outcome['Superbowl'] /= num_sim
  return sim_outcome

In [28]:
playoff_sim = playoff_simulation(nfc_playoff, afc_playoff, elo_data, nfl_playoff, num_sim=500)

Simulation: 0
Simulation: 100
Simulation: 200
Simulation: 300
Simulation: 400


In [29]:
# Playoff simulation results from predicted postseason teams
# Teams with P(Round 1) = 1 had a bye week because they are the 1st seed in their conference
display(playoff_sim.sort_values(by='Superbowl', ascending=False))

Unnamed: 0,Teams,Round 1,Round 2,Conference,Superbowl
0,Philadelphia Eagles,1.0,0.66,0.384,0.25
1,San Francisco 49ers,0.692,0.412,0.204,0.13
10,Kansas City Chiefs,0.644,0.378,0.222,0.13
9,Baltimore Ravens,0.596,0.32,0.174,0.084
4,Dallas Cowboys,0.73,0.344,0.174,0.078
8,Miami Dolphins,0.586,0.252,0.138,0.07
2,Detroit Lions,0.594,0.256,0.116,0.066
7,Jacksonville Jaguars,1.0,0.492,0.238,0.064
13,Indianapolis Colts,0.414,0.196,0.082,0.034
12,Pittsburgh Steelers,0.404,0.194,0.078,0.028


In [34]:
# Simulate the entire rest of season and post season. 
# Keep track of the number of times each team wins each round
def season_simulation(games, elo_ratings, start_week, num_sim=1000):
    global nfc_df
    global afc_df
    # Regular season win/loss simulation results
    sim_outcome = pd.DataFrame({'Teams': elo_ratings['teams'].unique(),
                                'Wins': 0,
                                'Loss': 0})

    postseason_probs = pd.DataFrame({'Teams': elo_ratings['teams'].unique(),
                                  'Round 1': 0,
                                  'Round 2': 0,
                                  'Conference': 0,
                                  'Superbowl': 0})

    # Set type as float for probability columns
    postseason_probs['Round 1'] = postseason_probs['Round 1'].astype(float)
    postseason_probs['Round 2'] = postseason_probs['Round 2'].astype(float)
    postseason_probs['Conference'] = postseason_probs['Conference'].astype(float)
    postseason_probs['Superbowl'] = postseason_probs['Superbowl'].astype(float)

    for week in range(1, start_week):
        game_week = games[games['week'] == week]
        for i, game in game_week.iterrows():
            home_team = game['home_team']
            away_team = game['away_team']
            result_bool = game['result_home']
            res = 1 if result_bool else 0
            sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Wins'] += res
            sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Loss'] += (1 - res)
            sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Wins'] += (1 - res)
            sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Loss'] += res

            home_elo = elo_ratings.loc[elo_ratings['teams'] == home_team, 'elo'].iloc[0]
            away_elo = elo_ratings.loc[elo_ratings['teams'] == away_team, 'elo'].iloc[0]

            new_home_elo = update_rating(home_elo, away_elo, res, k=40)
            new_away_elo = update_rating(away_elo, home_elo, 1 - res, k=40)

            elo_ratings.loc[elo_ratings['teams'] == home_team, 'elo'] = int(new_home_elo)
            elo_ratings.loc[elo_ratings['teams'] == away_team, 'elo'] = int(new_away_elo)

    for sim in range(num_sim):
        if sim % 10 == 0:
            print("Simulation:", sim)
        elo = elo_ratings.copy()
        sim_data = sim_outcome.copy()
        for week_index in range(start_week, 19):
            curr = games[games['week'] == week_index]
            for i, game in curr.iterrows():
                home_team = game['home_team']
                away_team = game['away_team']

                home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
                away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

                # Prediction here
                expected = calc_expected_res(home_elo, away_elo)
                res = 1 if random.random() < expected else 0

                sim_data.loc[sim_data['Teams'] == home_team, 'Wins'] += res
                sim_data.loc[sim_data['Teams'] == home_team, 'Loss'] += (1 - res)
                sim_data.loc[sim_data['Teams'] == away_team, 'Wins'] += (1 - res)
                sim_data.loc[sim_data['Teams'] == away_team, 'Loss'] += res
    
                # Update model
                new_home_elo = update_rating(home_elo, away_elo, res)
                new_away_elo = update_rating(away_elo, home_elo, 1 - res)

                elo.loc[elo['teams'] == home_team, 'elo'] = int(new_home_elo)
                elo.loc[elo['teams'] == away_team, 'elo'] = int(new_away_elo)

        combined_teams_df = pd.concat([afc_df, nfc_df])
        afc_df_merge = pd.merge(afc_df, sim_data, on='Teams')
        nfc_df_merge = pd.merge(nfc_df, sim_data, on='Teams')

        afc_playoff = get_playoff_df(afc_df_merge)
        nfc_playoff = get_playoff_df(nfc_df_merge)

        # Based on the playoff teams determined from above, simulate the entire playoffs 100 times
        playoff_sim = playoff_simulation(nfc_playoff, afc_playoff, elo, combined_teams_df, num_sim=101)
        for _, row in playoff_sim.iterrows():
            if row['Teams'] in postseason_probs['Teams'].values:
                postseason_probs.loc[postseason_probs['Teams'] == row['Teams'], 'Round 1'] += row['Round 1']
                postseason_probs.loc[postseason_probs['Teams'] == row['Teams'], 'Round 2'] += row['Round 2']
                postseason_probs.loc[postseason_probs['Teams'] == row['Teams'], 'Conference'] += row['Conference']
                postseason_probs.loc[postseason_probs['Teams'] == row['Teams'], 'Superbowl'] += row['Superbowl']

    postseason_probs['Round 1'] /= num_sim
    postseason_probs['Round 2'] /= num_sim
    postseason_probs['Conference'] /= num_sim
    postseason_probs['Superbowl'] /= num_sim
    return postseason_probs

In [35]:
elo_data = calc_elo_rankings(nfl_2020_2022)
nfl_2023 = pd.read_csv('../data/nfl_scraped_2023.csv')
playoff_data = season_simulation(nfl_2023, elo_data, start_week=14, num_sim=10)

Simulation: 0
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100
Simulation: 0
Simulation: 100


In [36]:
# Playoff probabilites from simulating the rest of regular season and postseason
display(playoff_data.sort_values(by="Superbowl", ascending=False).head(16))

Unnamed: 0,Teams,Round 1,Round 2,Conference,Superbowl
26,Philadelphia Eagles,0.89802,0.574257,0.342574,0.215842
12,San Francisco 49ers,0.675248,0.40297,0.223762,0.124752
19,Dallas Cowboys,0.809901,0.449505,0.232673,0.119802
0,Kansas City Chiefs,0.645545,0.375248,0.224752,0.116832
24,Miami Dolphins,0.848515,0.469307,0.238614,0.113861
2,Baltimore Ravens,0.717822,0.383168,0.20495,0.10099
7,Jacksonville Jaguars,0.581188,0.263366,0.122772,0.049505
27,Pittsburgh Steelers,0.273267,0.115842,0.061386,0.026733
6,Detroit Lions,0.528713,0.226733,0.074257,0.026733
16,Cleveland Browns,0.353465,0.155446,0.057426,0.019802
