In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
file_path = '../data/nfl_scores.csv'

data = pd.read_csv(file_path)
nfl_df = pd.DataFrame(data)

In [3]:
def calc_expected_res(team_rating, opp_rating):
  return 1 / (1 + 10 ** ((opp_rating - team_rating) / 400))

In [4]:
# Given home and away team update elo
def update_rating(team_rating, opp_rating, res, k=20):
  expected = calc_expected_res(team_rating, opp_rating)
  return team_rating + k * (res - expected)

In [16]:
def calc_elo_rankings(games):
  temp = pd.DataFrame({'teams': games['team_home'].unique(),
                       'elo': 1500})

  for i, game in games.iterrows():
    home_team = game['team_home']
    away_team = game['team_away']

    score_home = game['score_home']
    score_away = game['score_away']

    home_elo = temp.loc[temp['teams'] == home_team, 'elo'].iloc[0]
    away_elo = temp.loc[temp['teams'] == away_team, 'elo'].iloc[0]

    res = 1 if score_home > score_away else 0
    new_home_elo = update_rating(home_elo, away_elo, res)
    new_away_elo = update_rating(away_elo, home_elo, 1 - res)

    temp.loc[temp['teams'] == home_team, 'elo'] = new_home_elo
    temp.loc[temp['teams'] == away_team, 'elo'] = new_away_elo

  return temp
  

In [112]:
team_names = {'Washington Redskins': 'Washington Commanders', 
              'Washington Football Team': 'Washington Commanders', 
              'Oakland Raiders': 'Las Vegas Raiders',
              'Houston Oilers': 'Tennessee Titans',
              'Tennessee Oilers': 'Tennessee Titans',
              'St. Louis Rams': 'Los Angeles Rams',
              'St. Louis Cardinals': 'Arizona Cardinals',
              'Phoenix Cardinals': 'Arizona Cardinals',
              'San Diego Chargers': 'Los Angeles Chargers',
              'Baltimore Colts': 'Baltimore Ravens',
              'Boston Patriots': 'New England Patriots',
              'Los Angeles Raiders': 'Las Vegas Raiders'}

nfl_df['team_home'] = nfl_df['team_home'].replace(team_names)
nfl_df['team_away'] = nfl_df['team_away'].replace(team_names)

In [114]:
nfl_df["result_home"] = nfl_df["score_home"] - nfl_df["score_away"]
nfl_2016 = nfl_df[(nfl_df['schedule_season'] >= 2016) & (nfl_df['schedule_season'] < 2022)]
display(nfl_2016['team_home'].unique())
print(len(nfl_2016['team_home'].unique()))

array(['Denver Broncos', 'Arizona Cardinals', 'Atlanta Falcons',
       'Baltimore Ravens', 'Dallas Cowboys', 'Houston Texans',
       'Indianapolis Colts', 'Jacksonville Jaguars', 'Kansas City Chiefs',
       'New Orleans Saints', 'New York Jets', 'Philadelphia Eagles',
       'Seattle Seahawks', 'Tennessee Titans', 'San Francisco 49ers',
       'Washington Commanders', 'Buffalo Bills', 'Carolina Panthers',
       'Cleveland Browns', 'Detroit Lions', 'Los Angeles Rams',
       'Minnesota Vikings', 'New England Patriots', 'New York Giants',
       'Las Vegas Raiders', 'Pittsburgh Steelers', 'Los Angeles Chargers',
       'Chicago Bears', 'Cincinnati Bengals', 'Green Bay Packers',
       'Miami Dolphins', 'Tampa Bay Buccaneers'], dtype=object)

32


In [18]:
def season_simulation(games, elo_ratings, num_sim=100):
    sim_outcome = pd.DataFrame({'Teams': elo_ratings['teams'].unique(),
                              'Wins': 0,
                              'Loss': 0})

    start_week = 1
    for sim in range(num_sim):
        if sim % 10 == 0:
            print("Simulation:", sim)
        elo = elo_ratings.copy()
        for week_index in range(start_week, 19):
            curr = games[games['schedule_week'] == str(week_index)]
            for i, game in curr.iterrows():
                home_team = game['team_home']
                away_team = game['team_away']

                home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
                away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

                # Prediction here
                expected = calc_expected_res(home_elo, away_elo)
                res = 1 if random.random() < expected else 0

                sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Wins'] += res
                sim_outcome.loc[sim_outcome['Teams'] == home_team, 'Loss'] += (1 - res)
                sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Wins'] += (1 - res)
                sim_outcome.loc[sim_outcome['Teams'] == away_team, 'Loss'] += res
    
                # Update model
                new_home_elo = update_rating(home_elo, away_elo, res)
                new_away_elo = update_rating(away_elo, home_elo, 1 - res)

                elo.loc[elo['teams'] == home_team, 'elo'] = new_home_elo
                elo.loc[elo['teams'] == away_team, 'elo'] = new_away_elo


    sim_outcome['Wins'] /= num_sim
    sim_outcome['Loss'] /= num_sim
    return sim_outcome

In [115]:
nfl_2022 = nfl_df[(nfl_df['schedule_season'] == 2022) & (nfl_df['schedule_playoff'] == False)]
elo_data = calc_elo_rankings(nfl_2016)
display(elo_data.sort_values(by='elo', ascending=False))
pred = season_simulation(nfl_2022, elo_data)

  temp.loc[temp['teams'] == home_team, 'elo'] = new_home_elo


Unnamed: 0,teams,elo
8,Kansas City Chiefs,1686.611548
29,Green Bay Packers,1637.972036
20,Los Angeles Rams,1632.245368
31,Tampa Bay Buccaneers,1606.431341
16,Buffalo Bills,1606.063101
9,New Orleans Saints,1602.839902
13,Tennessee Titans,1589.655252
22,New England Patriots,1567.083681
3,Baltimore Ravens,1557.614433
25,Pittsburgh Steelers,1555.923992


Simulation: 0
Simulation: 10
Simulation: 20
Simulation: 30
Simulation: 40
Simulation: 50
Simulation: 60
Simulation: 70
Simulation: 80
Simulation: 90


In [116]:
pred.sort_values(by="Wins", ascending=False)

Unnamed: 0,Teams,Wins,Loss
8,Kansas City Chiefs,12.79,4.21
29,Green Bay Packers,12.31,4.69
13,Tennessee Titans,11.16,5.84
20,Los Angeles Rams,10.91,6.09
9,New Orleans Saints,10.75,6.25
3,Baltimore Ravens,10.61,6.39
4,Dallas Cowboys,9.95,7.05
31,Tampa Bay Buccaneers,9.93,7.07
16,Buffalo Bills,9.82,6.18
22,New England Patriots,9.77,7.23


In [21]:
afc_team_data = {
    "Division": {
        "East": ["Buffalo Bills", "Miami Dolphins", "New England Patriots", "New York Jets"],
        "North": ["Baltimore Ravens", "Cincinnati Bengals", "Cleveland Browns", "Pittsburgh Steelers"],
        "South": ["Houston Texans", "Indianapolis Colts", "Jacksonville Jaguars", "Tennessee Titans"],
        "West": ["Denver Broncos", "Kansas City Chiefs", "Las Vegas Raiders", "Los Angeles Chargers"]
    }
}

afc_df = pd.DataFrame()

for division, teams in afc_team_data["Division"].items():
    for team in teams:
        afc_df = pd.concat([afc_df, pd.DataFrame({"Teams": [team], "Conference": ["AFC"], "Division": [division]})])

afc_df.reset_index(drop=True, inplace=True)

print(afc_df)


                   Teams Conference Division
0          Buffalo Bills        AFC     East
1         Miami Dolphins        AFC     East
2   New England Patriots        AFC     East
3          New York Jets        AFC     East
4       Baltimore Ravens        AFC    North
5     Cincinnati Bengals        AFC    North
6       Cleveland Browns        AFC    North
7    Pittsburgh Steelers        AFC    North
8         Houston Texans        AFC    South
9     Indianapolis Colts        AFC    South
10  Jacksonville Jaguars        AFC    South
11      Tennessee Titans        AFC    South
12        Denver Broncos        AFC     West
13    Kansas City Chiefs        AFC     West
14     Las Vegas Raiders        AFC     West
15  Los Angeles Chargers        AFC     West


In [22]:
nfc_team_data = {
    "Division": {
        "East": ["Dallas Cowboys", "New York Giants", "Philadelphia Eagles", "Washington Commanders"],
        "North": ["Chicago Bears", "Detroit Lions", "Green Bay Packers", "Minnesota Vikings"],
        "South": ["Atlanta Falcons", "Carolina Panthers", "New Orleans Saints", "Tampa Bay Buccaneers"],
        "West": ["Arizona Cardinals", "Los Angeles Rams", "San Francisco 49ers", "Seattle Seahawks"]
    }
}

nfc_df = pd.DataFrame()

for division, teams in nfc_team_data["Division"].items():
    for team in teams:
        nfc_df = pd.concat([nfc_df, pd.DataFrame({"Teams": [team], "Conference": ["NFC"], "Division": [division]})])

nfc_df.reset_index(drop=True, inplace=True)
display(nfc_df)

Unnamed: 0,Teams,Conference,Division
0,Dallas Cowboys,NFC,East
1,New York Giants,NFC,East
2,Philadelphia Eagles,NFC,East
3,Washington Commanders,NFC,East
4,Chicago Bears,NFC,North
5,Detroit Lions,NFC,North
6,Green Bay Packers,NFC,North
7,Minnesota Vikings,NFC,North
8,Atlanta Falcons,NFC,South
9,Carolina Panthers,NFC,South


In [23]:
combined_teams_df = pd.concat([afc_df, nfc_df])
afc_df = pd.merge(afc_df, pred, on='Teams')
nfc_df = pd.merge(nfc_df, pred, on='Teams')
nfc_df

Unnamed: 0,Teams,Conference,Division,Wins,Loss
0,Dallas Cowboys,NFC,East,9.58,7.42
1,New York Giants,NFC,East,5.78,11.22
2,Philadelphia Eagles,NFC,East,7.76,9.24
3,Washington Commanders,NFC,East,7.8,9.2
4,Chicago Bears,NFC,North,8.15,8.85
5,Detroit Lions,NFC,North,6.32,10.68
6,Green Bay Packers,NFC,North,11.6,5.4
7,Minnesota Vikings,NFC,North,8.24,8.76
8,Atlanta Falcons,NFC,South,5.98,11.02
9,Carolina Panthers,NFC,South,5.77,11.23


In [24]:
def get_playoff_df(conf_record):
  # Select best record in each division
  division_champs = conf_record.sort_values(by=['Wins', 'Loss'], ascending=[False, True]) \
                               .groupby('Division').head(1)
  division_champs["Seed"] = [1,2,3,4]
  # Select 3 best remaining records as wildcard
  wild_cards = conf_record.sort_values(by=['Wins', 'Loss'], ascending=[False, True]) \
                          .groupby('Division').tail(3).head(3)
  wild_cards["Seed"] = [5,6,7]
  return pd.concat([division_champs, wild_cards]).reset_index(drop=True)

In [25]:
afc_playoff = get_playoff_df(afc_df)
nfc_playoff = get_playoff_df(nfc_df)

In [26]:
afc_playoff

Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Kansas City Chiefs,AFC,West,11.71,5.29,1
1,Tennessee Titans,AFC,South,10.92,6.08,2
2,Buffalo Bills,AFC,East,10.09,5.91,3
3,Baltimore Ravens,AFC,North,9.7,7.3,4
4,Indianapolis Colts,AFC,South,9.92,7.08,5
5,Pittsburgh Steelers,AFC,North,9.38,7.62,6
6,Miami Dolphins,AFC,East,9.13,7.87,7


In [27]:
nfc_playoff

Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Green Bay Packers,NFC,North,11.6,5.4,1
1,Tampa Bay Buccaneers,NFC,South,10.82,6.18,2
2,Los Angeles Rams,NFC,West,10.41,6.59,3
3,Dallas Cowboys,NFC,East,9.58,7.42,4
4,New Orleans Saints,NFC,South,10.12,6.88,5
5,San Francisco 49ers,NFC,West,9.46,7.54,6
6,Seattle Seahawks,NFC,West,8.93,8.07,7


In [79]:
nfl_playoff = pd.concat([nfc_playoff, afc_playoff])
nfl_playoff

Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Green Bay Packers,NFC,North,11.6,5.4,1
1,Tampa Bay Buccaneers,NFC,South,10.82,6.18,2
2,Los Angeles Rams,NFC,West,10.41,6.59,3
3,Dallas Cowboys,NFC,East,9.58,7.42,4
4,New Orleans Saints,NFC,South,10.12,6.88,5
5,San Francisco 49ers,NFC,West,9.46,7.54,6
6,Seattle Seahawks,NFC,West,8.93,8.07,7
0,Kansas City Chiefs,AFC,West,11.71,5.29,1
1,Tennessee Titans,AFC,South,10.92,6.08,2
2,Buffalo Bills,AFC,East,10.09,5.91,3


In [28]:
def generate_matchups(playoff_teams, pairings):
  matchups = pd.DataFrame(pairings)
  matchups = matchups.merge(playoff_teams, left_on="team_home_seed", right_on="Seed", suffixes=('_home', '_away')) \
                     .merge(playoff_teams, left_on="team_away_seed", right_on="Seed", suffixes=('_home', '_away'))

  matchups = matchups[['team_home_seed', 'team_away_seed', 'Teams_home', 'Teams_away']]
  matchups.columns = ['team_home_seed', 'team_away_seed', 'team_home', 'team_away']
  return matchups

In [29]:
def generate_round_one(playoff_teams):
  pairings = {
      "team_home_seed": [2, 3, 4],
      "team_away_seed": [7, 6, 5]
  }
  return generate_matchups(playoff_teams, pairings)

In [30]:
def simulate_postseason_round(curr_round, teams, elo):
  sim_outcome = pd.DataFrame()

  for _, game in curr_round.iterrows():
      home_team = game['team_home']
      away_team = game['team_away']

      home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
      away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

      # Prediction here
      expected = calc_expected_res(home_elo, away_elo)
      res = 1 if random.random() < expected else 0
      if res:
        row = teams.loc[teams['Teams'] == home_team]
        sim_outcome = pd.concat([sim_outcome, row])
      else:
        row = teams.loc[teams['Teams'] == away_team]
        sim_outcome = pd.concat([sim_outcome, row])

      new_home_elo = update_rating(home_elo, away_elo, res)
      new_away_elo = update_rating(away_elo, home_elo, 1 - res)
    
      # Update model
      elo.loc[elo['teams'] == home_team, 'elo'] = new_home_elo
      elo.loc[elo['teams'] == away_team, 'elo'] = new_away_elo

  return pd.concat([sim_outcome]).reset_index(drop=True)

In [31]:
def round1_simulation(wildcard, teams, elo):
  sim_outcome = pd.DataFrame()

  for _, game in wildcard.iterrows():
      home_team = game['team_home']
      away_team = game['team_away']

      home_elo = elo.loc[elo['teams'] == home_team, 'elo'].iloc[0]
      away_elo = elo.loc[elo['teams'] == away_team, 'elo'].iloc[0]

      # Prediction here
      expected = calc_expected_res(home_elo, away_elo)
      res = 1 if random.random() < expected else 0
      if res:
        row = teams.loc[teams['Teams'] == home_team]
        sim_outcome = pd.concat([sim_outcome, row])
      else:
        row = teams.loc[teams['Teams'] == away_team]
        sim_outcome = pd.concat([sim_outcome, row])

      new_home_elo = update_rating(home_elo, away_elo, res)
      new_away_elo = update_rating(away_elo, home_elo, 1 - res)
    
      # Update model
      elo.loc[elo['teams'] == home_team, 'elo'] = new_home_elo
      elo.loc[elo['teams'] == away_team, 'elo'] = new_away_elo

  return pd.concat([sim_outcome, teams.head(1)]).reset_index(drop=True)

In [32]:
def generate_round_two(round1_results):
    order = round1_results["Seed"].sort_values().reset_index(drop=True)
    pairings = {
        "team_home_seed": [order[0], order[1]],
        "team_away_seed": [order[3], order[2]]
    }
    return generate_matchups(round1_results, pairings)

In [33]:
def generate_division_champ(results):
    order = results["Seed"].sort_values().reset_index(drop=True)
    pairings = {
        "team_home_seed": [order[0]],
        "team_away_seed": [order[1]]
    }
    return generate_matchups(results, pairings)

In [34]:
def generate_superbowl(results):
    matchup = pd.DataFrame({
        "team_home_seed": [str(results["Seed"].iloc[0])],
        "team_away_seed": [str(results["Seed"].iloc[1])],
        "team_home": [results["Teams"].iloc[0]],
        "team_away": [results["Teams"].iloc[1]]
    })
    return matchup

In [35]:
def simulate_playoffs(conf, elo_data):
  round1 = generate_round_one(conf)
  round2 = round1_simulation(round1, conf, elo_data)
  divisional = generate_round_two(round2)
  round3 = simulate_postseason_round(divisional, conf, elo_data)
  finals = generate_division_champ(round3)
  return simulate_postseason_round(finals, conf, elo_data)

def predict_super_bowl_winner(conf1, conf2, elo_data):
  superbowl = pd.concat([simulate_playoffs(nfc_playoff, elo_data), simulate_playoffs(afc_playoff, elo_data)])
  superbowl_matchup = generate_superbowl(superbowl)
  return simulate_postseason_round(superbowl_matchup, pd.concat([conf1, conf2]), elo_data) 

In [36]:
round1 = generate_round_one(nfc_playoff)
round1_results = round1_simulation(round1, nfc_playoff, elo_data)
print(round1)
print(round1_results)
predict_super_bowl_winner(nfc_playoff, afc_playoff, elo_data)

   team_home_seed  team_away_seed             team_home            team_away
0               2               7  Tampa Bay Buccaneers     Seattle Seahawks
1               3               6      Los Angeles Rams  San Francisco 49ers
2               4               5        Dallas Cowboys   New Orleans Saints
                 Teams Conference Division   Wins  Loss  Seed
0     Seattle Seahawks        NFC     West   8.93  8.07     7
1  San Francisco 49ers        NFC     West   9.46  7.54     6
2       Dallas Cowboys        NFC     East   9.58  7.42     4
3    Green Bay Packers        NFC    North  11.60  5.40     1


Unnamed: 0,Teams,Conference,Division,Wins,Loss,Seed
0,Indianapolis Colts,AFC,South,9.92,7.08,5


In [118]:
def playoff_simulation(conf1, conf2, elo_rankings, teams, num_sim=5000):
  sim_outcome = pd.DataFrame({'Teams': teams['Teams'].unique(),
                              'Round 1': 0,
                              'Round 2': 0,
                              'Conference': 0,
                              'Superbowl': 0})
  
  for i in range(num_sim):
    elo_data = elo_rankings.copy()
    if i % 100 == 0:
      print("Simulation:", i)
    # Simulate Conf1 Playoff bracket 
    round1_pairings = generate_round_one(conf1)
    round1_results = round1_simulation(round1_pairings, conf1, elo_data)
    for _, result in round1_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 1'] += 1

    round2_pairings = generate_round_two(round1_results)
    round2_results = simulate_postseason_round(round2_pairings, conf1, elo_data)

    for _, result in round2_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 2'] += 1

    conf_pairing = generate_division_champ(round2_results)
    conf_results = simulate_postseason_round(conf_pairing, conf1, elo_data)

    for _, result in conf_results.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Conference'] += 1
    
    # Simulate Conf2 Playoff bracket 
    round1_pairings_conf2 = generate_round_one(conf2)
    round1_results_conf2 = round1_simulation(round1_pairings_conf2, conf2, elo_data)
    for _, result in round1_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 1'] += 1

    round2_pairings_conf2 = generate_round_two(round1_results_conf2)
    round2_results_conf2 = simulate_postseason_round(round2_pairings_conf2, conf2, elo_data)

    for _, result in round2_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Round 2'] += 1

    conf_pairing_conf2 = generate_division_champ(round2_results_conf2)
    conf_results_conf2 = simulate_postseason_round(conf_pairing_conf2, conf2, elo_data)

    for _, result in conf_results_conf2.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Conference'] += 1

    # Simulate superbowl
    superbowl_teams = pd.concat([conf_results, conf_results_conf2])
    superbowl_pairing = generate_superbowl(superbowl_teams)
    superbowl_result = simulate_postseason_round(superbowl_pairing, pd.concat([conf1, conf2]), elo_data) 

    for _, result in superbowl_result.iterrows():
      sim_outcome.loc[result['Teams'] == sim_outcome['Teams'], 'Superbowl'] += 1
  
  sim_outcome['Round 1'] /= num_sim
  sim_outcome['Round 2'] /= num_sim
  sim_outcome['Conference'] /= num_sim
  sim_outcome['Superbowl'] /= num_sim
  return sim_outcome

# display(elo_data.sort_values(by='elo', ascending=False))
playoff_sim = playoff_simulation(nfc_playoff, afc_playoff, elo_data, nfl_playoff)
display(playoff_sim.sort_values(by='Superbowl', ascending=False))

Simulation: 0
Simulation: 100
Simulation: 200
Simulation: 300
Simulation: 400
Simulation: 500
Simulation: 600
Simulation: 700
Simulation: 800
Simulation: 900
Simulation: 1000
Simulation: 1100
Simulation: 1200
Simulation: 1300
Simulation: 1400
Simulation: 1500
Simulation: 1600
Simulation: 1700
Simulation: 1800
Simulation: 1900
Simulation: 2000
Simulation: 2100
Simulation: 2200
Simulation: 2300
Simulation: 2400
Simulation: 2500
Simulation: 2600
Simulation: 2700
Simulation: 2800
Simulation: 2900
Simulation: 3000
Simulation: 3100
Simulation: 3200
Simulation: 3300
Simulation: 3400
Simulation: 3500
Simulation: 3600
Simulation: 3700
Simulation: 3800
Simulation: 3900
Simulation: 4000
Simulation: 4100
Simulation: 4200
Simulation: 4300
Simulation: 4400
Simulation: 4500
Simulation: 4600
Simulation: 4700
Simulation: 4800
Simulation: 4900


Unnamed: 0,Teams,Round 1,Round 2,Conference,Superbowl
7,Kansas City Chiefs,1.0,0.684,0.4284,0.2526
0,Green Bay Packers,1.0,0.6032,0.3236,0.169
2,Los Angeles Rams,0.6262,0.3468,0.191,0.0974
1,Tampa Bay Buccaneers,0.6006,0.291,0.1474,0.0774
9,Buffalo Bills,0.572,0.3162,0.1542,0.0744
4,New Orleans Saints,0.595,0.292,0.145,0.0708
8,Tennessee Titans,0.615,0.3162,0.1352,0.0616
10,Baltimore Ravens,0.5402,0.221,0.0922,0.039
12,Pittsburgh Steelers,0.428,0.181,0.0774,0.0326
3,Dallas Cowboys,0.405,0.1636,0.0674,0.0296


In [38]:
nfl_df

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,result_home
0,9/2/1966,1966,1,False,Miami Dolphins,14.0,23.0,Las Vegas Raiders,,,,Orange Bowl,False,83.0,6.0,71.0,,-9.0
1,9/3/1966,1966,1,False,Houston Oilers,45.0,7.0,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70.0,,38.0
2,9/4/1966,1966,1,False,San Diego Chargers,27.0,7.0,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82.0,,20.0
3,9/9/1966,1966,2,False,Miami Dolphins,14.0,19.0,New York Jets,,,,Orange Bowl,False,82.0,11.0,78.0,,-5.0
4,9/10/1966,1966,1,False,Green Bay Packers,24.0,3.0,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62.0,,21.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13783,1/7/2024,2023,18,False,New Orleans Saints,,,Atlanta Falcons,,,,Caesars Superdome,False,72.0,0.0,,indoor,
13784,1/7/2024,2023,18,False,New York Giants,,,Philadelphia Eagles,,,,MetLife Stadium,False,,,,,
13785,1/7/2024,2023,18,False,San Francisco 49ers,,,Los Angeles Rams,,,,Levi's Stadium,False,,,,,
13786,1/7/2024,2023,18,False,Tennessee Titans,,,Jacksonville Jaguars,,,,Nissan Stadium,False,,,,,


In [39]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

In [40]:
nfl_data = nfl_2018.dropna(axis=1)
nfl_data = nfl_data.drop('schedule_date', axis=1)
nfl_data

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,result_home
12411,2019,1,False,Chicago Bears,3.0,10.0,Green Bay Packers,CHI,-3.0,46.5,Soldier Field,False,-7.0
12412,2019,1,False,Arizona Cardinals,27.0,27.0,Detroit Lions,DET,-3.0,45.5,University of Phoenix Stadium,False,0.0
12413,2019,1,False,Carolina Panthers,27.0,30.0,Los Angeles Rams,LAR,-2.0,49.5,Bank of America Stadium,False,-3.0
12414,2019,1,False,Cleveland Browns,13.0,43.0,Tennessee Titans,CLE,-5.5,44,FirstEnergy Stadium,False,-30.0
12415,2019,1,False,Dallas Cowboys,35.0,17.0,New York Giants,DAL,-7.0,44,AT&T Stadium,False,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13227,2021,Division,True,Kansas City Chiefs,42.0,36.0,Buffalo Bills,KC,-2.5,54,Arrowhead Stadium,False,6.0
13228,2021,Division,True,Tampa Bay Buccaneers,27.0,30.0,Los Angeles Rams,TB,-3.0,48,Raymond James Stadium,False,-3.0
13229,2021,Conference,True,Kansas City Chiefs,24.0,27.0,Cincinnati Bengals,KC,-7.0,54.5,Arrowhead Stadium,False,-3.0
13230,2021,Conference,True,Los Angeles Rams,20.0,17.0,San Francisco 49ers,LAR,-3.5,46,SoFi Stadium,False,3.0


In [41]:
X = nfl_data.drop('result_home', axis=1)
y = nfl_data['result_home']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [42]:
nfl_labels = nfl_data["result_home"].tolist()
nfl_features = nfl_data._get_numeric_data().columns.values.tolist()[1:-1]
nfl_feature_data = nfl_data[nfl_features]
X_train, X_test, y_train, y_test = train_test_split(nfl_feature_data, nfl_labels, test_size=0.3)
nfl_features

['schedule_playoff',
 'score_home',
 'score_away',
 'spread_favorite',
 'stadium_neutral']

In [43]:
params = {
    "n_estimators": 500,
    "learning_rate": 0.01,
    "loss": "log_loss",
}

In [44]:
gb_model = GradientBoostingClassifier(**params)

# Train the Gradient Boosting model
gb_model.fit(X_train, y_train)

# Make predictions on the test set
gb_predictions = gb_model.predict(X_test)

# Evaluate the Gradient Boosting model accuracy
gb_accuracy = accuracy_score(y_test, gb_predictions)
print(f'Gradient Boosting Model Accuracy: {gb_accuracy}')

Gradient Boosting Model Accuracy: 0.2793522267206478


In [45]:
rf_model = RandomForestClassifier(n_estimators=1000)

rf_model.fit(X_train, y_train)

rf_predictions = rf_model.predict(X_test)

rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f'Random Forest Model Accuracy: {rf_accuracy}')

Random Forest Model Accuracy: 0.2591093117408907
