In [1]:
# Import packages

import pandas as pd 
import numpy as np 
import os
from bs4 import BeautifulSoup
import time
import requests
import nfl_data_py as nfl
import matplotlib.pyplot as plt
import plotly
import plotly.express as px

# Set certain settings for the notebook
pd.set_option("display.max_columns", None)

In [2]:
# Check current directory
os.getcwd()

'C:\\Users\\imacd_0odruq3\\Documents\\sports_gambling\\sports_gambling_algorithm'

In [3]:
# Use nfl_data_py package to get data
# Only need data from the current season and possibly previous season for predictions

current_season = 2022
previous_season = current_season-1
previous_previous_season = current_season - 2

nfl_df = nfl.import_pbp_data([previous_previous_season, previous_season, current_season])

2020 done.
2021 done.
2022 done.
Downcasting floats.


In [4]:
# Add column detailing if there was a qb designed run

rosters = nfl.import_rosters([previous_season, current_season])
qb_roster = rosters[rosters.position == 'QB']
qb_roster = qb_roster[['position', 'player_name', 'player_id']].drop_duplicates()
qb_roster['player_id_string'] = qb_roster.apply(lambda x: str(x.player_id), axis=1)

def is_designed_qb_run(play, qb_roster):
    if play.rusher_id:
        if qb_roster.player_id_string.str.contains(str(play.rusher_id)).any():
            return 1 
    else: 
        return 0 
    
nfl_df['qb_designed_run'] = nfl_df.apply(lambda x: is_designed_qb_run(x, qb_roster), axis=1)

In [5]:
# Get important offensive NFL stats per game per team

# Get only offensive plays, take out qb kneels

offense = nfl_df[nfl_df.play_type.isin(['run', 'pass'])]
offense = offense[offense.qb_kneel == 0]
# offense

In [6]:
# Get rushing stats

# Total rushes, total rush yards, rushing epa

rushes = offense[offense.play_type == 'run']
rushing_grouped = rushes.groupby(by=['season', 'week', 'posteam'])
rush_df = rushing_grouped.count()['play_id'].rename('total_rushes').to_frame()
rush_df[['total_rush_yards', 'rushing_epa', 'rush_tds']] = rushing_grouped.sum()[['yards_gained', 'epa', 'rush_touchdown']]

# rush_df

In [7]:
# Get passing stats

# Total pass attempts, total pass yards, passing epa

passing = offense[offense.play_type == 'pass']
passing_grouped = passing.groupby(by=['season', 'week', 'posteam'])
passing_grouped_no_sacks = passing[passing.sack == 0].groupby(by=['season', 'week', 'posteam'])
pass_df = passing_grouped_no_sacks.count()['play_id'].rename('total_pass_attempts').to_frame()
pass_df['total_passing_yards'] = passing_grouped_no_sacks.sum()['yards_gained']
pass_df[['completions', 'passing_epa', 'pass_tds']] = passing_grouped.sum()[['complete_pass', 'epa', 'pass_touchdown']]
pass_df['avg_cpoe'] = passing_grouped_no_sacks.mean()['cpoe'].rename('avg_cpoe').to_frame()

# pass_df

In [8]:
# Get other offensive stats

# Get qb epa 
qb_stats = offense[(offense.play_type == 'pass') | (offense.qb_scramble == 1) | (offense.qb_designed_run == 1)]
qb_epa_df = qb_stats.groupby(by = ['season', 'week', 'posteam']).sum()['epa'].rename('qb_epa').to_frame()

# Get sacks, interceptions, fumbles, fumbles lost, turnovers
turnovers_df = offense.groupby(by = ['season', 'week', 'posteam']).sum()[['sack', 'fumble', 'fumble_lost', 'interception']]
turnovers_df.columns = ['sacks_allowed', 'fumbles', 'lost_fumbles', 'interceptions_thrown']
# turnovers_df

In [9]:
# Get important defensive NFL stats per game per team

# Get only defensive plays, take out qb kneels

defense = nfl_df[nfl_df.play_type.isin(['run', 'pass'])]
defense = defense[defense.qb_kneel == 0]
defense

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,nflverse_game_id,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,qb_designed_run
2,54.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,SF,75.0,2020-09-13,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,15:00,SF 25,10.0,41.0,(15:00) (Shotgun) 10-J.Garoppolo pass short ri...,pass,5.0,1.0,0.0,1.0,0.0,0.0,0.0,short,right,4.0,1.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004661,0.144037,0.002072,0.226051,0.212601,0.003828,0.406750,0.0,0.0,1.474098,1.294838,1.294838,-1.294838,0.000000,0.000000,1.294838,-1.294838,-0.132787,1.427625,-0.132787,1.427625,-0.132787,0.132787,1.427625,-1.427625,-0.132787,0.132787,1.427625,-1.427625,0.546262,0.453738,0.546262,0.453738,0.033715,-2.336264e-03,-2.336264e-03,0.579976,0.420024,0.737399,0.737399,0.000000,0.000000,0.033715,-0.033715,0.0,0.033715,0.0,0.033715,0.000000,0.000000,0.033715,-0.033715,0.000000,0.000000,0.033715,-0.033715,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,00-0031345,J.Garoppolo,5.0,00-0033288,G.Kittle,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,,00-0036356,,I.Simmons,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,ARI,00-0036356,I.Simmons,15.0,0.0,,Horse Collar Tackle,0.0,0.0,0.0,0.0,,,2020,0.782279,21.772110,1.0,1.0,First down,54.0,16:25:00,20:26:28,Levi's Stadium,"Hazy Temp: 66° F, Humidity: 68%, Wind: NNW 6 mph",10160000-0581-80a3-3e67-fadbeaac892d,10,0.0,PASS,0.0,,,SF 45,1.0,Field goal,,6.0,3:10,2.0,0.0,1.0,1.0,1.0,15.0,KICKOFF,FIELD_GOAL,15:00,11:50,SF 25,ARI 34,39.0,197.0,24,20,Home,-4,44,7.0,48.5,1,outdoors,grass,66.0,6.0,Kyle Shanahan,Kliff Kingsbury,SFO01,Levi's Stadium,0.0,1.0,J.Garoppolo,10.0,,,G.Kittle,85.0,1.0,0.0,1.0,0.0,1.0,00-0031345,,00-0033288,J.Garoppolo,10.0,00-0031345,G.Kittle,00-0033288,G.Kittle,00-0033288,0.0,1.0,1.294838,0.503370,4.275047,2.0,0.619306,0.239695,0.515058,48.494156,2020_01_ARI_SF,SF,SHOTGUN,"2 RB, 1 TE, 2 WR",7.0,"2 DL, 5 LB, 4 DB",4.0,40078;46078;45069;46113;38551;45185;42718;4781...,00-0029892;00-0034847;00-0033221;00-0034860;00...,00-0029585;00-0035236;00-0031557;00-0035705;00...,11.0,11.0,0.0
3,93.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,SF,55.0,2020-09-13,882.0,1782.0,3582.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,14:42,SF 45,10.0,41.0,(14:42) (Shotgun) 31-R.Mostert right tackle to...,run,14.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,,right,tackle,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004407,0.099809,0.001486,0.154657,0.249845,0.004835,0.484961,0.0,0.0,2.768936,0.857214,2.152052,-2.152052,0.857214,-0.857214,1.294838,-1.294838,,,0.000000,0.000000,-0.132787,0.132787,1.427625,-1.427625,-0.132787,0.132787,1.427625,-1.427625,0.579976,0.420024,0.579976,0.420024,0.024499,4.160225e-02,4.160225e-02,0.604475,0.395525,0.735063,0.735063,0.024499,-0.024499,0.033715,-0.033715,,,0.0,0.000000,0.000000,0.000000,0.033715,-0.033715,0.000000,0.000000,0.033715,-0.033715,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0031687,R.Mostert,14.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,,00-0035705,,J.Thompson,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2020,,,2.0,1.0,First down,93.0,16:25:00,20:27:19,Levi's Stadium,"Hazy Temp: 66° F, Humidity: 68%, Wind: NNW 6 mph",10160000-0581-80a3-3e67-fadbeaac892d,14,0.0,RUSH,0.0,,14:32,ARI 41,1.0,Field goal,,6.0,3:10,2.0,0.0,1.0,1.0,1.0,15.0,KICKOFF,FIELD_GOAL,15:00,11:50,SF 25,ARI 34,39.0,197.0,24,20,Home,-4,44,7.0,48.5,1,outdoors,grass,66.0,6.0,Kyle Shanahan,Kliff Kingsbury,SFO01,Levi's Stadium,0.0,1.0,,,R.Mostert,31.0,,,0.0,1.0,1.0,0.0,1.0,,00-0031687,,R.Mostert,31.0,00-0031687,R.Mostert,00-0031687,R.Mostert,00-0031687,0.0,1.0,0.857214,,,,,,0.413357,-41.335732,2020_01_ARI_SF,SF,SHOTGUN,"2 RB, 1 TE, 2 WR",7.0,"2 DL, 5 LB, 4 DB",,40078;46078;45069;46113;38551;45185;42718;4781...,00-0029892;00-0034847;00-0033221;00-0034860;00...,00-0029585;00-0035236;00-0031557;00-0035705;00...,11.0,11.0,
4,118.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,ARI,41.0,2020-09-13,839.0,1739.0,3539.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,13:59,ARI 41,10.0,41.0,(13:59) 31-R.Mostert left end to ARI 39 for 2 ...,run,2.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,left,end,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004444,0.070264,0.001132,0.101997,0.295102,0.004311,0.522751,0.0,0.0,3.626150,-0.454665,1.697387,-1.697387,0.402549,-0.402549,1.294838,-1.294838,,,0.000000,0.000000,-0.132787,0.132787,1.427625,-1.427625,-0.132787,0.132787,1.427625,-1.427625,0.604475,0.395525,0.604475,0.395525,-0.004065,3.862798e-03,3.862798e-03,0.600411,0.399589,0.776665,0.776665,0.020434,-0.020434,0.033715,-0.033715,,,0.0,0.000000,0.000000,0.000000,0.033715,-0.033715,0.000000,0.000000,0.033715,-0.033715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0031687,R.Mostert,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,,00-0036356,,I.Simmons,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2020,,,3.0,0.0,Field goal,118.0,16:25:00,20:28:02,Levi's Stadium,"Hazy Temp: 66° F, Humidity: 68%, Wind: NNW 6 mph",10160000-0581-80a3-3e67-fadbeaac892d,6,0.0,RUSH,0.0,,13:51,ARI 39,1.0,Field goal,,6.0,3:10,2.0,0.0,1.0,1.0,1.0,15.0,KICKOFF,FIELD_GOAL,15:00,11:50,SF 25,ARI 34,39.0,197.0,24,20,Home,-4,44,7.0,48.5,1,outdoors,grass,66.0,6.0,Kyle Shanahan,Kliff Kingsbury,SFO01,Levi's Stadium,0.0,0.0,,,R.Mostert,31.0,,,0.0,1.0,0.0,0.0,1.0,,00-0031687,,R.Mostert,31.0,00-0031687,R.Mostert,00-0031687,R.Mostert,00-0031687,0.0,1.0,-0.454665,,,,,,0.446920,-44.692024,2020_01_ARI_SF,SF,SINGLEBACK,"2 RB, 1 TE, 2 WR",7.0,"2 DL, 5 LB, 4 DB",,40078;46078;46113;45069;38551;45185;42718;4781...,00-0029892;00-0034847;00-0034860;00-0033221;00...,00-0029585;00-0035236;00-0031557;00-0029747;00...,11.0,11.0,
5,143.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,ARI,39.0,2020-09-13,801.0,1701.0,3501.0,Half1,0.0,1.0,0.0,1.0,2.0,0.0,13:21,ARI 39,8.0,41.0,(13:21) (Shotgun) 31-R.Mostert right end to AR...,run,-6.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,,right,end,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004862,0.080394,0.001079,0.119630,0.320622,0.004703,0.468709,0.0,0.0,3.171485,-1.780660,-0.083272,0.083272,-1.378110,1.378110,1.294838,-1.294838,,,0.000000,0.000000,-0.132787,0.132787,1.427625,-1.427625,-0.132787,0.132787,1.427625,-1.427625,0.600411,0.399589,0.600411,0.399589,-0.077705,-7.492870e-02,-7.492870e-02,0.522706,0.477294,0.780528,0.780528,-0.057271,0.057271,0.033715,-0.033715,,,0.0,0.000000,0.000000,0.000000,0.033715,-0.033715,0.000000,0.000000,0.033715,-0.033715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0031687,R.Mostert,-6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0032129,J.Hicks,,,,,,,,,,,,,ARI,,00-0032129,,J.Hicks,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2020,,,3.0,0.0,Field goal,143.0,16:25:00,20:28:39,Levi's Stadium,"Hazy Temp: 66° F, Humidity: 68%, Wind: NNW 6 mph",10160000-0581-80a3-3e67-fadbeaac892d,9,0.0,RUSH,0.0,,13:14,ARI 45,1.0,Field goal,,6.0,3:10,2.0,0.0,1.0,1.0,1.0,15.0,KICKOFF,FIELD_GOAL,15:00,11:50,SF 25,ARI 34,39.0,197.0,24,20,Home,-4,44,7.0,48.5,1,outdoors,grass,66.0,6.0,Kyle Shanahan,Kliff Kingsbury,SFO01,Levi's Stadium,0.0,0.0,,,R.Mostert,31.0,,,0.0,1.0,0.0,0.0,1.0,,00-0031687,,R.Mostert,31.0,00-0031687,R.Mostert,00-0031687,R.Mostert,00-0031687,0.0,1.0,-1.780660,,,,,,0.681858,-68.185814,2020_01_ARI_SF,SF,SHOTGUN,"2 RB, 1 TE, 2 WR",7.0,"2 DL, 5 LB, 4 DB",,40078;46078;45069;46113;38551;45185;42718;4781...,00-0029892;00-0034847;00-0033221;00-0034860;00...,00-0029585;00-0035236;00-0029747;00-0027686;00...,11.0,11.0,
6,165.0,2020_01_ARI_SF,2020091311,SF,ARI,REG,1,SF,home,ARI,ARI,45.0,2020-09-13,759.0,1659.0,3459.0,Half1,0.0,1.0,0.0,1.0,3.0,0.0,12:39,ARI 45,14.0,41.0,(12:39) (Shotgun) 10-J.Garoppolo pass short mi...,pass,11.0,1.0,0.0,1.0,0.0,0.0,0.0,short,middle,10.0,1.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005812,0.146966,0.001768,0.208018,0.283249,0.007533,0.346654,0.0,0.0,1.390825,-0.538702,-0.621975,0.621975,-1.378110,1.378110,0.756136,-0.756136,0.383346,-0.922049,0.383346,-0.922049,0.250560,-0.250560,0.505576,-0.505576,0.250560,-0.250560,0.505576,-0.505576,0.522706,0.477294,0.522706,0.477294,0.026239,-6.458998e-03,-6.458998e-03,0.548945,0.451055,0.705600,0.705600,-0.057271,0.057271,0.059954,-0.059954,0.0,0.026239,0.0,0.026239,0.000000,0.000000,0.059954,-0.059954,0.000000,0.000000,0.059954,-0.059954,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,00-0031345,J.Garoppolo,11.0,00-0033288,G.Kittle,11.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,,00-0032129,,J.Hicks,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2020,0.663767,33.623322,3.0,0.0,Field goal,165.0,16:25:00,20:29:22,Levi's Stadium,"Hazy Temp: 66° F, Humidity: 68%, Wind: NNW 6 mph",10160000-0581-80a3-3e67-fadbeaac892d,4,0.0,PASS,0.0,,12:23,ARI 34,1.0,Field goal,,6.0,3:10,2.0,0.0,1.0,1.0,1.0,15.0,KICKOFF,FIELD_GOAL,15:00,11:50,SF 25,ARI 34,39.0,197.0,24,20,Home,-4,44,7.0,48.5,1,outdoors,grass,66.0,6.0,Kyle Shanahan,Kliff Kingsbury,SFO01,Levi's Stadium,0.0,0.0,J.Garoppolo,10.0,,,G.Kittle,85.0,1.0,0.0,0.0,0.0,1.0,00-0031345,,00-0033288,J.Garoppolo,10.0,00-0031345,G.Kittle,00-0033288,G.Kittle,00-0033288,0.0,1.0,-0.538702,1.054806,4.119796,2.0,0.837606,0.374325,0.971992,2.800822,2020_01_ARI_SF,SF,SHOTGUN,"1 RB, 1 TE, 3 WR",5.0,"1 DL, 5 LB, 5 DB",4.0,46078;38547;45069;46113;38551;41325;45185;4781...,00-0034847;00-0033221;00-0034860;00-0031376;00...,00-0029560;00-0029585;00-0035236;00-0029747;00...,11.0,11.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128198,3486.0,2022_11_WAS_HOU,2022112003,HOU,WAS,REG,11,WAS,away,HOU,HOU,34.0,2022-11-20,147.0,147.0,147.0,Half2,0.0,21.0,0.0,4.0,3.0,0.0,02:27,HOU 34,2.0,38.0,(2:27) 24-A.Gibson right guard to HOU 16 for 1...,run,18.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,right,guard,,,,,1.0,3.0,0.0,,,,,3.0,1.0,10.0,23.0,23.0,10.0,13.0,23.0,10.0,13.0,0.290738,0.037384,0.000324,0.050097,0.281639,0.001315,0.338504,0.0,0.0,2.753596,0.813966,-20.788610,20.788610,-0.771691,0.771691,-19.780273,19.780273,,,0.000000,0.000000,-7.543125,7.543125,5.273963,-5.273963,-5.809467,5.809467,-4.109294,4.109294,0.997577,0.002423,0.002423,0.997577,0.001540,6.472468e-04,-6.472468e-04,0.000883,0.999117,0.999138,0.000862,-0.150716,0.150716,-0.504211,0.504211,,,0.0,0.000000,-0.071751,0.071751,-0.158471,0.158471,-0.071751,0.071751,-0.263982,0.263982,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,00-0036328,A.Gibson,18.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0033566,J.Pitre,HOU,00-0033566,D.King,HOU,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2022,,,48.0,1.0,First down,3486.0,13:00:00,20:48:44,NRG Stadium,"Sunny Temp: 50° F, Humidity: 65%, Wind: NE 9 mph",10160000-0589-9283-8907-f8b9d2d9a815,2,0.0,RUSH,0.0,,02:21,HOU 16,21.0,End of half,,7.0,3:19,2.0,1.0,0.0,4.0,4.0,0.0,KICKOFF,END_GAME,03:19,00:00,HOU 42,HOU 7,3415.0,3633.0,23,10,Home,-13,33,-3.0,41.0,0,closed,astroturf,,,Lovie Smith,Ron Rivera,HOU00,NRG Stadium,0.0,1.0,,,A.Gibson,24.0,,,0.0,1.0,1.0,0.0,1.0,,00-0036328,,A.Gibson,24.0,00-0036328,A.Gibson,00-0036328,A.Gibson,00-0036328,0.0,0.0,0.813966,,,,,,0.242909,-24.290918,2022_11_WAS_HOU,WAS,SINGLEBACK,"1 RB, 3 TE, 1 WR",8.0,"4 DL, 3 LB, 4 DB",,41475;55043;46148;44964;41349;54502;53480;4244...,00-0031095;00-0031260;00-0036618;00-0031362;00...,00-0037457;00-0034831;00-0033566;00-0037246;00...,11.0,11.0,
128200,3517.0,2022_11_WAS_HOU,2022112003,HOU,WAS,REG,11,WAS,away,HOU,HOU,16.0,2022-11-20,138.0,138.0,138.0,Half2,0.0,21.0,0.0,4.0,1.0,0.0,02:18,HOU 16,10.0,38.0,(2:18) 24-A.Gibson up the middle to HOU 14 for...,run,2.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,middle,,,,,,0.0,3.0,0.0,,,,,3.0,0.0,10.0,23.0,23.0,10.0,13.0,23.0,10.0,13.0,0.272831,0.006958,0.000316,0.014839,0.309297,0.001051,0.394707,0.0,0.0,3.567562,-0.261325,-20.527287,20.527287,-0.510366,0.510366,-19.780273,19.780273,,,0.000000,0.000000,-7.543125,7.543125,5.273963,-5.273963,-5.809467,5.809467,-4.109294,4.109294,0.999117,0.000883,0.000883,0.999117,-0.000160,7.152557e-07,-7.152557e-07,0.001043,0.998957,0.999785,0.000215,-0.150556,0.150556,-0.504211,0.504211,,,0.0,0.000000,-0.071751,0.071751,-0.158471,0.158471,-0.071751,0.071751,-0.263982,0.263982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,00-0036328,A.Gibson,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0034807,O.Okoronkwo,HOU,00-0027881,J.Hughes,HOU,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2022,,,49.0,1.0,First down,3517.0,13:00:00,20:49:49,NRG Stadium,"Sunny Temp: 50° F, Humidity: 65%, Wind: NE 9 mph",10160000-0589-9283-8907-f8b9d2d9a815,7,0.0,RUSH,0.0,,02:13,HOU 14,21.0,End of half,,7.0,3:19,2.0,1.0,0.0,4.0,4.0,0.0,KICKOFF,END_GAME,03:19,00:00,HOU 42,HOU 7,3415.0,3633.0,23,10,Home,-13,33,-3.0,41.0,0,closed,astroturf,,,Lovie Smith,Ron Rivera,HOU00,NRG Stadium,0.0,0.0,,,A.Gibson,24.0,,,0.0,1.0,0.0,0.0,1.0,,00-0036328,,A.Gibson,24.0,00-0036328,A.Gibson,00-0036328,A.Gibson,00-0036328,0.0,0.0,-0.261325,,,,,,0.026716,-2.671629,2022_11_WAS_HOU,WAS,SINGLEBACK,"1 RB, 3 TE, 1 WR",8.0,"4 DL, 3 LB, 4 DB",,55043;41475;44964;55045;41349;54502;53480;4244...,00-0031095;00-0031260;00-0036618;00-0031362;00...,00-0037457;00-0033566;00-0037459;00-0037246;00...,11.0,11.0,
128202,3545.0,2022_11_WAS_HOU,2022112003,HOU,WAS,REG,11,WAS,away,HOU,HOU,14.0,2022-11-20,120.0,120.0,120.0,Half2,0.0,21.0,0.0,4.0,2.0,0.0,02:00,HOU 14,8.0,38.0,(2:00) 24-A.Gibson left guard to HOU 7 for 7 y...,run,7.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,left,guard,,,,,0.0,3.0,0.0,,,,,3.0,0.0,10.0,23.0,23.0,10.0,13.0,23.0,10.0,13.0,0.270898,0.005913,0.000352,0.010797,0.394622,0.001106,0.316311,0.0,0.0,3.306237,1.004675,-21.531961,21.531961,-1.515041,1.515041,-19.780273,19.780273,,,0.000000,0.000000,-7.543125,7.543125,5.273963,-5.273963,-5.809467,5.809467,-4.109294,4.109294,0.998957,0.001043,0.001043,0.998957,0.000730,1.184344e-04,-1.184344e-04,0.000313,0.999687,0.999785,0.000215,-0.151286,0.151286,-0.504211,0.504211,,,0.0,0.000000,-0.071751,0.071751,-0.158471,0.158471,-0.071751,0.071751,-0.263982,0.263982,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0036328,A.Gibson,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,,00-0034485,,J.Owens,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2022,,,49.0,1.0,First down,3545.0,13:00:00,20:53:02,NRG Stadium,"Sunny Temp: 50° F, Humidity: 65%, Wind: NE 9 mph",10160000-0589-9283-8907-f8b9d2d9a815,12,0.0,RUSH,0.0,,01:56,HOU 7,21.0,End of half,,7.0,3:19,2.0,1.0,0.0,4.0,4.0,0.0,KICKOFF,END_GAME,03:19,00:00,HOU 42,HOU 7,3415.0,3633.0,23,10,Home,-13,33,-3.0,41.0,0,closed,astroturf,,,Lovie Smith,Ron Rivera,HOU00,NRG Stadium,0.0,1.0,,,A.Gibson,24.0,,,0.0,1.0,0.0,0.0,1.0,,00-0036328,,A.Gibson,24.0,00-0036328,A.Gibson,00-0036328,A.Gibson,00-0036328,0.0,0.0,1.004675,,,,,,0.039020,-3.901979,2022_11_WAS_HOU,WAS,SINGLEBACK,"1 RB, 3 TE, 1 WR",8.0,"4 DL, 3 LB, 4 DB",,55043;41475;44964;41349;55045;54502;53480;4244...,00-0031095;00-0031260;00-0036618;00-0031362;00...,00-0037457;00-0033566;00-0037459;00-0037246;00...,11.0,11.0,
128204,3566.0,2022_11_WAS_HOU,2022112003,HOU,WAS,REG,11,WAS,away,HOU,HOU,7.0,2022-11-20,72.0,72.0,72.0,Half2,0.0,21.0,0.0,4.0,3.0,0.0,01:12,HOU 7,1.0,38.0,(1:12) 8-B.Robinson up the middle to HOU 7 for...,run,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,middle,,,,,,0.0,2.0,0.0,,,,,2.0,0.0,10.0,23.0,23.0,10.0,13.0,23.0,10.0,13.0,0.183958,0.003347,0.000336,0.005288,0.321072,0.001313,0.484686,0.0,0.0,4.310912,-1.365027,-20.166935,20.166935,-0.150014,0.150014,-19.780273,19.780273,,,0.000000,0.000000,-7.543125,7.543125,5.273963,-5.273963,-5.809467,5.809467,-4.109294,4.109294,0.999687,0.000313,0.000313,0.999687,-0.000035,4.255772e-05,-4.255772e-05,0.000348,0.999652,0.999904,0.000096,-0.151251,0.151251,-0.504211,0.504211,,,0.0,0.000000,-0.071751,0.071751,-0.158471,0.158471,-0.071751,0.071751,-0.263982,0.263982,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0037746,B.Robinson,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HOU,,00-0034807,,O.Okoronkwo,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2022,,,49.0,1.0,First down,3566.0,13:00:00,20:54:53,NRG Stadium,"Sunny Temp: 50° F, Humidity: 65%, Wind: NE 9 mph",10160000-0589-9283-8907-f8b9d2d9a815,7,0.0,RUSH,0.0,,01:08,HOU 7,21.0,End of half,,7.0,3:19,2.0,1.0,0.0,4.0,4.0,0.0,KICKOFF,END_GAME,03:19,00:00,HOU 42,HOU 7,3415.0,3633.0,23,10,Home,-13,33,-3.0,41.0,0,closed,astroturf,,,Lovie Smith,Ron Rivera,HOU00,NRG Stadium,0.0,0.0,,,B.Robinson,8.0,,,0.0,1.0,0.0,0.0,1.0,,00-0037746,,B.Robinson,8.0,00-0037746,B.Robinson,00-0037746,B.Robinson,00-0037746,0.0,0.0,-1.365027,,,,,,0.069445,-6.944500,2022_11_WAS_HOU,WAS,I_FORM,"2 RB, 2 TE, 1 WR",8.0,"4 DL, 3 LB, 4 DB",,55043;41475;54563;44964;55045;54502;53480;4244...,00-0031095;00-0037746;00-0036618;00-0031362;00...,00-0037457;00-0033566;00-0037459;00-0037246;00...,11.0,11.0,


In [10]:
# Get rushing defense stats

# Total rushes, total rush yards, rushing epa

rushes_def = defense[defense.play_type == 'run']
rushing_grouped_def = rushes.groupby(by=['season', 'week', 'defteam'])
rush_df_def = rushing_grouped_def.count()['play_id'].rename('total_rushes_allowed').to_frame()
rush_df_def[['total_rush_yards_allowed', 'rushing_epa_allowed', 'rush_tds_allowed']] = rushing_grouped_def.sum()[['yards_gained', 'epa', 'rush_touchdown']]

# rush_df_def

In [11]:
# Get passing defense stats

# Total pass attempts, total pass yards, passing epa

passing_def = defense[defense.play_type == 'pass']
passing_grouped_def = passing_def.groupby(by=['season', 'week', 'defteam'])
passing_grouped_def_no_sacks = passing_def[passing_def.sack == 0].groupby(by=['season', 'week', 'defteam'])
pass_df_def = passing_grouped_def_no_sacks.count()['play_id'].rename('total_pass_attempts_allowed').to_frame()
pass_df_def['passing_yards_allowed'] = passing_grouped_def_no_sacks.sum()['yards_gained']
pass_df_def[['completions_allowed', 'passing_epa_allowed', 'pass_tds_allowed']] = passing_grouped_def.sum()[['complete_pass', 'epa', 'pass_touchdown']]
pass_df_def['avg_cpoe_allowed'] = passing_grouped_def.mean()['cpoe'].rename('avg_cpoe').to_frame()

# pass_df_def

In [12]:
# Get other defensive stats

# Get qb epa 
qb_stats_def = defense[(defense.play_type == 'pass') | (defense.qb_scramble == 1) | (defense.qb_designed_run == 1)]
qb_epa_df_def = qb_stats_def.groupby(by = ['season', 'week', 'defteam']).sum()['epa'].rename('qb_epa_allowed').to_frame()

# Get sacks, interceptions, fumbles, fumbles lost, turnovers
turnovers_df_def = defense.groupby(by = ['season', 'week', 'defteam']).sum()[['sack', 'fumble_forced', 'interception']]
turnovers_df_def.columns = ['sacks', 'fumbles_forced', 'interceptions']
fumble_recovery_df = defense[(defense.defteam == defense.fumble_recovery_1_team)].groupby(by = ['season', 'week', 'defteam']).sum()['fumble_forced'].rename('fumbles_recovered').to_frame()
turnovers_df_def = turnovers_df_def.join(fumble_recovery_df, how='left').fillna(0)

# turnovers_df_def

In [13]:
# Get special teams stats

specials = nfl_df[nfl_df.special == 1]
specials_grouped = specials.groupby(by = ['season', 'week', 'posteam'])
specials_epa_df_one = specials_grouped.sum()['epa'].rename('special_teams_epa_one').to_frame()

specials = nfl_df[nfl_df.special == 1]
specials_grouped = specials.groupby(by = ['season', 'week', 'defteam'])
specials_epa_df_two = specials_grouped.sum()['epa'].rename('special_teams_epa_two').to_frame()

specials_epa_df = specials_epa_df_one.merge(specials_epa_df_two, left_on=['season', 'week', 'posteam'], right_index=True)
specials_epa_df['special_teams_epa'] = specials_epa_df.special_teams_epa_one - specials_epa_df.special_teams_epa_two
specials_epa_df.drop(columns = ['special_teams_epa_one', 'special_teams_epa_two'], inplace=True)

# specials_epa_df

In [14]:
# Get overall score and other game total stats

home_scores_and_etc_df = nfl_df.groupby(by = ['season', 'week', 'home_team', 'away_team']).max()[['home_score', 'away_score']]
home_scores_and_etc_df.index.names = ['season', 'week', 'team', 'opponent']
home_scores_and_etc_df.columns = ['score', 'opponent_score']
away_scores_and_etc_df = nfl_df.groupby(by = ['season', 'week', 'away_team', 'home_team']).max()[['away_score', 'home_score']]
away_scores_and_etc_df.index.names = ['season', 'week', 'team', 'opponent']
away_scores_and_etc_df.columns = ['score', 'opponent_score']

final_nfl_df = pd.concat([home_scores_and_etc_df, away_scores_and_etc_df])
final_nfl_df = final_nfl_df.sort_index()

# final_nfl_df

  home_scores_and_etc_df = nfl_df.groupby(by = ['season', 'week', 'home_team', 'away_team']).max()[['home_score', 'away_score']]
  away_scores_and_etc_df = nfl_df.groupby(by = ['season', 'week', 'away_team', 'home_team']).max()[['away_score', 'home_score']]


In [15]:
# Combine all stats together into one data frame

final_nfl_df = final_nfl_df.merge(
    pass_df, left_on=['season', 'week', 'team'], right_index=True).merge(
    rush_df, left_on=['season', 'week', 'team'], right_index=True).merge(
    qb_epa_df, left_on=['season', 'week', 'team'], right_index=True).merge(
    turnovers_df, left_on=['season', 'week', 'team'], right_index=True).merge(
    rush_df_def, left_on=['season', 'week', 'team'], right_index=True).merge(
    pass_df_def, left_on=['season', 'week', 'team'], right_index=True).merge(
    turnovers_df_def, left_on=['season', 'week', 'team'], right_index=True).merge(
    specials_epa_df, left_on=['season', 'week', 'team'], right_index=True)

# final_nfl_df

In [16]:
# Flip sign of def epa 

final_nfl_df['passing_epa_def'] = -final_nfl_df['passing_epa_allowed']
final_nfl_df['rushing_epa_def'] = -final_nfl_df['rushing_epa_allowed']

# Add total epa stats

final_nfl_df['total_epa'] = final_nfl_df.apply(lambda x: x.passing_epa + x.rushing_epa + x.special_teams_epa + x.rushing_epa_def + x.passing_epa_def, axis = 1)
final_nfl_df['total_opposing_epa'] = -final_nfl_df.total_epa

In [17]:
# View final nfl data frame

# final_nfl_df

In [18]:
# Store copy of final_nfl_df, rename it

nfl_per_game = final_nfl_df.copy()

In [19]:
# Read in elo data

elo = pd.read_csv('https://projects.fivethirtyeight.com/nfl-api/nfl_elo.csv')

# elo

In [20]:
# Filter elo to only have data from current and previous season

elo = elo[elo.season >= previous_season]

In [21]:
# Drop some unnecesssary columns, rename some columns to match our other conventions

elo = elo.drop(columns = ['neutral', 'playoff'])
elo = elo.rename(columns = {'team1': 'home', 'team2': 'away', 'score1':'home_score', 'score2':'away_score'})

In [22]:
# Map team names from abbreviations to full names
# Note: for any teams that changed their names since the 2014 season, all games will refer to them using their current 
# name, even if they had a different name at the time of the game. 

def fix_home_team_names(game):
    team_mapping = {
        'ARI':'Arizona Cardinals',
        'ATL':'Atlanta Falcons',
        'BAL':'Baltimore Ravens',
        'BUF':'Buffalo Bills',
        'CAR':'Carolina Panthers',
        'CHI':'Chicago Bears',
        'CIN':'Cincinnati Bengals',
        'CLE':'Cleveland Browns',
        'DAL':'Dallas Cowboys',
        'DEN':'Denver Broncos',
        'DET':'Detroit Lions',
        'GB':'Green Bay Packers',
        'HOU':'Houston Texans',
        'IND':'Indianapolis Colts',
        'JAX':'Jacksonville Jaguars',
        'KC':'Kansas City Chiefs',
        'OAK':'Las Vegas Raiders',
        'LAC':'Los Angeles Chargers',
        'LAR':'Los Angeles Rams',
        'MIA':'Miami Dolphins',
        'MIN':'Minnesota Vikings',
        'NE':'New England Patriots',
        'NO':'New Orleans Saints',
        'NYG':'New York Giants',
        'NYJ':'New York Jets',
        'PHI':'Philadelphia Eagles',
        'PIT':'Pittsburgh Steelers',
        'SF':'San Francisco 49ers',
        'SEA':'Seattle Seahawks',
        'TB':'Tampa Bay Buccaneers',
        'TEN':'Tennessee Titans',
        'WSH':'Washington Football Team'
    }
    
    return team_mapping[game['home']]
    
def fix_away_team_names(game):
    team_mapping = {
        'ARI':'Arizona Cardinals',
        'ATL':'Atlanta Falcons',
        'BAL':'Baltimore Ravens',
        'BUF':'Buffalo Bills',
        'CAR':'Carolina Panthers',
        'CHI':'Chicago Bears',
        'CIN':'Cincinnati Bengals',
        'CLE':'Cleveland Browns',
        'DAL':'Dallas Cowboys',
        'DEN':'Denver Broncos',
        'DET':'Detroit Lions',
        'GB':'Green Bay Packers',
        'HOU':'Houston Texans',
        'IND':'Indianapolis Colts',
        'JAX':'Jacksonville Jaguars',
        'KC':'Kansas City Chiefs',
        'OAK':'Las Vegas Raiders',
        'LAC':'Los Angeles Chargers',
        'LAR':'Los Angeles Rams',
        'MIA':'Miami Dolphins',
        'MIN':'Minnesota Vikings',
        'NE':'New England Patriots',
        'NO':'New Orleans Saints',
        'NYG':'New York Giants',
        'NYJ':'New York Jets',
        'PHI':'Philadelphia Eagles',
        'PIT':'Pittsburgh Steelers',
        'SF':'San Francisco 49ers',
        'SEA':'Seattle Seahawks',
        'TB':'Tampa Bay Buccaneers',
        'TEN':'Tennessee Titans',
        'WSH':'Washington Football Team'
    }
    
    return team_mapping[game['away']]


elo['home_full_name'] = elo.apply(lambda x: fix_home_team_names(x), axis=1)
elo['away_full_name'] = elo.apply(lambda x: fix_away_team_names(x), axis=1)

In [23]:
# Store copy of elo, rename it

nfl_elo = elo.copy()

# nfl_elo

In [24]:
# Add full team names to nfl data 

# Map team names from abbreviations to full names
# Note: for any teams that changed their names since the 2014 season, all games will refer to them using their current 
# name, even if they had a different name at the time of the game. 

def fix_team_names(game):
    team_mapping = {
        'ARI':'Arizona Cardinals',
        'ATL':'Atlanta Falcons',
        'BAL':'Baltimore Ravens',
        'BUF':'Buffalo Bills',
        'CAR':'Carolina Panthers',
        'CHI':'Chicago Bears',
        'CIN':'Cincinnati Bengals',
        'CLE':'Cleveland Browns',
        'DAL':'Dallas Cowboys',
        'DEN':'Denver Broncos',
        'DET':'Detroit Lions',
        'GB':'Green Bay Packers',
        'HOU':'Houston Texans',
        'IND':'Indianapolis Colts',
        'JAX':'Jacksonville Jaguars',
        'KC':'Kansas City Chiefs',
        'OAK':'Las Vegas Raiders',
        'LV':'Las Vegas Raiders',
        'LAC':'Los Angeles Chargers',
        'LAR':'Los Angeles Rams',
        'LA':'Los Angeles Rams',
        'MIA':'Miami Dolphins',
        'MIN':'Minnesota Vikings',
        'NE':'New England Patriots',
        'NO':'New Orleans Saints',
        'NYG':'New York Giants',
        'NYJ':'New York Jets',
        'PHI':'Philadelphia Eagles',
        'PIT':'Pittsburgh Steelers',
        'SF':'San Francisco 49ers',
        'SEA':'Seattle Seahawks',
        'TB':'Tampa Bay Buccaneers',
        'TEN':'Tennessee Titans',
        'WSH':'Washington Football Team',
        'WAS':'Washington Football Team'
    }
    
    return team_mapping[game['team']]
    
def fix_opponent_team_names(game):
    team_mapping = {
        'ARI':'Arizona Cardinals',
        'ATL':'Atlanta Falcons',
        'BAL':'Baltimore Ravens',
        'BUF':'Buffalo Bills',
        'CAR':'Carolina Panthers',
        'CHI':'Chicago Bears',
        'CIN':'Cincinnati Bengals',
        'CLE':'Cleveland Browns',
        'DAL':'Dallas Cowboys',
        'DEN':'Denver Broncos',
        'DET':'Detroit Lions',
        'GB':'Green Bay Packers',
        'HOU':'Houston Texans',
        'IND':'Indianapolis Colts',
        'JAX':'Jacksonville Jaguars',
        'KC':'Kansas City Chiefs',
        'OAK':'Las Vegas Raiders',
        'LV':'Las Vegas Raiders',
        'LAC':'Los Angeles Chargers',
        'LAR':'Los Angeles Rams',
        'LA':'Los Angeles Rams',
        'MIA':'Miami Dolphins',
        'MIN':'Minnesota Vikings',
        'NE':'New England Patriots',
        'NO':'New Orleans Saints',
        'NYG':'New York Giants',
        'NYJ':'New York Jets',
        'PHI':'Philadelphia Eagles',
        'PIT':'Pittsburgh Steelers',
        'SF':'San Francisco 49ers',
        'SEA':'Seattle Seahawks',
        'TB':'Tampa Bay Buccaneers',
        'TEN':'Tennessee Titans',
        'WSH':'Washington Football Team',
        'WAS':'Washington Football Team'
    }
    
    return team_mapping[game['opponent']]

nfl_per_game = nfl_per_game.reset_index()
nfl_per_game['team_full_name'] = nfl_per_game.apply(lambda x: fix_team_names(x), axis=1)
nfl_per_game['opponent_full_name'] = nfl_per_game.apply(lambda x: fix_opponent_team_names(x), axis=1)

In [25]:
# Add week column to elo data 

keep_cols = [col for col in nfl_elo.columns]
keep_cols.append('week')
nfl_elo = nfl_elo.merge(nfl_per_game, how='inner',
                                 left_on=['season', 'home_full_name', 'away_full_name', 'home_score', 'away_score'],
                                 right_on = ['season', 'team_full_name', 'opponent_full_name', 'score', 'opponent_score'], 
                                 suffixes = [None, '_'])
nfl_elo = nfl_elo[keep_cols]

nfl_elo[['season', 'week', 'home', 'away', 'home_full_name', 'away_full_name', 'qb1', 'qb2']].to_csv('../data/elo_qb_data.csv')
nfl_elo.to_csv('../data/nfl_elo_pred_1_output.csv')
nfl_per_game.to_csv('../data/nfl_per_game_pred_1_output.csv')

In [26]:
nfl_per_game

Unnamed: 0,season,week,team,opponent,score,opponent_score,total_pass_attempts,total_passing_yards,completions,passing_epa,pass_tds,avg_cpoe,total_rushes,total_rush_yards,rushing_epa,rush_tds,qb_epa,sacks_allowed,fumbles,lost_fumbles,interceptions_thrown,total_rushes_allowed,total_rush_yards_allowed,rushing_epa_allowed,rush_tds_allowed,total_pass_attempts_allowed,passing_yards_allowed,completions_allowed,passing_epa_allowed,pass_tds_allowed,avg_cpoe_allowed,sacks,fumbles_forced,interceptions,fumbles_recovered,special_teams_epa,passing_epa_def,rushing_epa_def,total_epa,total_opposing_epa,team_full_name,opponent_full_name
0,2020,1,ARI,SF,24,20,39,230.0,26.0,-3.574992,1.0,-2.229409,34,189.0,9.081035,2.0,7.326585,2.0,2.0,0.0,1.0,25,123.0,-3.209652,0.0,33,259.0,19.0,0.376995,2.0,-6.229403,3.0,0.0,0.0,0.0,-1.717432,-0.376995,3.209652,6.621267,-6.621267,Arizona Cardinals,San Francisco 49ers
1,2020,1,ATL,SEA,25,38,55,450.0,37.0,6.093836,2.0,4.676274,21,72.0,-7.005064,1.0,5.986197,2.0,1.0,1.0,1.0,19,85.0,-1.168455,1.0,35,322.0,31.0,19.570177,4.0,19.803183,3.0,0.0,0.0,0.0,2.051288,-19.570177,1.168455,-17.261661,17.261661,Atlanta Falcons,Seattle Seahawks
2,2020,1,BAL,CLE,38,6,26,284.0,21.0,13.760624,3.0,21.383049,29,112.0,-2.126354,2.0,17.708914,2.0,2.0,1.0,0.0,26,139.0,-5.782499,0.0,39,189.0,21.0,-10.646843,1.0,-4.231688,2.0,3.0,1.0,2.0,9.567104,10.646843,5.782499,37.630716,-37.630716,Baltimore Ravens,Cleveland Browns
3,2020,1,BUF,NYJ,27,17,46,312.0,33.0,13.801008,2.0,6.882114,30,100.0,-11.138154,1.0,9.786316,3.0,2.0,2.0,0.0,15,52.0,0.218274,1.0,33,215.0,21.0,-9.061004,1.0,-3.159330,3.0,1.0,1.0,1.0,0.358292,9.061004,-0.218274,11.863877,-11.863877,Buffalo Bills,New York Jets
4,2020,1,CAR,LV,30,34,35,271.0,22.0,6.376943,1.0,0.545695,30,129.0,1.638044,2.0,6.920155,1.0,0.0,0.0,0.0,31,133.0,4.343050,3.0,30,239.0,22.0,10.732628,1.0,8.987310,0.0,1.0,0.0,0.0,-0.614895,-10.732628,-4.343050,-7.675586,7.675586,Carolina Panthers,Las Vegas Raiders
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1431,2022,11,PHI,IND,17,16,25,190.0,18.0,-10.893359,1.0,9.174621,31,143.0,2.977979,1.0,-0.853701,3.0,4.0,2.0,0.0,26,99.0,-5.706077,1.0,32,213.0,23.0,-3.337152,0.0,0.595780,4.0,1.0,0.0,1.0,0.498398,3.337152,5.706077,1.626248,-1.626248,Philadelphia Eagles,Indianapolis Colts
1432,2022,11,PIT,CIN,30,37,42,265.0,25.0,-2.217121,1.0,2.688385,24,102.0,-0.409353,2.0,-1.311158,2.0,0.0,0.0,0.0,22,64.0,-2.903927,0.0,39,355.0,24.0,12.716263,4.0,-1.119596,2.0,0.0,2.0,0.0,-3.314667,-12.716263,2.903927,-15.753478,15.753478,Pittsburgh Steelers,Cincinnati Bengals
1433,2022,11,SF,ARI,38,10,29,228.0,20.0,16.238575,4.0,0.417145,26,161.0,4.211570,1.0,17.081697,0.0,0.0,0.0,0.0,24,67.0,-4.994961,1.0,44,277.0,30.0,-6.740174,0.0,3.672127,3.0,1.0,2.0,0.0,0.898471,6.740174,4.994961,33.083751,-33.083751,San Francisco 49ers,Arizona Cardinals
1434,2022,11,TEN,GB,27,17,29,337.0,24.0,17.332626,3.0,25.256855,29,91.0,-2.641367,1.0,17.332626,3.0,0.0,0.0,1.0,19,56.0,-4.261430,0.0,40,229.0,24.0,2.710083,2.0,-0.817273,1.0,1.0,0.0,0.0,-1.360685,-2.710083,4.261430,14.881921,-14.881921,Tennessee Titans,Green Bay Packers
