In [1]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import os

pd.set_option('display.max_columns', None)

# Scoring rules
https://www.espn.com/fantasy/football/ffl/story?page=fflrulesstandardscoring

Kicker:
- 5pt - 50+ yd FG
- 4pt - 40-49 yd FG
- 3pt - 0-39 yd FG
- 2pt - 2pt conversion
- 1pt - XP
- -2pt - missed FG (0-39 yd)
- -1pt - missed FG (40-49 yd)



# Read data

In [2]:
try:
    # Create the data directory if it doesn't exist
    if not os.path.exists('data'):
        os.makedirs('data')

    # Check if the Feather file exists
    if not os.path.exists('data/pbp_1999_2024.feather'):
        print("Downloading play-by-play data...")
        # Fetch data from the source
        df_pbp = pd.DataFrame(nfl.import_pbp_data([2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 
                                                2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 
                                                2008, 2007, 2006, 2005, 2004, 2003, 2002, 2001, 
                                                2000, 1999]))
        # Save the DataFrame to a Feather file
        df_pbp.to_feather("data/pbp_1999_2024.feather")
        print("Data download complete. File saved to 'data/pbp_1999_2024.feather'.")
    else:
        print("Loading play-by-play data from local Feather file...")
        # Read the data from the local Feather file
        df_pbp = pd.read_feather('data/pbp_1999_2024.feather')
        print("Data successfully loaded from 'data/pbp_1999_2024.feather'.")

except Exception as e:
    print(f"An error occurred: {e}")

Loading play-by-play data from local Feather file...
Data successfully loaded from 'data/pbp_1999_2024.feather'.


In [3]:
df_pbp.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,old_game_id_x,nflverse_game_id,old_game_id_y,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type
0,1.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,,,,,,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,,0.0,1.0,,0.0,15:00,BUF 35,0.0,,GAME,,,0.0,0.0,,0.0,0.0,0.0,,,,,,,,,,,3.0,3.0,,,,,,,,0.0,0.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.770222,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,-0.0,-0.0,0.0,,,0.250386,0.749614,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,2024,,,1.0,1.0,First down,1.0,"9/8/24, 13:03:02",,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,GAME_START,0.0,,,,1.0,Touchdown,,,,,,,,,,,,,,,,,,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,0.0,,,,,,,0.0,0.0,,0.0,0.0,,,,,,,,,,,0.0,0.0,-0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,40.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,BUF,35.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,,0.0,15:00,BUF 35,0.0,70.0,2-T.Bass kicks 65 yards from BUF 35 to end zon...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004568,0.143585,0.002325,0.275986,0.215226,0.003265,0.355046,0.0,0.0,0.770222,0.257819,-0.257819,0.257819,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,0.000338,0.003076,-0.003076,0.566454,0.433546,0.250386,0.749614,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,T.Bass,00-0036162,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,40.0,"9/8/24, 13:03:02",2024-09-08T17:03:02.957Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,KICK_OFF,1.0,,2024-09-08T17:03:06.833Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.257819,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,61.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,ARI,70.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,15:00,ARI 30,10.0,70.0,(15:00) 6-J.Conner up the middle to ARI 33 for...,run,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,middle,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004585,0.140649,0.003159,0.26066,0.209893,0.003228,0.377827,0.0,0.0,1.028041,-0.200602,-0.057217,0.057217,0.200602,-0.200602,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433546,0.566454,0.566454,0.433546,-0.00727,-0.003599,0.003599,0.573724,0.426276,0.253462,0.746538,0.00727,-0.00727,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,00-0033553,J.Conner,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0034376,Ta.Johnson,BUF,,,,,,,,,,1.0,00-0037254,T.Bernard,BUF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,61.0,"9/8/24, 13:03:02",2024-09-08T17:03:40.463Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,RUSH,0.0,,2024-09-08T17:03:43.660Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,0.0,,,J.Conner,6.0,,,0.0,1.0,0.0,0.0,1.0,,00-0033553,,J.Conner,6.0,00-0033553,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,-0.200602,,,,,,0.456761,-45.676102,,,,,,,,,,,,,,,,,,,,
3,83.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,ARI,67.0,2024-09-08,867.0,1767.0,3567.0,Half1,0.0,1.0,0.0,1.0,2.0,0.0,14:27,ARI 33,7.0,70.0,(14:27) 1-K.Murray pass short left to 6-J.Conn...,pass,22.0,0.0,0.0,1.0,0.0,0.0,0.0,short,left,-3.0,25.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004817,0.142571,0.002405,0.270632,0.220688,0.003977,0.35491,0.0,0.0,0.827439,2.028874,-2.086091,2.086091,0.200602,-0.200602,-2.028874,2.028874,-1.083852,3.112726,-1.083852,3.112726,1.083852,-1.083852,-3.112726,3.112726,1.083852,-1.083852,-3.112726,3.112726,0.426276,0.573724,0.573724,0.426276,0.053842,0.051482,-0.051482,0.519882,0.480118,0.249864,0.750136,0.00727,-0.00727,-0.053842,0.053842,0.0,0.053842,0.0,0.053842,0.0,0.0,-0.053842,0.053842,0.0,0.0,-0.053842,0.053842,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,00-0035228,K.Murray,22.0,00-0033553,J.Conner,22.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0033551,R.Douglas,BUF,,,,,,,,,,1.0,00-0036888,D.Hamlin,BUF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,0.868591,13.140899,1.0,1.0,First down,83.0,"9/8/24, 13:03:02",2024-09-08T17:04:12.743Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,PASS,0.0,,2024-09-08T17:04:20.843Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,K.Murray,1.0,,,J.Conner,6.0,1.0,0.0,1.0,0.0,1.0,00-0035228,,00-0033553,K.Murray,1.0,00-0035228,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,2.028874,1.345418,9.321221,8.0,0.509778,0.363807,0.576656,42.334431,,,,,,,,,,,,,,,,,,,,
4,108.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,BUF,45.0,2024-09-08,823.0,1723.0,3523.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,13:43,BUF 45,10.0,70.0,(13:43) (Shotgun) 1-K.Murray pass short middle...,pass,9.0,1.0,0.0,1.0,0.0,0.0,0.0,short,middle,2.0,7.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004927,0.080094,0.001128,0.144228,0.314572,0.004124,0.450927,0.0,0.0,2.856313,0.754242,-2.840333,2.840333,0.200602,-0.200602,-2.783116,2.783116,-0.567367,1.321609,-0.567367,1.321609,1.651219,-1.651219,-4.434335,4.434335,1.651219,-1.651219,-4.434335,4.434335,0.480118,0.519882,0.519882,0.480118,0.054495,0.018542,-0.018542,0.465387,0.534613,0.301346,0.698654,0.00727,-0.00727,-0.108337,0.108337,0.0,0.054495,0.0,0.054495,0.0,0.0,-0.108337,0.108337,0.0,0.0,-0.108337,0.108337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,00-0035228,K.Murray,9.0,00-0033553,J.Conner,9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0035663,T.Rapp,BUF,00-0038557,Do.Williams,BUF,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,0.807773,19.222683,2.0,1.0,First down,108.0,"9/8/24, 13:03:02",2024-09-08T17:04:57.067Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,PASS,0.0,,2024-09-08T17:05:03.770Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,K.Murray,1.0,,,J.Conner,6.0,1.0,0.0,0.0,0.0,1.0,00-0035228,,00-0033553,K.Murray,1.0,00-0035228,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,0.754242,0.882798,5.78356,4.0,0.668478,0.25514,0.426443,57.35569,,,,,,,,,,,,,,,,,,,,


## Filter to kicker plays

In [4]:
# Filter rows where 'kicker_player_name' is not null and the play type is relevant
df_kicker_pbp = df_pbp.loc[
    df_pbp['kicker_player_name'].notnull() & 
    df_pbp['play_type'].isin(['field_goal', 'extra_point', 'kickoff'])
].copy() 

# Ensure 'posteam' and 'defteam' columns exist
if 'posteam' in df_kicker_pbp.columns and 'defteam' in df_kicker_pbp.columns:
    # Create a mask for kickoff plays
    kickoff_mask = df_kicker_pbp['play_type'] == 'kickoff'

    # Log the number of kickoff plays being processed
    print(f"Swapping 'posteam' and 'defteam' for {kickoff_mask.sum()} kickoff plays...")

    # Swap values using the mask
    df_kicker_pbp.loc[kickoff_mask, ['posteam', 'defteam']] = (
        df_kicker_pbp.loc[kickoff_mask, ['defteam', 'posteam']].values
    )

    print("Swap complete.")
else:
    print("Error: Required columns 'posteam' and 'defteam' are missing from the DataFrame.")

# Convert 'game_date' column to datetime format, with error handling
try:
    df_kicker_pbp['game_date'] = pd.to_datetime(df_kicker_pbp['game_date'], errors='coerce')
    if df_kicker_pbp['game_date'].isnull().any():
        print("Warning: Some 'game_date' entries could not be converted and have been set to NaT.")
except Exception as e:
    print(f"An error occurred while converting 'game_date' to datetime: {e}")

# Final log for confirmation
print("Data processing for 'df_kicker_pbp' completed.")

Swapping 'posteam' and 'defteam' for 68223 kickoff plays...
Swap complete.
Data processing for 'df_kicker_pbp' completed.


In [5]:
df_kicker_pbp.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,old_game_id_x,nflverse_game_id,old_game_id_y,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type
1,40.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,BUF,away,ARI,BUF,35.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,,0.0,15:00,BUF 35,0.0,70.0,2-T.Bass kicks 65 yards from BUF 35 to end zon...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004568,0.143585,0.002325,0.275986,0.215226,0.003265,0.355046,0.0,0.0,0.770222,0.257819,-0.257819,0.257819,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,0.000338,0.003076,-0.003076,0.566454,0.433546,0.250386,0.749614,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,T.Bass,00-0036162,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,40.0,"9/8/24, 13:03:02",2024-09-08T17:03:02.957Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,KICK_OFF,1.0,,2024-09-08T17:03:06.833Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.257819,,,,,,,,,,,,,,,,,,,,,,,,,,,
15,407.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,BUF,15.0,2024-09-08,467.0,1367.0,3167.0,Half1,0.0,1.0,1.0,1.0,,0.0,07:47,BUF 15,0.0,70.0,"5-M.Prater extra point is GOOD, Center-46-A.Br...",extra_point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,33.0,good,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,7.0,6.0,0.0,6.0,7.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.932427,0.0,0.932427,0.067573,-6.29735,6.29735,-0.25641,0.25641,-5.715549,5.715549,,,0.0,0.0,-2.603761,2.603761,-4.56486,4.56486,-3.119902,3.119902,-3.581094,3.581094,0.613047,0.386953,0.386953,0.613047,0.001292,0.000491,-0.000491,0.38566,0.61434,0.445916,0.554084,0.010752,-0.010752,-0.190253,0.190253,,,0.0,0.0,-0.046479,0.046479,-0.174029,0.174029,-0.046479,0.046479,-0.158647,0.158647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,5.0,1.0,Touchdown,407.0,"9/8/24, 13:03:02",2024-09-08T17:16:35Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,XP_KICK,1.0,,,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.067573,,,,,,,,,,,,,,,,,,,,,,,,,,,
16,422.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,home,BUF,ARI,35.0,2024-09-08,467.0,1367.0,3167.0,Half1,0.0,2.0,0.0,1.0,,0.0,07:47,ARI 35,0.0,16.0,5-M.Prater kicks 63 yards from ARI 35 to BUF 2...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,63.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,7.0,0.0,7.0,-7.0,0.0,7.0,-7.0,0.015049,0.134336,0.001989,0.233148,0.213073,0.003136,0.39927,0.0,0.0,1.401358,1.882355,-4.414996,4.414996,-0.25641,0.25641,-5.715549,5.715549,,,0.0,0.0,-2.603761,2.603761,-4.56486,4.56486,-3.119902,3.119902,-3.581094,3.581094,0.38566,0.61434,0.38566,0.61434,0.048635,0.072297,0.072297,0.434295,0.565705,0.553593,0.553593,0.010752,-0.010752,-0.190253,0.190253,,,0.0,0.0,-0.046479,0.046479,-0.174029,0.174029,-0.046479,0.046479,-0.158647,0.158647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,B.Codrington,00-0039333,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,00-0038984,K.Clark,ARI,,,,,,,,,,1.0,00-0039807,M.Melton,ARI,,,,,,,,,,,,,,,,,,,,,,,,,,,,BUF,53.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,6.0,1.0,First down,422.0,"9/8/24, 13:03:02",2024-09-08T17:19:41.557Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,KICK_OFF,1.0,,2024-09-08T17:19:53.017Z,,2.0,Turnover,2024-09-08T17:19:41.557Z,4.0,2:23,2.0,1.0,0.0,1.0,1.0,0.0,KICKOFF,FUMBLE,07:47,05:24,ARI 45,ARI 19,422.0,528.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,1.0,0.0,1.882355,,,,,,,,,,,,,,,,,,,,,,,,,,,
31,823.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,away,BUF,BUF,11.0,2024-09-08,900.0,900.0,2700.0,Half1,0.0,3.0,1.0,2.0,4.0,0.0,15:00,BUF 11,4.0,60.0,"(15:00) 5-M.Prater 29 yard field goal is GOOD,...",field_goal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,made,29.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,10.0,7.0,0.0,7.0,10.0,0.0,10.0,0.002077,0.007349,0.000103,0.01276,0.966858,0.000108,0.010744,0.0,0.0,2.864426,0.135574,-10.698709,10.698709,0.494553,-0.494553,-12.61465,12.61465,,,0.0,0.0,2.935215,-2.935215,-10.958288,10.958288,2.419075,-2.419075,-9.974523,9.974523,0.721678,0.278322,0.278322,0.721678,-0.002172,0.010197,-0.010197,0.280494,0.719506,0.557039,0.442961,-0.001494,0.001494,-0.33398,0.33398,,,0.0,0.0,-0.043326,0.043326,-0.167168,0.167168,-0.043326,0.043326,-0.151786,0.151786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,12.0,0.0,Field goal,823.0,"9/8/24, 13:03:02",2024-09-08T17:34:16Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,FIELD_GOAL,0.0,,,,3.0,Field goal,2024-09-08T17:25:39.110Z,10.0,5:27,3.0,1.0,1.0,1.0,2.0,0.0,FUMBLE,FIELD_GOAL,05:24,14:57,ARI 29,BUF 11,565.0,823.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.135574,,,,,,,,,,,,,,,,,,,,,,,,,,,
32,847.0,2024_01_ARI_BUF,2024090801,BUF,ARI,REG,1,ARI,home,BUF,ARI,35.0,2024-09-08,897.0,897.0,2697.0,Half1,0.0,4.0,0.0,2.0,,0.0,14:57,ARI 35,0.0,51.0,5-M.Prater kicks 65 yards from ARI 35 to end z...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,10.0,0.0,10.0,-10.0,0.0,10.0,-10.0,0.050659,0.138172,0.002267,0.225536,0.211489,0.002728,0.369149,0.0,0.0,1.226168,0.311864,-10.386845,10.386845,0.494553,-0.494553,-12.61465,12.61465,,,0.0,0.0,2.935215,-2.935215,-10.958288,10.958288,2.419075,-2.419075,-9.974523,9.974523,0.280494,0.719506,0.280494,0.719506,,,0.009553,,,0.432765,0.432765,-0.001494,0.001494,-0.33398,0.33398,,,0.0,0.0,-0.043326,0.043326,-0.167168,0.167168,-0.043326,0.043326,-0.151786,0.151786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BUF,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,13.0,1.0,First down,847.0,"9/8/24, 13:03:02",2024-09-08T17:36:15.657Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0,0.0,KICK_OFF,1.0,,2024-09-08T17:36:20.873Z,,4.0,Field goal,2024-09-08T17:36:15.657Z,9.0,5:10,4.0,1.0,1.0,2.0,2.0,-9.0,KICKOFF,FIELD_GOAL,14:57,09:47,BUF 30,ARI 19,847.0,1120.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.311864,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Calculate kicker fantasy points fields

In [6]:
# Set extra point distance based on year and create flags for XP attempts and success
df_kicker_pbp['xp_distance'] = np.where(df_kicker_pbp['game_date'].dt.year < 2015, 19, 33)
df_kicker_pbp["xp_attempt"] = df_kicker_pbp["extra_point_result"].notnull()
df_kicker_pbp["xp_made"] = df_kicker_pbp["extra_point_result"] == "good"

# Create flags for successful and attempted XPs by distance
df_kicker_pbp["xp_made_33y"] = df_kicker_pbp["xp_made"] & (df_kicker_pbp["xp_distance"] == 33)
df_kicker_pbp["xp_made_19y"] = df_kicker_pbp["xp_made"] & (df_kicker_pbp["xp_distance"] == 19)
df_kicker_pbp["xp_attempt_33y"] = df_kicker_pbp["xp_attempt"] & (df_kicker_pbp["xp_distance"] == 33)
df_kicker_pbp["xp_attempt_19y"] = df_kicker_pbp["xp_attempt"] & (df_kicker_pbp["xp_distance"] == 19)

# Field goal (FG) results and distance-based flags
df_kicker_pbp["50+_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"] >= 50)
df_kicker_pbp["40-49_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"].between(40, 49))
df_kicker_pbp["0-39_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"] < 40)

# Missed FG flags by distance
df_kicker_pbp["missed_fg_0-39"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"] < 40)
df_kicker_pbp["missed_fg_40-49"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"].between(40, 49))
df_kicker_pbp["missed_fg_50+"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"] >= 50)

# Total FGs made and missed
df_kicker_pbp["total_fg_made"] = df_kicker_pbp[["50+_fg_made", "40-49_fg_made", "0-39_fg_made"]].sum(axis=1)
df_kicker_pbp["total_fg_missed"] = df_kicker_pbp[["missed_fg_0-39", "missed_fg_40-49", "missed_fg_50+"]].sum(axis=1)

# Calculate fantasy points based on custom scoring system
df_kicker_pbp["fantasy_points"] = (
    df_kicker_pbp["50+_fg_made"] * 5 +
    df_kicker_pbp["40-49_fg_made"] * 4 +
    df_kicker_pbp["0-39_fg_made"] * 3 +
    df_kicker_pbp["xp_made"] * 1 +
    df_kicker_pbp["missed_fg_0-39"] * -2 +
    df_kicker_pbp["missed_fg_40-49"] * -1
)

# Optional: Drop any rows with NaN values in the calculated columns
# df_kicker_pbp.dropna(subset=["fantasy_points"], inplace=True)

# Log completion message
print("Kicker play-by-play data processing completed successfully.")

Kicker play-by-play data processing completed successfully.


# Aggregate to game level

## List of kicker stats to aggregate

Given:
- game_id
- home_team
- away_team
- week
- posteam
- defteam
<!-- - yardline_100 -->
- game_date
<!-- - play_type -->
- field_goal_result
- kick_distance
- extra_point_result
- field_goal_attempt
- kicker_player_name
- kicker_player_id
- stadium
- weather
- roof
- surface
- temp
- wind

Calculated:
- fantasy_points
- 50+_fg_made
- 40-49_fg_made
- 0-39_fg_made
- missed_fg_0-39
- missed_fg_40-49
- missed_fg_50+
- xp_attempt
- xp_made
- xp_distance
<!-- - xp_made_15yl
- xp_made_2yl -->

In [7]:
df_kicker_pbp[df_kicker_pbp.game_id == '2024_10_TEN_LAC'][['temp', 'wind', 'surface', 'stadium']]

Unnamed: 0,temp,wind,surface,stadium
25848,,,,SoFi Stadium
25861,,,,SoFi Stadium
25862,,,,SoFi Stadium
25871,,,,SoFi Stadium
25872,,,,SoFi Stadium
25890,,,,SoFi Stadium
25891,,,,SoFi Stadium
25904,,,,SoFi Stadium
25913,,,,SoFi Stadium
25914,,,,SoFi Stadium


In [8]:
df_kicker_game_level_stadium = df_kicker_pbp.groupby(['game_id', 'game_date', 'week', 'season', 'stadium'], as_index=False).agg({
    # Game level
    'home_team': 'first',
    'roof': 'first',
    'temp': 'first',
    'wind': 'first',
}).sort_values(by=['game_date'], ascending=False)

In [9]:
df_kicker_game_level_stadium.head(10)

Unnamed: 0,game_id,game_date,week,season,stadium,home_team,roof,temp,wind
6333,2024_10_TEN_LAC,2024-11-10,10,2024,SoFi Stadium,LAC,dome,,
6328,2024_10_NYG_CAR,2024-11-10,10,2024,Allianz Arena,CAR,outdoors,,
6322,2024_10_ATL_NO,2024-11-10,10,2024,Caesars Superdome,NO,dome,,
6323,2024_10_BUF_IND,2024-11-10,10,2024,Lucas Oil Stadium,IND,closed,,
6326,2024_10_MIN_JAX,2024-11-10,10,2024,EverBank Stadium,JAX,outdoors,,
6327,2024_10_NE_CHI,2024-11-10,10,2024,Soldier Field,CHI,outdoors,,
6325,2024_10_DEN_KC,2024-11-10,10,2024,GEHA Field at Arrowhead Stadium,KC,outdoors,,
6329,2024_10_NYJ_ARI,2024-11-10,10,2024,State Farm Stadium,ARI,closed,,
6330,2024_10_PHI_DAL,2024-11-10,10,2024,AT&T Stadium,DAL,closed,,
6331,2024_10_PIT_WAS,2024-11-10,10,2024,Northwest Stadium,WAS,outdoors,,


In [10]:
df_kicker_game_level = df_kicker_pbp.groupby(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam', 'kicker_player_name', 'kicker_player_id'], as_index=False).agg({
    # Game level
    'home_team': 'first',
    'away_team': 'first',

    # Play level
    'fantasy_points': 'sum',
    'total_fg_made': 'sum',
    'total_fg_missed': 'sum',
    '50+_fg_made': 'sum',
    '40-49_fg_made': 'sum',
    '0-39_fg_made': 'sum',
    'missed_fg_0-39': 'sum',
    'missed_fg_40-49': 'sum',
    'missed_fg_50+': 'sum',
    'xp_attempt_19y': 'sum',
    'xp_made_19y': 'sum',
    'xp_attempt_33y': 'sum',
    'xp_made_33y': 'sum',
})

df_kicker_game_level["home"] = df_kicker_game_level["home_team"] == df_kicker_game_level["posteam"]
df_kicker_game_level.drop(columns=['home_team', 'away_team'], inplace=True)

In [11]:
df_kicker_game_level.head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y,home
0,1999_01_ARI_PHI,1999-09-12,1,1999,ARI,PHI,C.Jacke,00-0008080,13,4,0,0,0,4,0,0,0,1,1,0,0,False
1,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,D.Akers,00-0000108,0,0,0,0,0,0,0,0,0,0,0,0,0,True
2,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,N.Johnson,00-0008593,6,1,0,0,0,1,0,0,0,3,3,0,0,True
3,1999_01_BUF_IND,1999-09-12,1,1999,BUF,IND,S.Christie,00-0002975,6,2,0,0,0,2,0,0,0,0,0,0,0,False
4,1999_01_BUF_IND,1999-09-12,1,1999,IND,BUF,M.Vanderjagt,00-0016830,7,1,0,0,0,1,0,0,0,4,4,0,0,True
5,1999_01_CAR_NO,1999-09-12,1,1999,CAR,NO,J.Kasay,00-0009028,6,1,1,1,0,0,0,0,1,1,1,0,0,False
6,1999_01_CAR_NO,1999-09-12,1,1999,NO,CAR,D.Brien,00-0001759,8,2,0,0,1,1,0,0,0,2,1,0,0,True
7,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,D.Pelfrey,00-0012742,7,2,0,0,0,2,0,0,0,1,1,0,0,False
8,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,W.Brice,00-0001752,0,0,0,0,0,0,0,0,0,0,0,0,0,False
9,1999_01_CIN_TEN,1999-09-12,1,1999,TEN,CIN,A.Del Greco,00-0004147,12,2,0,1,0,1,0,0,0,4,4,0,0,True


## Predictors to calculate
Aggregate each point-earning stat at the following intervals:
- Past 5 games
- Season
- Career (kicker only)

Calculate the above for kickers, offenses, and defenses as well.

Calculate # games played by player in career (ie sample size).
Calculate # games in season (not quite same as week # -- injuries, byes)

Get offensive stats too (e.g. yards per game, points per game)

All stats should be controlled for time.

In [12]:
def calc_agg_stats(group, fields, career=True):
    """
    Calculate aggregate statistics for each player over their career and season,
    including prior season means, rolling averages, and cumulative counts.

    Parameters:
    - group: DataFrame grouped by player or other identifier.
    - fields: List of fields to calculate statistics on.
    - career: Boolean indicating whether to calculate career-level stats.

    Returns:
    - DataFrame with calculated aggregate statistics.
    """
    # Ensure 'game_date' is datetime
    group['game_date'] = pd.to_datetime(group['game_date'], errors='coerce')
    
    # Sort the group chronologically
    group_sorted = group.sort_values('game_date')
    
    # Initialize the result DataFrame
    result = pd.DataFrame(index=group_sorted.index)
    
    # Calculate cumulative game counts
    if career:
        # Career game count (number of games up to current point, excluding current game)
        result['n_games_career'] = np.arange(len(group_sorted))
    
    # Season game count
    result['n_games_season'] = group_sorted.groupby('season').cumcount()
    
    # Loop over each field to calculate aggregate stats
    for field in fields:
        if career:
            # Career mean up to the previous game (excluding current game)
            result[f'{field}_mean_career'] = (
                group_sorted[field]
                .expanding()
                .mean()
                .shift()
            )
        
        # Season mean up to the previous game (excluding current game)
        result[f'{field}_mean_season'] = (
            group_sorted.groupby('season')[field]
            .expanding()
            .mean()
            .shift()
            .reset_index(level=0, drop=True)
        )
        
        # Prior season mean (mean of the entire previous season)
        result[f'{field}_mean_prior_season'] = (
            group_sorted.groupby('season')[field]
            .transform('mean')
            .shift()
        )
        
        # Rolling mean for the last 5 games up to the previous game (excluding current game)
        result[f'{field}_mean_last5'] = (
            group_sorted[field]
            .rolling(window=5, min_periods=1)
            .mean()
            .shift()
        )
    
    # Combine the result with the original group_sorted DataFrame
    combined = pd.concat([group_sorted, result], axis=1)
    
    return combined

In [13]:
# Define the fields for which you want to calculate aggregate statistics
kicker_fields = [
    'fantasy_points', 
    'total_fg_made', 
    'total_fg_missed', 
    '50+_fg_made', 
    '40-49_fg_made', 
    '0-39_fg_made', 
    'missed_fg_50+', 
    'missed_fg_40-49', 
    'missed_fg_0-39', 
    'xp_attempt_19y', 
    'xp_made_19y', 
    'xp_attempt_33y', 
    'xp_made_33y'
]

# Apply the 'calc_agg_stats' function to each kicker's data
df_kicker_game_level_agg = df_kicker_game_level.groupby(
    ['kicker_player_name', 'kicker_player_id'], 
    group_keys=False
).apply(
    calc_agg_stats, 
    fields=kicker_fields
).reset_index(drop=True).round(2)


In [14]:
df_kicker_game_level_agg.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)


Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y,home,n_games_career,n_games_season,fantasy_points_mean_career,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_career,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_career,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_career,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_career,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_career,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_career,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_career,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_career,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5
16349,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,C.Dicker,00-0037224,9,2,0,0,0,2,0,0,0,0,0,3,3,True,37,8,9.03,9.5,9.44,10.0,1.95,2.12,2.11,2.2,0.14,0.25,0.22,0.4,0.32,0.5,0.44,0.6,0.59,0.75,0.67,0.8,1.03,0.88,1.0,0.8,0.08,0.12,0.11,0.2,0.05,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,1.88,2.0,2.0,2.0,1.5,1.67,1.6
16350,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,N.Folk,00-0025565,4,1,1,0,0,1,0,1,0,0,0,2,2,False,241,8,7.69,7.25,6.89,8.8,1.66,1.38,1.33,1.8,0.27,0.0,0.11,0.0,0.18,0.38,0.33,0.6,0.46,0.5,0.44,0.4,1.01,0.5,0.56,0.8,0.07,0.0,0.0,0.0,0.14,0.0,0.11,0.0,0.05,0.0,0.0,0.0,1.25,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.95,1.88,1.89,1.8,0.88,1.88,1.89,1.8
16346,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4
16347,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4
16348,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,T.Gill,00-0037097,0,0,0,0,0,0,0,0,0,0,0,0,0,True,16,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16344,2024_10_PIT_WAS,2024-11-10,10,2024,PIT,WAS,C.Boswell,00-0031136,4,0,0,0,0,0,0,0,0,0,0,4,4,False,150,8,8.34,12.75,11.78,11.8,1.81,2.88,2.56,2.6,0.21,0.12,0.11,0.0,0.24,0.75,0.67,0.4,0.57,0.62,0.56,0.6,1.0,1.5,1.33,1.6,0.05,0.12,0.11,0.0,0.12,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.17,2.0,2.22,2.6,2.06,2.0,2.22,2.6
16345,2024_10_PIT_WAS,2024-11-10,10,2024,WAS,PIT,Z.Gonzalez,00-0033862,11,2,0,0,2,0,0,0,0,0,0,3,3,True,63,0,6.9,8.17,8.17,7.0,1.44,1.67,1.67,1.4,0.3,0.08,0.08,0.0,0.17,0.25,0.25,0.0,0.46,0.83,0.83,1.0,0.81,0.58,0.58,0.4,0.1,0.08,0.08,0.0,0.16,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.13,1.92,1.92,1.8,2.02,1.83,1.83,1.8
16341,2024_10_PHI_DAL,2024-11-10,10,2024,DAL,PHI,B.Aubrey,00-0037692,7,2,0,0,1,1,0,0,0,0,0,0,0,True,26,8,10.77,12.12,11.56,9.6,2.19,2.5,2.44,2.0,0.08,0.12,0.11,0.2,0.73,1.12,1.0,0.8,0.38,0.75,0.78,0.4,1.08,0.62,0.67,0.8,0.04,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.58,1.62,1.44,1.6,2.42,1.62,1.44,1.6
16342,2024_10_PHI_DAL,2024-11-10,10,2024,PHI,DAL,B.Mann,00-0036313,0,0,0,0,0,0,0,0,0,0,0,0,0,False,29,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16343,2024_10_PHI_DAL,2024-11-10,10,2024,PHI,DAL,J.Elliott,00-0033787,10,2,0,0,0,2,0,0,0,0,0,4,4,False,132,8,7.45,6.75,7.11,7.2,1.47,1.25,1.33,1.2,0.2,0.25,0.22,0.2,0.2,0.0,0.0,0.0,0.43,0.62,0.56,1.0,0.83,0.62,0.78,0.2,0.1,0.25,0.22,0.2,0.05,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.48,2.38,2.56,2.6,2.37,2.38,2.56,2.6


### Defense stats

In [15]:
df_kicker_game_level_agg_by_game = df_kicker_game_level.groupby(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam'], as_index=False).agg({
    # Play level
    'fantasy_points': 'sum',
    'total_fg_made': 'sum',
    'total_fg_missed': 'sum',
    '50+_fg_made': 'sum',
    '40-49_fg_made': 'sum',
    '0-39_fg_made': 'sum',
    'missed_fg_0-39': 'sum',
    'missed_fg_40-49': 'sum',
    'missed_fg_50+': 'sum',
    'xp_attempt_19y': 'sum',
    'xp_made_19y': 'sum',
    'xp_attempt_33y': 'sum',
    'xp_made_33y': 'sum',
})

In [16]:
df_kicker_game_level_agg_by_game.head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y
0,1999_01_ARI_PHI,1999-09-12,1,1999,ARI,PHI,13,4,0,0,0,4,0,0,0,1,1,0,0
1,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,6,1,0,0,0,1,0,0,0,3,3,0,0
2,1999_01_BUF_IND,1999-09-12,1,1999,BUF,IND,6,2,0,0,0,2,0,0,0,0,0,0,0
3,1999_01_BUF_IND,1999-09-12,1,1999,IND,BUF,7,1,0,0,0,1,0,0,0,4,4,0,0
4,1999_01_CAR_NO,1999-09-12,1,1999,CAR,NO,6,1,1,1,0,0,0,0,1,1,1,0,0
5,1999_01_CAR_NO,1999-09-12,1,1999,NO,CAR,8,2,0,0,1,1,0,0,0,2,1,0,0
6,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,7,2,0,0,0,2,0,0,0,1,1,0,0
7,1999_01_CIN_TEN,1999-09-12,1,1999,TEN,CIN,12,2,0,1,0,1,0,0,0,4,4,0,0
8,1999_01_DAL_WAS,1999-09-12,1,1999,DAL,WAS,5,0,0,0,0,0,0,0,0,5,5,0,0
9,1999_01_DAL_WAS,1999-09-12,1,1999,WAS,DAL,10,2,0,0,1,1,0,0,0,3,3,0,0


In [17]:
# Group by 'defteam' and apply the 'calc_agg_stats' function
df_kicker_game_level_agg_by_def = df_kicker_game_level_agg_by_game.groupby(
    ['defteam'], 
    group_keys=False
).apply(
    calc_agg_stats, 
    fields=kicker_fields, 
    career=False 
).reset_index(drop=True).round(2)

In [18]:
df_kicker_game_level_agg_by_def.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y,n_games_season,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5
13704,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,9,2,0,0,0,2,0,0,0,0,0,3,3,8,9.75,9.67,9.2,1.88,1.89,1.6,0.12,0.11,0.2,0.5,0.44,0.6,0.38,0.33,0.2,1.0,1.11,0.8,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,2.78,3.0,2.75,2.78,3.0
13705,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,4,1,1,0,0,1,0,1,0,0,0,2,2,8,5.0,4.89,5.2,1.25,1.22,1.2,0.5,0.56,0.4,0.0,0.0,0.0,0.38,0.33,0.6,0.88,0.89,0.6,0.25,0.22,0.2,0.25,0.33,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.22,1.2,1.12,1.22,1.2
13702,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6
13703,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4
13700,2024_10_PIT_WAS,2024-11-10,10,2024,PIT,WAS,4,0,0,0,0,0,0,0,0,0,0,4,4,9,5.89,5.7,5.6,1.22,1.1,1.2,0.11,0.1,0.0,0.22,0.2,0.2,0.11,0.1,0.2,0.89,0.8,0.8,0.0,0.0,0.0,0.11,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.89,2.1,1.4,1.78,2.0,1.4
13701,2024_10_PIT_WAS,2024-11-10,10,2024,WAS,PIT,11,2,0,0,2,0,0,0,0,0,0,3,3,8,6.12,6.67,7.0,1.5,1.56,1.6,0.12,0.11,0.2,0.12,0.11,0.2,0.25,0.44,0.4,1.12,1.0,1.0,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.33,1.4,1.12,1.33,1.4
13698,2024_10_PHI_DAL,2024-11-10,10,2024,DAL,PHI,7,2,0,0,1,1,0,0,0,0,0,0,0,8,7.25,7.22,6.4,1.88,1.89,1.4,0.38,0.33,0.4,0.12,0.11,0.2,0.25,0.33,0.4,1.5,1.44,0.8,0.25,0.22,0.4,0.12,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.38,1.22,1.6,1.25,1.11,1.4
13699,2024_10_PHI_DAL,2024-11-10,10,2024,PHI,DAL,10,2,0,0,0,2,0,0,0,0,0,4,4,8,10.12,10.11,12.6,1.88,1.89,2.6,0.12,0.11,0.0,0.38,0.33,0.4,0.88,0.78,1.4,0.62,0.78,0.8,0.0,0.0,0.0,0.12,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.12,3.22,2.6,3.0,3.11,2.6
13696,2024_10_NYJ_ARI,2024-11-10,10,2024,ARI,NYJ,7,1,0,0,0,1,0,0,0,0,0,4,4,9,9.67,9.4,9.2,2.11,2.0,2.0,0.44,0.4,0.6,0.56,0.5,0.6,0.78,0.7,0.4,0.78,0.8,1.0,0.22,0.2,0.2,0.11,0.1,0.2,0.11,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,1.89,2.1,2.4,1.78,2.0,2.2
13697,2024_10_NYJ_ARI,2024-11-10,10,2024,NYJ,ARI,7,2,0,0,1,1,0,0,0,0,0,0,0,9,10.33,10.0,13.2,2.22,2.2,3.0,0.11,0.1,0.2,0.56,0.5,1.0,0.44,0.5,0.6,1.22,1.2,1.4,0.0,0.0,0.0,0.11,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.33,2.1,1.8,2.22,2.0,1.8


In [19]:
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 22].sort_values(by=['game_date', 'game_id'], ascending=False).head(10)
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 1].sort_values(by=['game_date', 'game_id'], ascending=False).head(10)
# df_kicker_game_level_agg_by_def.index.get_level_values('week').unique()
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 1].sort_values(by=['game_date', 'game_id'], ascending=True).head(10)


## Combine data

In [20]:
# Merge kicker aggregate stats with defensive team stats
df_combined = pd.merge(
    df_kicker_game_level_agg,
    df_kicker_game_level_agg_by_def,
    on=['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam'],
    how='left',
    suffixes=('_k', '_def')
)

# Merge with stadium data
df_combined = pd.merge(
    df_combined,
    df_kicker_game_level_stadium,
    on=['game_id', 'game_date', 'week', 'season'],
    how='left'
)

# Merge with original kicker game level data to include 'fantasy_points'
df_combined = pd.merge(
    df_combined,
    df_kicker_game_level[['game_id', 'fantasy_points']],
    on='game_id',
    how='left'
)

# Drop redundant columns if necessary
columns_to_drop = ['home_team']
df_combined.drop(columns=columns_to_drop, inplace=True, errors='ignore')


# Reset index
df_combined.reset_index(drop=True, inplace=True)

# Log completion message
print("DataFrames merged successfully into 'df_combined'.")

DataFrames merged successfully into 'df_combined'.


In [21]:
df_combined.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,fantasy_points_k,total_fg_made_k,total_fg_missed_k,50+_fg_made_k,40-49_fg_made_k,0-39_fg_made_k,missed_fg_0-39_k,missed_fg_40-49_k,missed_fg_50+_k,xp_attempt_19y_k,xp_made_19y_k,xp_attempt_33y_k,xp_made_33y_k,home,n_games_career,n_games_season_k,fantasy_points_mean_career,fantasy_points_mean_season_k,fantasy_points_mean_prior_season_k,fantasy_points_mean_last5_k,total_fg_made_mean_career,total_fg_made_mean_season_k,total_fg_made_mean_prior_season_k,total_fg_made_mean_last5_k,total_fg_missed_mean_career,total_fg_missed_mean_season_k,total_fg_missed_mean_prior_season_k,total_fg_missed_mean_last5_k,50+_fg_made_mean_career,50+_fg_made_mean_season_k,50+_fg_made_mean_prior_season_k,50+_fg_made_mean_last5_k,40-49_fg_made_mean_career,40-49_fg_made_mean_season_k,40-49_fg_made_mean_prior_season_k,40-49_fg_made_mean_last5_k,0-39_fg_made_mean_career,0-39_fg_made_mean_season_k,0-39_fg_made_mean_prior_season_k,0-39_fg_made_mean_last5_k,missed_fg_50+_mean_career,missed_fg_50+_mean_season_k,missed_fg_50+_mean_prior_season_k,missed_fg_50+_mean_last5_k,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season_k,missed_fg_40-49_mean_prior_season_k,missed_fg_40-49_mean_last5_k,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season_k,missed_fg_0-39_mean_prior_season_k,missed_fg_0-39_mean_last5_k,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season_k,xp_attempt_19y_mean_prior_season_k,xp_attempt_19y_mean_last5_k,xp_made_19y_mean_career,xp_made_19y_mean_season_k,xp_made_19y_mean_prior_season_k,xp_made_19y_mean_last5_k,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season_k,xp_attempt_33y_mean_prior_season_k,xp_attempt_33y_mean_last5_k,xp_made_33y_mean_career,xp_made_33y_mean_season_k,xp_made_33y_mean_prior_season_k,xp_made_33y_mean_last5_k,fantasy_points_def,total_fg_made_def,total_fg_missed_def,50+_fg_made_def,40-49_fg_made_def,0-39_fg_made_def,missed_fg_0-39_def,missed_fg_40-49_def,missed_fg_50+_def,xp_attempt_19y_def,xp_made_19y_def,xp_attempt_33y_def,xp_made_33y_def,n_games_season_def,fantasy_points_mean_season_def,fantasy_points_mean_prior_season_def,fantasy_points_mean_last5_def,total_fg_made_mean_season_def,total_fg_made_mean_prior_season_def,total_fg_made_mean_last5_def,total_fg_missed_mean_season_def,total_fg_missed_mean_prior_season_def,total_fg_missed_mean_last5_def,50+_fg_made_mean_season_def,50+_fg_made_mean_prior_season_def,50+_fg_made_mean_last5_def,40-49_fg_made_mean_season_def,40-49_fg_made_mean_prior_season_def,40-49_fg_made_mean_last5_def,0-39_fg_made_mean_season_def,0-39_fg_made_mean_prior_season_def,0-39_fg_made_mean_last5_def,missed_fg_50+_mean_season_def,missed_fg_50+_mean_prior_season_def,missed_fg_50+_mean_last5_def,missed_fg_40-49_mean_season_def,missed_fg_40-49_mean_prior_season_def,missed_fg_40-49_mean_last5_def,missed_fg_0-39_mean_season_def,missed_fg_0-39_mean_prior_season_def,missed_fg_0-39_mean_last5_def,xp_attempt_19y_mean_season_def,xp_attempt_19y_mean_prior_season_def,xp_attempt_19y_mean_last5_def,xp_made_19y_mean_season_def,xp_made_19y_mean_prior_season_def,xp_made_19y_mean_last5_def,xp_attempt_33y_mean_season_def,xp_attempt_33y_mean_prior_season_def,xp_attempt_33y_mean_last5_def,xp_made_33y_mean_season_def,xp_made_33y_mean_prior_season_def,xp_made_33y_mean_last5_def,stadium,roof,temp,wind,fantasy_points
41203,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,C.Dicker,00-0037224,9,2,0,0,0,2,0,0,0,0,0,3,3,True,37,8,9.03,9.5,9.44,10.0,1.95,2.12,2.11,2.2,0.14,0.25,0.22,0.4,0.32,0.5,0.44,0.6,0.59,0.75,0.67,0.8,1.03,0.88,1.0,0.8,0.08,0.12,0.11,0.2,0.05,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,1.88,2.0,2.0,2.0,1.5,1.67,1.6,9,2,0,0,0,2,0,0,0,0,0,3,3,8,9.75,9.67,9.2,1.88,1.89,1.6,0.12,0.11,0.2,0.5,0.44,0.6,0.38,0.33,0.2,1.0,1.11,0.8,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,2.78,3.0,2.75,2.78,3.0,SoFi Stadium,dome,,,9
41204,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,C.Dicker,00-0037224,9,2,0,0,0,2,0,0,0,0,0,3,3,True,37,8,9.03,9.5,9.44,10.0,1.95,2.12,2.11,2.2,0.14,0.25,0.22,0.4,0.32,0.5,0.44,0.6,0.59,0.75,0.67,0.8,1.03,0.88,1.0,0.8,0.08,0.12,0.11,0.2,0.05,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,1.88,2.0,2.0,2.0,1.5,1.67,1.6,9,2,0,0,0,2,0,0,0,0,0,3,3,8,9.75,9.67,9.2,1.88,1.89,1.6,0.12,0.11,0.2,0.5,0.44,0.6,0.38,0.33,0.2,1.0,1.11,0.8,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,2.78,3.0,2.75,2.78,3.0,SoFi Stadium,dome,,,4
41205,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,N.Folk,00-0025565,4,1,1,0,0,1,0,1,0,0,0,2,2,False,241,8,7.69,7.25,6.89,8.8,1.66,1.38,1.33,1.8,0.27,0.0,0.11,0.0,0.18,0.38,0.33,0.6,0.46,0.5,0.44,0.4,1.01,0.5,0.56,0.8,0.07,0.0,0.0,0.0,0.14,0.0,0.11,0.0,0.05,0.0,0.0,0.0,1.25,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.95,1.88,1.89,1.8,0.88,1.88,1.89,1.8,4,1,1,0,0,1,0,1,0,0,0,2,2,8,5.0,4.89,5.2,1.25,1.22,1.2,0.5,0.56,0.4,0.0,0.0,0.0,0.38,0.33,0.6,0.88,0.89,0.6,0.25,0.22,0.2,0.25,0.33,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.22,1.2,1.12,1.22,1.2,SoFi Stadium,dome,,,9
41206,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,N.Folk,00-0025565,4,1,1,0,0,1,0,1,0,0,0,2,2,False,241,8,7.69,7.25,6.89,8.8,1.66,1.38,1.33,1.8,0.27,0.0,0.11,0.0,0.18,0.38,0.33,0.6,0.46,0.5,0.44,0.4,1.01,0.5,0.56,0.8,0.07,0.0,0.0,0.0,0.14,0.0,0.11,0.0,0.05,0.0,0.0,0.0,1.25,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.95,1.88,1.89,1.8,0.88,1.88,1.89,1.8,4,1,1,0,0,1,0,1,0,0,0,2,2,8,5.0,4.89,5.2,1.25,1.22,1.2,0.5,0.56,0.4,0.0,0.0,0.0,0.38,0.33,0.6,0.88,0.89,0.6,0.25,0.22,0.2,0.25,0.33,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.22,1.2,1.12,1.22,1.2,SoFi Stadium,dome,,,4
41194,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,,,10
41195,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,,,8
41196,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,,,0
41197,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,,,10
41198,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,,,8
41199,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,,,0


## Impute misising values

In [22]:
# Calculate the percentage of null values in each column
null_percentages = df_combined.isnull().mean() * 100

# Sort the percentages in descending order for better readability
null_percentages = null_percentages.sort_values(ascending=False)

# Format the output to display percentages with two decimal places
null_percentages_formatted = null_percentages.apply(lambda x: f"{x:.2f}%")

# Print the results
print("Percentage of Null Values in Each Column:")
print(null_percentages_formatted)

Percentage of Null Values in Each Column:
wind                        38.38%
temp                        38.38%
roof                         8.81%
stadium                      8.81%
xp_made_19y_mean_last5_k     2.55%
                             ...  
fantasy_points_def           0.00%
game_date                    0.00%
n_games_season_k             0.00%
n_games_career               0.00%
fantasy_points               0.00%
Length: 134, dtype: object


In [23]:
# Ensure 'temp' and 'wind' are numeric
df_combined['temp'] = pd.to_numeric(df_combined['temp'], errors='coerce')
df_combined['wind'] = pd.to_numeric(df_combined['wind'], errors='coerce')

# Calculate mean 'temp' and 'wind' by stadium
temp_wind_means = (
    df_combined.groupby('stadium')[['temp', 'wind']]
    .mean()
    .reset_index()
)

# Merge the mean values back to the original DataFrame
df_combined = pd.merge(
    df_combined,
    temp_wind_means,
    on='stadium',
    how='left',
    suffixes=('', '_mean')
)

# Impute missing 'temp' and 'wind' with the group mean values
df_combined['temp'].fillna(df_combined['temp_mean'], inplace=True)
df_combined['wind'].fillna(df_combined['wind_mean'], inplace=True)

# If any missing 'temp' or 'wind' values remain, fill them with the overall mean
df_combined['temp'].fillna(df_combined['temp'].mean(), inplace=True)
df_combined['wind'].fillna(df_combined['wind'].mean(), inplace=True)

# Drop the temporary mean columns
df_combined.drop(columns=['temp_mean', 'wind_mean'], inplace=True)

# For the rest of the columns, fill missing values with 0
# Exclude 'temp' and 'wind' as they've already been imputed
columns_to_fill = df_combined.columns.difference(['temp', 'wind'])
df_combined[columns_to_fill] = df_combined[columns_to_fill].fillna(0)

# Check if any missing values remain
remaining_nulls = df_combined.isnull().sum()
if remaining_nulls.sum() > 0:
    print("Remaining null values after imputation:")
    print(remaining_nulls[remaining_nulls > 0])
else:
    print("All missing values have been imputed.")

All missing values have been imputed.


In [24]:
df_combined.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,fantasy_points_k,total_fg_made_k,total_fg_missed_k,50+_fg_made_k,40-49_fg_made_k,0-39_fg_made_k,missed_fg_0-39_k,missed_fg_40-49_k,missed_fg_50+_k,xp_attempt_19y_k,xp_made_19y_k,xp_attempt_33y_k,xp_made_33y_k,home,n_games_career,n_games_season_k,fantasy_points_mean_career,fantasy_points_mean_season_k,fantasy_points_mean_prior_season_k,fantasy_points_mean_last5_k,total_fg_made_mean_career,total_fg_made_mean_season_k,total_fg_made_mean_prior_season_k,total_fg_made_mean_last5_k,total_fg_missed_mean_career,total_fg_missed_mean_season_k,total_fg_missed_mean_prior_season_k,total_fg_missed_mean_last5_k,50+_fg_made_mean_career,50+_fg_made_mean_season_k,50+_fg_made_mean_prior_season_k,50+_fg_made_mean_last5_k,40-49_fg_made_mean_career,40-49_fg_made_mean_season_k,40-49_fg_made_mean_prior_season_k,40-49_fg_made_mean_last5_k,0-39_fg_made_mean_career,0-39_fg_made_mean_season_k,0-39_fg_made_mean_prior_season_k,0-39_fg_made_mean_last5_k,missed_fg_50+_mean_career,missed_fg_50+_mean_season_k,missed_fg_50+_mean_prior_season_k,missed_fg_50+_mean_last5_k,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season_k,missed_fg_40-49_mean_prior_season_k,missed_fg_40-49_mean_last5_k,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season_k,missed_fg_0-39_mean_prior_season_k,missed_fg_0-39_mean_last5_k,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season_k,xp_attempt_19y_mean_prior_season_k,xp_attempt_19y_mean_last5_k,xp_made_19y_mean_career,xp_made_19y_mean_season_k,xp_made_19y_mean_prior_season_k,xp_made_19y_mean_last5_k,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season_k,xp_attempt_33y_mean_prior_season_k,xp_attempt_33y_mean_last5_k,xp_made_33y_mean_career,xp_made_33y_mean_season_k,xp_made_33y_mean_prior_season_k,xp_made_33y_mean_last5_k,fantasy_points_def,total_fg_made_def,total_fg_missed_def,50+_fg_made_def,40-49_fg_made_def,0-39_fg_made_def,missed_fg_0-39_def,missed_fg_40-49_def,missed_fg_50+_def,xp_attempt_19y_def,xp_made_19y_def,xp_attempt_33y_def,xp_made_33y_def,n_games_season_def,fantasy_points_mean_season_def,fantasy_points_mean_prior_season_def,fantasy_points_mean_last5_def,total_fg_made_mean_season_def,total_fg_made_mean_prior_season_def,total_fg_made_mean_last5_def,total_fg_missed_mean_season_def,total_fg_missed_mean_prior_season_def,total_fg_missed_mean_last5_def,50+_fg_made_mean_season_def,50+_fg_made_mean_prior_season_def,50+_fg_made_mean_last5_def,40-49_fg_made_mean_season_def,40-49_fg_made_mean_prior_season_def,40-49_fg_made_mean_last5_def,0-39_fg_made_mean_season_def,0-39_fg_made_mean_prior_season_def,0-39_fg_made_mean_last5_def,missed_fg_50+_mean_season_def,missed_fg_50+_mean_prior_season_def,missed_fg_50+_mean_last5_def,missed_fg_40-49_mean_season_def,missed_fg_40-49_mean_prior_season_def,missed_fg_40-49_mean_last5_def,missed_fg_0-39_mean_season_def,missed_fg_0-39_mean_prior_season_def,missed_fg_0-39_mean_last5_def,xp_attempt_19y_mean_season_def,xp_attempt_19y_mean_prior_season_def,xp_attempt_19y_mean_last5_def,xp_made_19y_mean_season_def,xp_made_19y_mean_prior_season_def,xp_made_19y_mean_last5_def,xp_attempt_33y_mean_season_def,xp_attempt_33y_mean_prior_season_def,xp_attempt_33y_mean_last5_def,xp_made_33y_mean_season_def,xp_made_33y_mean_prior_season_def,xp_made_33y_mean_last5_def,stadium,roof,temp,wind,fantasy_points
41203,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,C.Dicker,00-0037224,9,2,0,0,0,2,0,0,0,0,0,3,3,True,37,8,9.03,9.5,9.44,10.0,1.95,2.12,2.11,2.2,0.14,0.25,0.22,0.4,0.32,0.5,0.44,0.6,0.59,0.75,0.67,0.8,1.03,0.88,1.0,0.8,0.08,0.12,0.11,0.2,0.05,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,1.88,2.0,2.0,2.0,1.5,1.67,1.6,9,2,0,0,0,2,0,0,0,0,0,3,3,8,9.75,9.67,9.2,1.88,1.89,1.6,0.12,0.11,0.2,0.5,0.44,0.6,0.38,0.33,0.2,1.0,1.11,0.8,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,2.78,3.0,2.75,2.78,3.0,SoFi Stadium,dome,58.591282,8.367558,9
41204,2024_10_TEN_LAC,2024-11-10,10,2024,LAC,TEN,C.Dicker,00-0037224,9,2,0,0,0,2,0,0,0,0,0,3,3,True,37,8,9.03,9.5,9.44,10.0,1.95,2.12,2.11,2.2,0.14,0.25,0.22,0.4,0.32,0.5,0.44,0.6,0.59,0.75,0.67,0.8,1.03,0.88,1.0,0.8,0.08,0.12,0.11,0.2,0.05,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,1.88,2.0,2.0,2.0,1.5,1.67,1.6,9,2,0,0,0,2,0,0,0,0,0,3,3,8,9.75,9.67,9.2,1.88,1.89,1.6,0.12,0.11,0.2,0.5,0.44,0.6,0.38,0.33,0.2,1.0,1.11,0.8,0.12,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,2.78,3.0,2.75,2.78,3.0,SoFi Stadium,dome,58.591282,8.367558,4
41205,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,N.Folk,00-0025565,4,1,1,0,0,1,0,1,0,0,0,2,2,False,241,8,7.69,7.25,6.89,8.8,1.66,1.38,1.33,1.8,0.27,0.0,0.11,0.0,0.18,0.38,0.33,0.6,0.46,0.5,0.44,0.4,1.01,0.5,0.56,0.8,0.07,0.0,0.0,0.0,0.14,0.0,0.11,0.0,0.05,0.0,0.0,0.0,1.25,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.95,1.88,1.89,1.8,0.88,1.88,1.89,1.8,4,1,1,0,0,1,0,1,0,0,0,2,2,8,5.0,4.89,5.2,1.25,1.22,1.2,0.5,0.56,0.4,0.0,0.0,0.0,0.38,0.33,0.6,0.88,0.89,0.6,0.25,0.22,0.2,0.25,0.33,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.22,1.2,1.12,1.22,1.2,SoFi Stadium,dome,58.591282,8.367558,9
41206,2024_10_TEN_LAC,2024-11-10,10,2024,TEN,LAC,N.Folk,00-0025565,4,1,1,0,0,1,0,1,0,0,0,2,2,False,241,8,7.69,7.25,6.89,8.8,1.66,1.38,1.33,1.8,0.27,0.0,0.11,0.0,0.18,0.38,0.33,0.6,0.46,0.5,0.44,0.4,1.01,0.5,0.56,0.8,0.07,0.0,0.0,0.0,0.14,0.0,0.11,0.0,0.05,0.0,0.0,0.0,1.25,0.0,0.0,0.0,1.25,0.0,0.0,0.0,0.95,1.88,1.89,1.8,0.88,1.88,1.89,1.8,4,1,1,0,0,1,0,1,0,0,0,2,2,8,5.0,4.89,5.2,1.25,1.22,1.2,0.5,0.56,0.4,0.0,0.0,0.0,0.38,0.33,0.6,0.88,0.89,0.6,0.25,0.22,0.2,0.25,0.33,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.12,1.22,1.2,1.12,1.22,1.2,SoFi Stadium,dome,58.591282,8.367558,4
41194,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,77.15612,7.685007,10
41195,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,77.15612,7.685007,8
41196,2024_10_SF_TB,2024-11-10,10,2024,SF,TB,J.Moody,00-0038562,10,3,3,0,1,2,0,2,1,0,0,2,2,False,25,5,8.68,11.6,11.33,11.6,1.6,2.6,2.67,2.6,0.24,0.2,0.67,0.2,0.28,0.4,0.33,0.4,0.32,0.6,0.67,0.6,1.0,1.6,1.67,1.6,0.08,0.2,0.33,0.2,0.12,0.0,0.33,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.28,2.4,2.33,2.4,3.2,2.4,2.33,2.4,10,3,3,0,1,2,0,2,1,0,0,2,2,9,9.0,9.1,10.4,1.78,1.9,1.8,0.44,0.7,0.4,0.33,0.3,0.6,0.56,0.6,0.6,0.89,1.0,0.6,0.11,0.2,0.0,0.33,0.5,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.78,2.7,3.6,2.78,2.7,3.6,Raymond James Stadium,outdoors,77.15612,7.685007,0
41197,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,77.15612,7.685007,10
41198,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,77.15612,7.685007,8
41199,2024_10_SF_TB,2024-11-10,10,2024,TB,SF,C.McLaughlin,00-0035358,8,2,0,0,0,2,0,0,0,0,0,2,2,True,75,9,7.57,9.11,9.0,9.0,1.52,1.56,1.6,1.4,0.23,0.11,0.1,0.2,0.41,0.67,0.6,0.6,0.32,0.11,0.1,0.2,0.79,0.78,0.9,0.6,0.07,0.11,0.1,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.08,3.11,3.0,3.4,2.04,3.0,2.9,3.4,8,2,0,0,0,2,0,0,0,0,0,2,2,8,7.38,7.44,7.6,1.5,1.56,1.4,0.0,0.0,0.0,0.25,0.22,0.4,0.12,0.11,0.2,1.12,1.22,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.22,2.4,2.25,2.22,2.4,Raymond James Stadium,outdoors,77.15612,7.685007,0


# Feature Selection

In [25]:
from nfl_model import NFLModel

In [26]:

def get_dummy_variables(df, drop_first=True, dummy_na=False):
    """
    Converts non-numerical columns in a DataFrame to dummy variables.

    Parameters:
    - df: pandas DataFrame
        The input DataFrame containing the data.
    - drop_first: bool, default=False
        Whether to drop the first level of categorical variables to avoid the dummy variable trap.
    - dummy_na: bool, default=False
        Add a column to indicate NaNs, if False NaNs are ignored.

    Returns:
    - df_dummies: pandas DataFrame
        The DataFrame with non-numeric columns converted to dummy variables.
    """
    # Identify non-numeric columns
    non_numeric_cols = df.select_dtypes(exclude=['number', 'bool']).columns.tolist()

    # If there are no non-numeric columns, return the original DataFrame
    if not non_numeric_cols:
        print("No non-numerical columns to convert.")
        return df.copy()

    # Convert categorical variables to dummy variables
    df_dummies = pd.get_dummies(df, columns=non_numeric_cols, drop_first=drop_first, dummy_na=dummy_na)

    return df_dummies


In [27]:
columns_to_include = df_combined.columns.difference(['game_id', 'game_date', 'kicker_player_name'])
final_df = df_combined[columns_to_include].copy()
final_df['kicker_player_id'] = final_df['kicker_player_id'].astype('category')
final_df = get_dummy_variables(final_df)

y_var = 'fantasy_points'

In [28]:
final_df.columns

Index(['0-39_fg_made_def', '0-39_fg_made_k', '0-39_fg_made_mean_career',
       '0-39_fg_made_mean_last5_def', '0-39_fg_made_mean_last5_k',
       '0-39_fg_made_mean_prior_season_def',
       '0-39_fg_made_mean_prior_season_k', '0-39_fg_made_mean_season_def',
       '0-39_fg_made_mean_season_k', '40-49_fg_made_def',
       ...
       'stadium_The Coliseum', 'stadium_The Georgia Dome',
       'stadium_The Meadowlands', 'stadium_Tiger Stadium',
       'stadium_Tottenham Hotspur Stadium', 'stadium_Twickenham Stadium',
       'stadium_U.S. Bank Stadium', 'stadium_Veterans',
       'stadium_Veterans Stadium', 'stadium_Wembley Stadium'],
      dtype='object', length=641)

In [29]:
# Initialize the model
model = NFLModel(data=final_df, target_variable=y_var)

# Preprocess data
model.preprocess_data()

# Perform feature selection
model.feature_selection()

# Evaluate models
model.evaluate_models()

# Get and print the results
results_df = model.get_results()
print(results_df)

Data preprocessing completed.
Lasso selected features: ['40-49_fg_made_def', '40-49_fg_made_k', '50+_fg_made_def', '50+_fg_made_mean_career', '50+_fg_made_mean_prior_season_k', 'fantasy_points_def', 'fantasy_points_k', 'missed_fg_0-39_def', 'missed_fg_40-49_def', 'missed_fg_50+_mean_career', 'missed_fg_50+_mean_prior_season_k', 'season', 'total_fg_made_def', 'total_fg_missed_def', 'defteam_ATL', 'defteam_DET', 'defteam_IND', 'defteam_TB', 'kicker_player_id_00-0004147', 'kicker_player_id_00-0004811', 'kicker_player_id_00-0007622', 'kicker_player_id_00-0009028', 'kicker_player_id_00-0015784', 'kicker_player_id_00-0020578', 'posteam_ATL', 'posteam_IND', 'posteam_LV', 'posteam_TB', 'roof_outdoors', 'stadium_AT&T Stadium', 'stadium_Raymond James Stadium']
Elastic Net selected features: ['0-39_fg_made_mean_prior_season_k', '40-49_fg_made_def', '40-49_fg_made_k', '50+_fg_made_def', '50+_fg_made_k', '50+_fg_made_mean_career', '50+_fg_made_mean_prior_season_k', 'fantasy_points_def', 'fantasy_po

In [30]:
rf_model = model.train_evaluate_rf_all_features()

Random Forest with all features evaluated. MAE: 4.8022, MSE: 34.9129, R2: -0.4632


# To-do list
- Reinclude prior season stats (once fixing bug)
    - n games in prior season doesn't calc correctly, which makes me distrust the prior season aggregation.
- (Get offensive stats)