In [108]:
import pandas as pd
import numpy as np
import nfl_data_py as nfl
import os

pd.set_option('display.max_columns', None)

# Scoring rules
https://www.espn.com/fantasy/football/ffl/story?page=fflrulesstandardscoring

Kicker:
- 5pt - 50+ yd FG
- 4pt - 40-49 yd FG
- 3pt - 0-39 yd FG
- 2pt - 2pt conversion
- 1pt - XP
- -2pt - missed FG (0-39 yd)
- -1pt - missed FG (40-49 yd)



# Read data

In [109]:
# try:
#     # Create the data directory if it doesn't exist
#     if not os.path.exists('data'):
#         os.makedirs('data')

#     # Check if the Feather file exists
#     if not os.path.exists('data/pbp_1999_2024.feather'):
#         print("Downloading play-by-play data...")
#         # Fetch data from the source
#         df_pbp = pd.DataFrame(nfl.import_pbp_data([2024, 2023, 2022, 2021, 2020, 2019, 2018, 2017, 
#                                                 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 
#                                                 2008, 2007, 2006, 2005, 2004, 2003, 2002, 2001, 
#                                                 2000, 1999]))
#         # Save the DataFrame to a Feather file
#         df_pbp.to_feather("data/pbp_1999_2024.feather")
#         print("Data download complete. File saved to 'data/pbp_1999_2024.feather'.")
#     else:
#         print("Loading play-by-play data from local Feather file...")
#         # Read the data from the local Feather file
#         df_pbp = pd.read_feather('data/pbp_1999_2024.feather')
#         print("Data successfully loaded from 'data/pbp_1999_2024.feather'.")

# except Exception as e:
#     print(f"An error occurred: {e}")

In [110]:
import os

def load_data(file_name, download_fn, years=list(range(1999, 2025))):
    try:
        # Create the data directory if it doesn't exist
        if not os.path.exists('data'):
            os.makedirs('data')

        # Check if the Feather file exists
        if not os.path.exists(f'data/{file_name}'):
            print(f"Downloading {file_name}...")
            # Fetch data from the source
            df = pd.DataFrame(download_fn(years))
            # Save the DataFrame to a Feather file
            df.to_csv(f"data/{file_name}")
            print(f"Data download complete. File saved to 'data/{file_name}'.")
        else:
            print(f"Loading {file_name} data from local CSV file...")
            # Read the data from the local Feather file
            df = pd.read_csv(f'data/{file_name}')
            print(f"Data successfully loaded from 'data/{file_name}'.")

        return df

    except Exception as e:
        print(f"An error occurred: {e}")

In [111]:
pbp_df = load_data("pbp_1999_2024.csv", nfl.import_pbp_data)
roster_data = load_data("roster_1999_2024.csv", nfl.import_seasonal_rosters)
schedules_df = load_data("schedules_1999_2024.csv", nfl.import_schedules)
weekly_df = load_data("weekly_1999_2024.csv", nfl.import_weekly_data)
injuries_df = load_data("injuries_2009_2024.csv", nfl.import_injuries, years=list(range(2009, 2025)))

Loading pbp_1999_2024.csv data from local CSV file...


KeyboardInterrupt: 

In [5]:
# injuries_df = load_data("injuries_1999_2024.csv", nfl.import_injuries, years=list(range(2009, 2025)))

In [6]:
df_pbp = pbp_df

In [7]:
df_pbp.head()

Unnamed: 0.1,Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,old_game_id_x,nflverse_game_id,old_game_id_y,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type
0,0,1.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,,,,,,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,,0.0,1.0,,0.0,15:00,BUF 35,0.0,,GAME,,,0.0,0.0,,0.0,0.0,0.0,,,,,,,,,,,3.0,3.0,,,,,,,,0.0,0.0,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.770222,-0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,-0.0,-0.0,0.0,,,0.250386,0.749614,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,2024,,,1.0,1.0,First down,1.0,"9/8/24, 13:03:02",,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,GAME_START,0.0,,,,1.0,Touchdown,,,,,,,,,,,,,,,,,,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,0.0,,,,,,,0.0,0.0,,0.0,0.0,,,,,,,,,,,0.0,0.0,-0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,40.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,BUF,35.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,,0.0,15:00,BUF 35,0.0,70.0,2-T.Bass kicks 65 yards from BUF 35 to end zon...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004568,0.143585,0.002325,0.275986,0.215226,0.003265,0.355046,0.0,0.0,0.770222,0.257819,-0.257819,0.257819,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,0.000338,0.003076,-0.003076,0.566454,0.433546,0.250386,0.749614,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,T.Bass,00-0036162,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,40.0,"9/8/24, 13:03:02",2024-09-08T17:03:02.957Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,KICK_OFF,1.0,,2024-09-08T17:03:06.833Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.257819,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2,61.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,ARI,70.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,15:00,ARI 30,10.0,70.0,(15:00) 6-J.Conner up the middle to ARI 33 for...,run,3.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,middle,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004585,0.140649,0.003159,0.26066,0.209893,0.003228,0.377827,0.0,0.0,1.028042,-0.200602,-0.057217,0.057217,0.200602,-0.200602,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433546,0.566454,0.566454,0.433546,-0.00727,-0.003599,0.003599,0.573724,0.426276,0.253462,0.746538,0.00727,-0.00727,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,00-0033553,J.Conner,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0034376,Ta.Johnson,BUF,,,,,,,,,,1.0,00-0037254,T.Bernard,BUF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,61.0,"9/8/24, 13:03:02",2024-09-08T17:03:40.463Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,RUSH,0.0,,2024-09-08T17:03:43.660Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,0.0,,,J.Conner,6.0,,,0.0,1.0,0.0,0.0,1.0,,00-0033553,,J.Conner,6.0,00-0033553,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,-0.200602,,,,,,0.456761,-45.6761,,,,,,,,,,,,,,,,,,,,
3,3,83.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,ARI,67.0,2024-09-08,867.0,1767.0,3567.0,Half1,0.0,1.0,0.0,1.0,2.0,0.0,14:27,ARI 33,7.0,70.0,(14:27) 1-K.Murray pass short left to 6-J.Conn...,pass,22.0,0.0,0.0,1.0,0.0,0.0,0.0,short,left,-3.0,25.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004817,0.142571,0.002405,0.270632,0.220688,0.003977,0.35491,0.0,0.0,0.827439,2.028874,-2.086091,2.086091,0.200602,-0.200602,-2.028874,2.028874,-1.083852,3.112726,-1.083852,3.112726,1.083852,-1.083852,-3.112726,3.112726,1.083852,-1.083852,-3.112726,3.112726,0.426276,0.573724,0.573724,0.426276,0.053842,0.051482,-0.051482,0.519882,0.480118,0.249864,0.750136,0.00727,-0.00727,-0.053842,0.053842,0.0,0.053842,0.0,0.053842,0.0,0.0,-0.053842,0.053842,0.0,0.0,-0.053842,0.053842,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,00-0035228,K.Murray,22.0,00-0033553,J.Conner,22.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0033551,R.Douglas,BUF,,,,,,,,,,1.0,00-0036888,D.Hamlin,BUF,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,0.868591,13.140899,1.0,1.0,First down,83.0,"9/8/24, 13:03:02",2024-09-08T17:04:12.743Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,PASS,0.0,,2024-09-08T17:04:20.843Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,K.Murray,1.0,,,J.Conner,6.0,1.0,0.0,1.0,0.0,1.0,00-0035228,,00-0033553,K.Murray,1.0,00-0035228,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,2.028874,1.345418,9.321221,8.0,0.509778,0.363807,0.576656,42.33443,,,,,,,,,,,,,,,,,,,,
4,4,108.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,BUF,45.0,2024-09-08,823.0,1723.0,3523.0,Half1,0.0,1.0,0.0,1.0,1.0,0.0,13:43,BUF 45,10.0,70.0,(13:43) (Shotgun) 1-K.Murray pass short middle...,pass,9.0,1.0,0.0,1.0,0.0,0.0,0.0,short,middle,2.0,7.0,,,,,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004927,0.080094,0.001128,0.144228,0.314572,0.004124,0.450927,0.0,0.0,2.856313,0.754242,-2.840333,2.840333,0.200602,-0.200602,-2.783116,2.783116,-0.567367,1.321609,-0.567367,1.321609,1.651219,-1.651219,-4.434335,4.434335,1.651219,-1.651219,-4.434335,4.434335,0.480118,0.519882,0.519882,0.480118,0.054495,0.018542,-0.018542,0.465387,0.534613,0.301346,0.698654,0.00727,-0.00727,-0.108337,0.108337,0.0,0.054495,0.0,0.054495,0.0,0.0,-0.108337,0.108337,0.0,0.0,-0.108337,0.108337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,00-0035228,K.Murray,9.0,00-0033553,J.Conner,9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,00-0035663,T.Rapp,BUF,00-0038557,Do.Williams,BUF,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,0.807773,19.222683,2.0,1.0,First down,108.0,"9/8/24, 13:03:02",2024-09-08T17:04:57.067Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,PASS,0.0,,2024-09-08T17:05:03.770Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,K.Murray,1.0,,,J.Conner,6.0,1.0,0.0,0.0,0.0,1.0,00-0035228,,00-0033553,K.Murray,1.0,00-0035228,J.Conner,00-0033553,J.Conner,00-0033553,0.0,0.0,0.754242,0.882798,5.78356,4.0,0.668478,0.25514,0.426443,57.35569,,,,,,,,,,,,,,,,,,,,


## Filter to kicker plays

In [8]:
# Filter rows where 'kicker_player_name' is not null and the play type is relevant
df_kicker_pbp = df_pbp.loc[
    df_pbp['kicker_player_name'].notnull() & 
    df_pbp['play_type'].isin(['field_goal', 'extra_point', 'kickoff'])
].copy() 

# Ensure 'posteam' and 'defteam' columns exist
if 'posteam' in df_kicker_pbp.columns and 'defteam' in df_kicker_pbp.columns:
    # Create a mask for kickoff plays
    kickoff_mask = df_kicker_pbp['play_type'] == 'kickoff'

    # Log the number of kickoff plays being processed
    print(f"Swapping 'posteam' and 'defteam' for {kickoff_mask.sum()} kickoff plays...")

    # Swap values using the mask
    df_kicker_pbp.loc[kickoff_mask, ['posteam', 'defteam']] = (
        df_kicker_pbp.loc[kickoff_mask, ['defteam', 'posteam']].values
    )

    print("Swap complete.")
else:
    print("Error: Required columns 'posteam' and 'defteam' are missing from the DataFrame.")

# Convert 'game_date' column to datetime format, with error handling
try:
    df_kicker_pbp['game_date'] = pd.to_datetime(df_kicker_pbp['game_date'], errors='coerce')
    if df_kicker_pbp['game_date'].isnull().any():
        print("Warning: Some 'game_date' entries could not be converted and have been set to NaT.")
except Exception as e:
    print(f"An error occurred while converting 'game_date' to datetime: {e}")

# Final log for confirmation
print("Data processing for 'df_kicker_pbp' completed.")

Swapping 'posteam' and 'defteam' for 68111 kickoff plays...
Swap complete.
Data processing for 'df_kicker_pbp' completed.


In [9]:
df_kicker_pbp.head()

Unnamed: 0.1,Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe,old_game_id_x,nflverse_game_id,old_game_id_y,possession_team,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense,ngs_air_yards,time_to_throw,was_pressure,route,defense_man_zone_type,defense_coverage_type
1,1,40.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,BUF,away,ARI,BUF,35.0,2024-09-08,900.0,1800.0,3600.0,Half1,0.0,1.0,0.0,1.0,,0.0,15:00,BUF 35,0.0,70.0,2-T.Bass kicks 65 yards from BUF 35 to end zon...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.004568,0.143585,0.002325,0.275986,0.215226,0.003265,0.355046,0.0,0.0,0.770222,0.257819,-0.257819,0.257819,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.433208,0.566792,0.566792,0.433208,0.000338,0.003076,-0.003076,0.566454,0.433546,0.250386,0.749614,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,T.Bass,00-0036162,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,1.0,1.0,First down,40.0,"9/8/24, 13:03:02",2024-09-08T17:03:02.957Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,KICK_OFF,1.0,,2024-09-08T17:03:06.833Z,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.257819,,,,,,,,,,,,,,,,,,,,,,,,,,,
15,15,407.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,BUF,15.0,2024-09-08,467.0,1367.0,3167.0,Half1,0.0,1.0,1.0,1.0,,0.0,07:47,BUF 15,0.0,70.0,"5-M.Prater extra point is GOOD, Center-46-A.Br...",extra_point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,33.0,good,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,7.0,6.0,0.0,6.0,7.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.932427,0.0,0.932427,0.067573,-6.29735,6.29735,-0.25641,0.25641,-5.715548,5.715548,,,0.0,0.0,-2.603761,2.603761,-4.56486,4.56486,-3.119902,3.119902,-3.581094,3.581094,0.613047,0.386953,0.386953,0.613047,0.001292,0.000491,-0.000491,0.38566,0.61434,0.445916,0.554084,0.010752,-0.010752,-0.190253,0.190253,,,0.0,0.0,-0.046479,0.046479,-0.174029,0.174029,-0.046479,0.046479,-0.158647,0.158647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,5.0,1.0,Touchdown,407.0,"9/8/24, 13:03:02",2024-09-08T17:16:35Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,XP_KICK,1.0,,,,1.0,Touchdown,2024-09-08T17:03:02.957Z,13.0,7:13,5.0,1.0,1.0,1.0,1.0,10.0,KICKOFF,TOUCHDOWN,15:00,07:47,ARI 30,BUF 5,40.0,407.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.067573,,,,,,,,,,,,,,,,,,,,,,,,,,,
16,16,422.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,home,BUF,ARI,35.0,2024-09-08,467.0,1367.0,3167.0,Half1,0.0,2.0,0.0,1.0,,0.0,07:47,ARI 35,0.0,16.0,5-M.Prater kicks 63 yards from ARI 35 to BUF 2...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,63.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,7.0,0.0,7.0,-7.0,0.0,7.0,-7.0,0.015049,0.134336,0.001989,0.233148,0.213073,0.003136,0.39927,0.0,0.0,1.401358,1.882355,-4.414996,4.414996,-0.25641,0.25641,-5.715548,5.715548,,,0.0,0.0,-2.603761,2.603761,-4.56486,4.56486,-3.119902,3.119902,-3.581094,3.581094,0.38566,0.61434,0.38566,0.61434,0.048635,0.072297,0.072297,0.434295,0.565705,0.553593,0.553593,0.010752,-0.010752,-0.190253,0.190253,,,0.0,0.0,-0.046479,0.046479,-0.174029,0.174029,-0.046479,0.046479,-0.158647,0.158647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,B.Codrington,00-0039333,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,00-0038984,K.Clark,ARI,,,,,,,,,,1.0,00-0039807,M.Melton,ARI,,,,,,,,,,,,,,,,,,,,,,,,,,,,BUF,53.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,6.0,1.0,First down,422.0,"9/8/24, 13:03:02",2024-09-08T17:19:41.557Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,KICK_OFF,1.0,,2024-09-08T17:19:53.017Z,,2.0,Turnover,2024-09-08T17:19:41.557Z,4.0,2:23,2.0,1.0,0.0,1.0,1.0,0.0,KICKOFF,FUMBLE,07:47,05:24,ARI 45,ARI 19,422.0,528.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,1.0,0.0,1.882355,,,,,,,,,,,,,,,,,,,,,,,,,,,
31,31,823.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,away,BUF,BUF,11.0,2024-09-08,900.0,900.0,2700.0,Half1,0.0,3.0,1.0,2.0,4.0,0.0,15:00,BUF 11,4.0,60.0,"(15:00) 5-M.Prater 29 yard field goal is GOOD,...",field_goal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,made,29.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,10.0,7.0,0.0,7.0,10.0,0.0,10.0,0.002077,0.007349,0.000103,0.01276,0.966858,0.000108,0.010744,0.0,0.0,2.864426,0.135574,-10.698709,10.698709,0.494553,-0.494553,-12.61465,12.61465,,,0.0,0.0,2.935215,-2.935215,-10.958288,10.958288,2.419075,-2.419075,-9.974523,9.974523,0.721678,0.278322,0.278322,0.721678,-0.002172,0.010197,-0.010197,0.280494,0.719506,0.557039,0.442961,-0.001494,0.001494,-0.33398,0.33398,,,0.0,0.0,-0.043326,0.043326,-0.167168,0.167168,-0.043326,0.043326,-0.151786,0.151786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,12.0,0.0,Field goal,823.0,"9/8/24, 13:03:02",2024-09-08T17:34:16Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,FIELD_GOAL,0.0,,,,3.0,Field goal,2024-09-08T17:25:39.110Z,10.0,5:27,3.0,1.0,1.0,1.0,2.0,0.0,FUMBLE,FIELD_GOAL,05:24,14:57,ARI 29,BUF 11,565.0,823.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.135574,,,,,,,,,,,,,,,,,,,,,,,,,,,
32,32,847.0,2024_01_ARI_BUF,2024091000.0,BUF,ARI,REG,1,ARI,home,BUF,ARI,35.0,2024-09-08,897.0,897.0,2697.0,Half1,0.0,4.0,0.0,2.0,,0.0,14:57,ARI 35,0.0,51.0,5-M.Prater kicks 65 yards from ARI 35 to end z...,kickoff,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,65.0,,,3.0,3.0,0.0,,,,,3.0,3.0,0.0,10.0,0.0,10.0,-10.0,0.0,10.0,-10.0,0.050659,0.138172,0.002267,0.225536,0.211489,0.002728,0.369149,0.0,0.0,1.226168,0.311864,-10.386845,10.386845,0.494553,-0.494553,-12.61465,12.61465,,,0.0,0.0,2.935215,-2.935215,-10.958288,10.958288,2.419075,-2.419075,-9.974523,9.974523,0.280494,0.719506,0.280494,0.719506,,,0.009553,,,0.432765,0.432765,-0.001494,0.001494,-0.33398,0.33398,,,0.0,0.0,-0.043326,0.043326,-0.167168,0.167168,-0.043326,0.043326,-0.151786,0.151786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,M.Prater,00-0023853,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BUF,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,,,2024,,,13.0,1.0,First down,847.0,"9/8/24, 13:03:02",2024-09-08T17:36:15.657Z,Highmark Stadium,"Clouds and sun with wind Temp: 61° F, Humidity...",7d40236a-1312-11ef-afd1-646009f18b2e,0.0,0.0,KICK_OFF,1.0,,2024-09-08T17:36:20.873Z,,4.0,Field goal,2024-09-08T17:36:15.657Z,9.0,5:10,4.0,1.0,1.0,2.0,2.0,-9.0,KICKOFF,FIELD_GOAL,14:57,09:47,BUF 30,ARI 19,847.0,1120.0,28,34,Home,6,62,6.5,46.0,0,outdoors,a_turf,61.0,20.0,Sean McDermott,Jonathan Gannon,BUF00,New Era Field,0.0,1.0,,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,,0.0,0.0,0.311864,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Calculate kicker fantasy points fields

In [10]:
# Set extra point distance based on year and create flags for XP attempts and success
df_kicker_pbp['xp_distance'] = np.where(df_kicker_pbp['game_date'].dt.year < 2015, 19, 33)
df_kicker_pbp["xp_attempt"] = df_kicker_pbp["extra_point_result"].notnull()
df_kicker_pbp["xp_made"] = df_kicker_pbp["extra_point_result"] == "good"

# Create flags for successful and attempted XPs by distance
df_kicker_pbp["xp_made_33y"] = df_kicker_pbp["xp_made"] & (df_kicker_pbp["xp_distance"] == 33)
df_kicker_pbp["xp_made_19y"] = df_kicker_pbp["xp_made"] & (df_kicker_pbp["xp_distance"] == 19)
df_kicker_pbp["xp_attempt_33y"] = df_kicker_pbp["xp_attempt"] & (df_kicker_pbp["xp_distance"] == 33)
df_kicker_pbp["xp_attempt_19y"] = df_kicker_pbp["xp_attempt"] & (df_kicker_pbp["xp_distance"] == 19)

# Field goal (FG) results and distance-based flags
df_kicker_pbp["50+_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"] >= 50)
df_kicker_pbp["40-49_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"].between(40, 49))
df_kicker_pbp["0-39_fg_made"] = (df_kicker_pbp["field_goal_result"] == "made") & (df_kicker_pbp["kick_distance"] < 40)

# Missed FG flags by distance
df_kicker_pbp["missed_fg_0-39"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"] < 40)
df_kicker_pbp["missed_fg_40-49"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"].between(40, 49))
df_kicker_pbp["missed_fg_50+"] = (df_kicker_pbp["field_goal_result"] == "missed") & (df_kicker_pbp["kick_distance"] >= 50)

# Total FGs made and missed
df_kicker_pbp["total_fg_made"] = df_kicker_pbp[["50+_fg_made", "40-49_fg_made", "0-39_fg_made"]].sum(axis=1)
df_kicker_pbp["total_fg_missed"] = df_kicker_pbp[["missed_fg_0-39", "missed_fg_40-49", "missed_fg_50+"]].sum(axis=1)

# Calculate fantasy points based on custom scoring system
df_kicker_pbp["fantasy_points"] = (
    df_kicker_pbp["50+_fg_made"] * 5 +
    df_kicker_pbp["40-49_fg_made"] * 4 +
    df_kicker_pbp["0-39_fg_made"] * 3 +
    df_kicker_pbp["xp_made"] * 1 +
    df_kicker_pbp["missed_fg_0-39"] * -2 +
    df_kicker_pbp["missed_fg_40-49"] * -1
)

# Optional: Drop any rows with NaN values in the calculated columns
# df_kicker_pbp.dropna(subset=["fantasy_points"], inplace=True)

# Log completion message
print("Kicker play-by-play data processing completed successfully.")

Kicker play-by-play data processing completed successfully.


# Aggregate to game level

## List of kicker stats to aggregate

Given:
- game_id
- home_team
- away_team
- week
- posteam
- defteam
<!-- - yardline_100 -->
- game_date
<!-- - play_type -->
- field_goal_result
- kick_distance
- extra_point_result
- field_goal_attempt
- kicker_player_name
- kicker_player_id
- stadium
- weather
- roof
- surface
- temp
- wind

Calculated:
- fantasy_points
- 50+_fg_made
- 40-49_fg_made
- 0-39_fg_made
- missed_fg_0-39
- missed_fg_40-49
- missed_fg_50+
- xp_attempt
- xp_made
- xp_distance
<!-- - xp_made_15yl
- xp_made_2yl -->

In [11]:
df_kicker_pbp[df_kicker_pbp.game_id == '2024_10_TEN_LAC'][['temp', 'wind', 'surface', 'stadium']]

Unnamed: 0,temp,wind,surface,stadium


In [12]:
df_kicker_game_level_stadium = df_kicker_pbp.groupby(['game_id', 'game_date', 'week', 'season', 'stadium'], as_index=False).agg({
    # Game level
    'home_team': 'first',
    'roof': 'first',
    'temp': 'first',
    'wind': 'first',
}).sort_values(by=['game_date'], ascending=False)

In [13]:
df_kicker_game_level_stadium.head(10)

Unnamed: 0,game_id,game_date,week,season,stadium,home_team,roof,temp,wind
6320,2024_09_TB_KC,2024-11-04,9,2024,GEHA Field at Arrowhead Stadium,KC,outdoors,,
6321,2024_09_WAS_NYG,2024-11-03,9,2024,MetLife Stadium,NYG,outdoors,52.0,6.0
6308,2024_09_DAL_ATL,2024-11-03,9,2024,Mercedes-Benz Stadium,ATL,closed,,
6319,2024_09_NO_CAR,2024-11-03,9,2024,Bank of America Stadium,CAR,outdoors,66.0,7.0
6318,2024_09_NE_TEN,2024-11-03,9,2024,Nissan Stadium,TEN,outdoors,75.0,10.0
6317,2024_09_MIA_BUF,2024-11-03,9,2024,Highmark Stadium,BUF,outdoors,54.0,9.0
6316,2024_09_LV_CIN,2024-11-03,9,2024,Paycor Stadium,CIN,outdoors,67.0,7.0
6315,2024_09_LA_SEA,2024-11-03,9,2024,Lumen Field,SEA,outdoors,52.0,4.0
6314,2024_09_LAC_CLE,2024-11-03,9,2024,Huntington Bank Field,CLE,outdoors,57.0,10.0
6313,2024_09_JAX_PHI,2024-11-03,9,2024,Lincoln Financial Field,PHI,outdoors,57.0,6.0


In [14]:
df_kicker_game_level = df_kicker_pbp.groupby(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam', 'kicker_player_name', 'kicker_player_id'], as_index=False).agg({
    # Game level
    'home_team': 'first',
    'away_team': 'first',

    # Play level
    'fantasy_points': 'sum',
    'total_fg_made': 'sum',
    'total_fg_missed': 'sum',
    '50+_fg_made': 'sum',
    '40-49_fg_made': 'sum',
    '0-39_fg_made': 'sum',
    'missed_fg_0-39': 'sum',
    'missed_fg_40-49': 'sum',
    'missed_fg_50+': 'sum',
    'xp_attempt_19y': 'sum',
    'xp_made_19y': 'sum',
    'xp_attempt_33y': 'sum',
    'xp_made_33y': 'sum',
})

df_kicker_game_level["home"] = df_kicker_game_level["home_team"] == df_kicker_game_level["posteam"]
df_kicker_game_level.drop(columns=['home_team', 'away_team'], inplace=True)

In [15]:
df_kicker_game_level.head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y,home
0,1999_01_ARI_PHI,1999-09-12,1,1999,ARI,PHI,C.Jacke,00-0008080,13,4,0,0,0,4,0,0,0,1,1,0,0,False
1,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,D.Akers,00-0000108,0,0,0,0,0,0,0,0,0,0,0,0,0,True
2,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,N.Johnson,00-0008593,6,1,0,0,0,1,0,0,0,3,3,0,0,True
3,1999_01_BUF_IND,1999-09-12,1,1999,BUF,IND,S.Christie,00-0002975,6,2,0,0,0,2,0,0,0,0,0,0,0,False
4,1999_01_BUF_IND,1999-09-12,1,1999,IND,BUF,M.Vanderjagt,00-0016830,7,1,0,0,0,1,0,0,0,4,4,0,0,True
5,1999_01_CAR_NO,1999-09-12,1,1999,CAR,NO,J.Kasay,00-0009028,6,1,1,1,0,0,0,0,1,1,1,0,0,False
6,1999_01_CAR_NO,1999-09-12,1,1999,NO,CAR,D.Brien,00-0001759,8,2,0,0,1,1,0,0,0,2,1,0,0,True
7,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,D.Pelfrey,00-0012742,7,2,0,0,0,2,0,0,0,1,1,0,0,False
8,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,W.Brice,00-0001752,0,0,0,0,0,0,0,0,0,0,0,0,0,False
9,1999_01_CIN_TEN,1999-09-12,1,1999,TEN,CIN,A.Del Greco,00-0004147,12,2,0,1,0,1,0,0,0,4,4,0,0,True


## Predictors to calculate
Aggregate each point-earning stat at the following intervals:
- Past 5 games
- Season
- Career (kicker only)

Calculate the above for kickers, offenses, and defenses as well.

Calculate # games played by player in career (ie sample size).
Calculate # games in season (not quite same as week # -- injuries, byes)

Get offensive stats too (e.g. yards per game, points per game)

All stats should be controlled for time.

In [165]:
def calc_agg_stats(group, fields, career=True):
    """
    Calculate aggregate statistics for each player over their career and season,
    including prior season means, rolling averages, and cumulative counts.

    Parameters:
    - group: DataFrame grouped by player or other identifier.
    - fields: List of fields to calculate statistics on.
    - career: Boolean indicating whether to calculate career-level stats.

    Returns:
    - DataFrame with calculated aggregate statistics.
    """
    # Ensure 'game_date' is datetime
    group['game_date'] = pd.to_datetime(group['game_date'], errors='coerce')
    
    # Sort the group chronologically
    group_sorted = group.sort_values('game_date')
    
    # Initialize the result DataFrame
    result = pd.DataFrame(index=group_sorted.index)
    
    # Calculate cumulative game counts
    if career:
        # Career game count (number of games up to current point, excluding current game)
        result['n_games_career'] = np.arange(len(group_sorted))
    
    # Season game count
    result['n_games_season'] = group_sorted.groupby('season').cumcount()
    
    # Loop over each field to calculate aggregate stats
    for field in fields:
        if career:
            # Career mean up to the previous game (excluding current game)
            result[f'{field}_mean_career'] = (
                group_sorted[field]
                .expanding()
                .mean()
                .shift()
            )
        
        # Season mean up to the previous game (excluding current game)
        result[f'{field}_mean_season'] = (
            group_sorted.groupby('season')[field]
            .transform(lambda x: x.expanding().mean().shift())
            # .expanding()
            # .mean()
            # .shift()
            # .reset_index(level=0, drop=True)
        )
        
        # Prior season mean (mean of the entire previous season)
        result[f'{field}_mean_prior_season'] = (
            group_sorted.groupby('season')[field]
            .mean()
            .shift()
            .reindex(group_sorted['season'])
            .values
        )
        
        # Rolling mean for the last 5 games up to the previous game (excluding current game)
        result[f'{field}_mean_last5'] = (
            group_sorted[field]
            .rolling(window=5, min_periods=1)
            .mean()
            .shift()
        )
    
    # Combine the result with the original group_sorted DataFrame
    combined = pd.concat([group_sorted, result], axis=1)
    
    return combined

In [166]:
# Define the fields for which you want to calculate aggregate statistics
kicker_fields = [
    'fantasy_points', 
    'total_fg_made', 
    'total_fg_missed', 
    '50+_fg_made', 
    '40-49_fg_made', 
    '0-39_fg_made', 
    'missed_fg_50+', 
    'missed_fg_40-49', 
    'missed_fg_0-39', 
    'xp_attempt_19y', 
    'xp_made_19y', 
    'xp_attempt_33y', 
    'xp_made_33y'
]

# Apply the 'calc_agg_stats' function to each kicker's data
df_kicker_game_level_agg = df_kicker_game_level.groupby(
    ['kicker_player_name', 'kicker_player_id'], 
    group_keys=False
).apply(
    calc_agg_stats, 
    fields=kicker_fields
).reset_index(drop=True).round(2)
df_kicker_game_level_agg = df_kicker_game_level_agg.drop(columns=df_kicker_game_level_agg.loc[:, "fantasy_points":"home"].columns)


In [191]:
df_kicker_game_level_agg.columns

Index(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam',
       'kicker_player_name', 'kicker_player_id', 'n_games_career',
       'n_games_season', 'fantasy_points_mean_career',
       'fantasy_points_mean_season', 'fantasy_points_mean_prior_season',
       'fantasy_points_mean_last5', 'total_fg_made_mean_career',
       'total_fg_made_mean_season', 'total_fg_made_mean_prior_season',
       'total_fg_made_mean_last5', 'total_fg_missed_mean_career',
       'total_fg_missed_mean_season', 'total_fg_missed_mean_prior_season',
       'total_fg_missed_mean_last5', '50+_fg_made_mean_career',
       '50+_fg_made_mean_season', '50+_fg_made_mean_prior_season',
       '50+_fg_made_mean_last5', '40-49_fg_made_mean_career',
       '40-49_fg_made_mean_season', '40-49_fg_made_mean_prior_season',
       '40-49_fg_made_mean_last5', '0-39_fg_made_mean_career',
       '0-39_fg_made_mean_season', '0-39_fg_made_mean_prior_season',
       '0-39_fg_made_mean_last5', 'missed_fg_50+_mean_career'

In [168]:
df_kicker_game_level_agg.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)


Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,n_games_career,n_games_season,fantasy_points_mean_career,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_career,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_career,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_career,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_career,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_career,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_career,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_career,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_career,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5
16319,2024_09_TB_KC,2024-11-04,9,2024,KC,TB,H.Butker,00-0033303,133,7,9.2,9.29,9.33,9.2,1.83,2.0,2.1,2.0,0.21,0.29,0.1,0.4,0.27,0.29,0.33,0.2,0.42,0.29,0.38,0.4,1.14,1.43,1.38,1.4,0.1,0.29,0.0,0.4,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.57,2.19,2.6,2.92,2.43,2.19,2.4
16320,2024_09_TB_KC,2024-11-04,9,2024,TB,KC,C.McLaughlin,00-0035358,74,8,7.58,9.38,8.26,9.8,1.53,1.62,1.74,1.6,0.23,0.12,0.05,0.2,0.42,0.75,0.42,0.8,0.31,0.0,0.21,0.0,0.8,0.88,1.11,0.8,0.07,0.12,0.05,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.07,3.12,2.0,3.6,2.03,3.0,2.0,3.4
16321,2024_09_TB_KC,2024-11-04,9,2024,TB,KC,T.Gill,00-0037097,15,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16322,2024_09_WAS_NYG,2024-11-03,9,2024,NYG,WAS,J.McAtamney,00-0039934,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16323,2024_09_WAS_NYG,2024-11-03,9,2024,WAS,NYG,A.Seibert,00-0035145,38,7,7.97,13.86,4.0,13.2,1.79,3.29,1.0,3.0,0.32,0.14,0.0,0.2,0.08,0.14,0.0,0.2,0.63,1.0,0.0,1.0,1.08,2.14,1.0,1.8,0.13,0.14,0.0,0.2,0.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.16,2.71,1.0,2.8,2.0,2.71,1.0,2.8
16317,2024_09_NO_CAR,2024-11-03,9,2024,CAR,NO,E.Pineiro,00-0034173,61,8,7.02,5.38,7.13,4.4,1.61,1.12,1.67,0.8,0.2,0.12,0.27,0.2,0.16,0.0,0.33,0.0,0.49,0.5,0.47,0.4,0.95,0.62,0.87,0.4,0.03,0.0,0.13,0.0,0.13,0.12,0.13,0.2,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.7,1.62,1.33,1.8,1.57,1.62,1.13,1.8
16318,2024_09_NO_CAR,2024-11-03,9,2024,NO,CAR,B.Grupe,00-0038905,25,8,8.36,8.12,8.47,5.8,1.72,1.62,1.76,1.2,0.32,0.12,0.41,0.2,0.32,0.25,0.35,0.0,0.52,0.62,0.47,0.8,0.88,0.75,0.94,0.4,0.12,0.0,0.18,0.0,0.12,0.12,0.12,0.2,0.08,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4,2.5,2.35,1.8,2.32,2.25,2.35,1.6
16315,2024_09_NE_TEN,2024-11-03,9,2024,NE,TEN,J.Slye,00-0035192,86,8,7.1,6.0,5.88,5.2,1.53,1.38,1.12,1.0,0.27,0.12,0.24,0.2,0.26,0.25,0.18,0.4,0.43,0.25,0.41,0.2,0.85,0.88,0.53,0.4,0.13,0.0,0.12,0.0,0.1,0.0,0.12,0.0,0.03,0.12,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.94,1.38,2.06,1.6,1.73,1.38,1.88,1.6
16316,2024_09_NE_TEN,2024-11-03,9,2024,TEN,NE,N.Folk,00-0025565,240,7,7.69,7.14,7.71,7.6,1.65,1.29,1.71,1.4,0.27,0.0,0.06,0.0,0.18,0.43,0.29,0.6,0.47,0.57,0.35,0.4,1.0,0.29,1.06,0.4,0.08,0.0,0.06,0.0,0.14,0.0,0.0,0.0,0.05,0.0,0.0,0.0,1.26,0.0,0.0,0.0,1.26,0.0,0.0,0.0,0.95,1.86,1.76,1.8,0.88,1.86,1.65,1.8
16313,2024_09_MIA_BUF,2024-11-03,9,2024,BUF,MIA,T.Bass,00-0036162,83,8,8.31,7.88,7.47,6.8,1.58,1.5,1.37,1.4,0.28,0.38,0.32,0.4,0.19,0.12,0.21,0.2,0.36,0.25,0.26,0.0,1.02,1.12,0.89,1.2,0.08,0.0,0.11,0.0,0.12,0.38,0.16,0.4,0.07,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.22,3.5,3.0,2.8,3.1,3.25,2.95,2.6


In [169]:
df_kicker_game_level_agg[df_kicker_game_level_agg['kicker_player_id'] == '00-0033303'].sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,n_games_career,n_games_season,fantasy_points_mean_career,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_career,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_career,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_career,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_career,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_career,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_career,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_career,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_career,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5
16319,2024_09_TB_KC,2024-11-04,9,2024,KC,TB,H.Butker,00-0033303,133,7,9.2,9.29,9.33,9.2,1.83,2.0,2.1,2.0,0.21,0.29,0.1,0.4,0.27,0.29,0.33,0.2,0.42,0.29,0.38,0.4,1.14,1.43,1.38,1.4,0.1,0.29,0.0,0.4,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.57,2.19,2.6,2.92,2.43,2.19,2.4
16272,2024_08_KC_LV,2024-10-27,8,2024,KC,LV,H.Butker,00-0033303,132,6,9.2,9.17,9.33,9.2,1.83,2.0,2.1,2.0,0.21,0.33,0.1,0.4,0.27,0.33,0.33,0.4,0.42,0.17,0.38,0.2,1.14,1.5,1.38,1.4,0.1,0.33,0.0,0.4,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.5,2.19,2.4,2.92,2.33,2.19,2.2
16229,2024_07_KC_SF,2024-10-20,7,2024,KC,SF,H.Butker,00-0033303,131,5,9.24,10.2,9.33,10.2,1.84,2.4,2.1,2.4,0.21,0.4,0.1,0.4,0.27,0.4,0.33,0.4,0.42,0.2,0.38,0.2,1.15,1.8,1.38,1.8,0.1,0.4,0.0,0.4,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.2,2.19,2.2,2.91,2.0,2.19,2.0
16175,2024_05_NO_KC,2024-10-07,5,2024,KC,NO,H.Butker,00-0033303,130,4,9.2,9.25,9.33,10.4,1.82,2.0,2.1,2.4,0.21,0.25,0.1,0.2,0.28,0.5,0.33,0.6,0.42,0.25,0.38,0.2,1.12,1.25,1.38,1.6,0.09,0.25,0.0,0.2,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.25,2.19,2.0,2.92,2.0,2.19,1.8
16128,2024_04_KC_LAC,2024-09-29,4,2024,KC,LAC,H.Butker,00-0033303,129,3,9.23,10.67,9.33,10.8,1.83,2.33,2.1,2.4,0.2,0.0,0.1,0.0,0.28,0.67,0.33,0.8,0.43,0.33,0.38,0.2,1.12,1.33,1.38,1.4,0.09,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.09,2.33,2.19,2.0,2.92,2.0,2.19,1.8
16099,2024_03_KC_ATL,2024-09-22,3,2024,KC,ATL,H.Butker,00-0033303,128,2,9.2,9.5,9.33,10.2,1.82,2.0,2.1,2.2,0.2,0.0,0.1,0.0,0.27,0.5,0.33,0.6,0.42,0.0,0.38,0.2,1.12,1.5,1.38,1.4,0.09,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.1,2.5,2.19,2.2,2.94,2.5,2.19,2.2
16050,2024_02_CIN_KC,2024-09-15,2,2024,KC,CIN,H.Butker,00-0033303,127,1,9.2,9.0,9.33,11.0,1.82,2.0,2.1,2.6,0.2,0.0,0.1,0.0,0.27,0.0,0.33,0.4,0.43,0.0,0.38,0.2,1.13,2.0,1.38,2.0,0.09,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.11,3.0,2.19,2.2,2.94,3.0,2.19,2.2
16008,2024_01_BAL_KC,2024-09-05,1,2024,KC,BAL,H.Butker,00-0033303,126,0,9.2,,9.33,10.8,1.82,,2.1,2.6,0.21,,0.1,0.0,0.27,,0.33,0.4,0.43,,0.38,0.4,1.12,,1.38,1.8,0.09,,0.0,0.0,0.07,,0.0,0.0,0.05,,0.1,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,3.11,,2.19,1.8,2.94,,2.19,1.8
16003,2023_22_SF_KC,2024-02-11,22,2023,KC,SF,H.Butker,00-0033303,125,20,9.15,9.05,8.25,12.6,1.8,2.0,1.5,3.0,0.21,0.1,0.38,0.0,0.26,0.3,0.31,0.4,0.43,0.4,0.38,1.0,1.1,1.3,0.81,1.6,0.09,0.0,0.19,0.0,0.07,0.0,0.12,0.0,0.05,0.1,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.13,2.25,3.19,1.8,2.96,2.25,3.0,1.8
16002,2023_21_KC_BAL,2024-01-28,21,2023,KC,BAL,H.Butker,00-0033303,124,19,9.17,9.16,8.25,11.2,1.81,2.05,1.5,2.8,0.21,0.11,0.38,0.2,0.26,0.26,0.31,0.2,0.44,0.42,0.38,1.0,1.11,1.37,0.81,1.6,0.09,0.0,0.19,0.0,0.07,0.0,0.12,0.0,0.05,0.11,0.06,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.14,2.26,3.19,1.8,2.97,2.26,3.0,1.8


### Defense stats

In [170]:
df_kicker_game_level_agg_by_game = df_kicker_game_level.groupby(['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam'], as_index=False).agg({
    # Play level
    'fantasy_points': 'sum',
    'total_fg_made': 'sum',
    'total_fg_missed': 'sum',
    '50+_fg_made': 'sum',
    '40-49_fg_made': 'sum',
    '0-39_fg_made': 'sum',
    'missed_fg_0-39': 'sum',
    'missed_fg_40-49': 'sum',
    'missed_fg_50+': 'sum',
    'xp_attempt_19y': 'sum',
    'xp_made_19y': 'sum',
    'xp_attempt_33y': 'sum',
    'xp_made_33y': 'sum',
})

In [171]:
df_kicker_game_level_agg_by_game.head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,fantasy_points,total_fg_made,total_fg_missed,50+_fg_made,40-49_fg_made,0-39_fg_made,missed_fg_0-39,missed_fg_40-49,missed_fg_50+,xp_attempt_19y,xp_made_19y,xp_attempt_33y,xp_made_33y
0,1999_01_ARI_PHI,1999-09-12,1,1999,ARI,PHI,13,4,0,0,0,4,0,0,0,1,1,0,0
1,1999_01_ARI_PHI,1999-09-12,1,1999,PHI,ARI,6,1,0,0,0,1,0,0,0,3,3,0,0
2,1999_01_BUF_IND,1999-09-12,1,1999,BUF,IND,6,2,0,0,0,2,0,0,0,0,0,0,0
3,1999_01_BUF_IND,1999-09-12,1,1999,IND,BUF,7,1,0,0,0,1,0,0,0,4,4,0,0
4,1999_01_CAR_NO,1999-09-12,1,1999,CAR,NO,6,1,1,1,0,0,0,0,1,1,1,0,0
5,1999_01_CAR_NO,1999-09-12,1,1999,NO,CAR,8,2,0,0,1,1,0,0,0,2,1,0,0
6,1999_01_CIN_TEN,1999-09-12,1,1999,CIN,TEN,7,2,0,0,0,2,0,0,0,1,1,0,0
7,1999_01_CIN_TEN,1999-09-12,1,1999,TEN,CIN,12,2,0,1,0,1,0,0,0,4,4,0,0
8,1999_01_DAL_WAS,1999-09-12,1,1999,DAL,WAS,5,0,0,0,0,0,0,0,0,5,5,0,0
9,1999_01_DAL_WAS,1999-09-12,1,1999,WAS,DAL,10,2,0,0,1,1,0,0,0,3,3,0,0


In [172]:
# Group by 'defteam' and apply the 'calc_agg_stats' function
df_kicker_game_level_agg_by_def = df_kicker_game_level_agg_by_game.groupby(
    ['defteam'], 
    group_keys=False
).apply(
    calc_agg_stats, 
    fields=kicker_fields, 
    career=False 
).reset_index(drop=True).round(2)
df_kicker_game_level_agg_by_def = df_kicker_game_level_agg_by_def.drop(columns=df_kicker_game_level_agg_by_def.loc[:, "fantasy_points":"xp_made_33y"].columns)

In [173]:
df_kicker_game_level_agg_by_def.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,n_games_season,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5
13678,2024_09_TB_KC,2024-11-04,9,2024,KC,TB,8,9.25,7.05,9.4,1.88,1.63,1.6,0.5,0.37,0.4,0.38,0.16,0.6,0.5,0.26,0.4,1.0,1.21,0.6,0.12,0.37,0.0,0.38,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,1.58,3.4,2.75,1.58,3.4
13679,2024_09_TB_KC,2024-11-04,9,2024,TB,KC,7,8.0,5.95,6.4,1.71,1.29,1.2,0.29,0.19,0.2,0.71,0.14,0.8,0.14,0.38,0.0,0.86,0.76,0.4,0.29,0.05,0.2,0.0,0.1,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.71,1.67,1.6,1.29,1.62,1.2
13680,2024_09_WAS_NYG,2024-11-03,9,2024,NYG,WAS,8,6.12,10.47,5.2,1.25,2.06,1.0,0.12,0.18,0.0,0.25,0.41,0.2,0.12,0.59,0.2,0.88,1.06,0.6,0.0,0.0,0.0,0.12,0.12,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.18,1.6,1.88,3.12,1.6
13681,2024_09_WAS_NYG,2024-11-03,9,2024,WAS,NYG,8,8.88,7.29,8.8,2.0,1.47,1.8,0.25,0.35,0.2,0.12,0.06,0.2,0.5,0.47,0.6,1.38,0.94,1.0,0.25,0.24,0.2,0.0,0.06,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.12,2.71,2.4,2.12,2.47,2.4
13676,2024_09_NO_CAR,2024-11-03,9,2024,CAR,NO,8,11.62,7.71,14.0,2.5,1.76,3.0,0.25,0.29,0.2,0.5,0.29,0.6,0.88,0.35,0.8,1.12,1.12,1.6,0.25,0.06,0.2,0.0,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.38,1.71,3.2,2.25,1.71,3.0
13677,2024_09_NO_CAR,2024-11-03,9,2024,NO,CAR,8,10.75,7.65,10.4,2.0,1.47,2.0,0.12,0.06,0.0,0.38,0.29,0.2,0.62,0.12,0.4,1.0,1.06,1.4,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.62,2.76,3.8,3.38,2.65,3.6
13674,2024_09_NE_TEN,2024-11-03,9,2024,NE,TEN,7,10.14,10.88,10.4,2.0,2.71,2.0,0.0,0.0,0.0,0.43,0.35,0.4,0.43,0.35,0.4,1.14,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.86,1.76,3.2,2.86,1.71,3.2
13675,2024_09_NE_TEN,2024-11-03,9,2024,TEN,NE,8,8.38,8.18,8.8,1.88,1.82,2.0,0.5,0.35,0.6,0.25,0.47,0.2,0.5,0.18,0.6,1.12,1.18,1.2,0.0,0.18,0.0,0.5,0.12,0.6,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.38,1.88,2.6,2.25,1.82,2.4
13672,2024_09_MIA_BUF,2024-11-03,9,2024,BUF,MIA,7,10.0,6.78,11.2,2.0,1.22,2.4,0.43,0.17,0.4,1.0,0.17,1.2,0.29,0.22,0.2,0.71,0.83,1.0,0.14,0.11,0.2,0.14,0.06,0.0,0.14,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.14,2.67,1.8,2.14,2.61,1.8
13673,2024_09_MIA_BUF,2024-11-03,9,2024,MIA,BUF,8,6.38,7.21,6.8,1.38,1.53,1.4,0.25,0.05,0.4,0.25,0.21,0.4,0.25,0.68,0.2,0.88,0.63,0.8,0.0,0.0,0.0,0.12,0.05,0.2,0.12,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,1.88,1.63,2.2,1.88,1.58,2.2


In [174]:
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 22].sort_values(by=['game_date', 'game_id'], ascending=False).head(10)
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 1].sort_values(by=['game_date', 'game_id'], ascending=False).head(10)
# df_kicker_game_level_agg_by_def.index.get_level_values('week').unique()
# df_kicker_game_level_agg_by_def[df_kicker_game_level_agg_by_def.index.get_level_values('week') == 1].sort_values(by=['game_date', 'game_id'], ascending=True).head(10)


### Offense stats

In [175]:
# Group by 'defteam' and apply the 'calc_agg_stats' function
df_kicker_game_level_agg_by_pos = df_kicker_game_level_agg_by_game.groupby(
    ['posteam'], 
    group_keys=False
).apply(
    calc_agg_stats, 
    fields=kicker_fields, 
    career=False 
).reset_index(drop=True).round(2)
df_kicker_game_level_agg_by_pos = df_kicker_game_level_agg_by_pos.drop(columns=df_kicker_game_level_agg_by_pos.loc[:, "fantasy_points":"xp_made_33y"].columns)


## Injury data

In [244]:
injuries_df_clean = (injuries_df[['week', 'season', 'gsis_id', 'position', 'report_primary_injury', 'report_status']]
                     [injuries_df['position'] == 'K']
                     .fillna({'report_primary_injury': 'None', 'report_status': 'Active'})
                     )
injuries_df_clean.head(10)


Unnamed: 0,week,season,gsis_id,position,report_primary_injury,report_status
57,1.0,2009.0,00-0006800,K,right Knee,Probable
178,2.0,2009.0,00-0004811,K,left Hamstring,Probable
246,2.0,2009.0,00-0006800,K,right Knee,Probable
255,2.0,2009.0,00-0025580,K,Abdomen,Probable
410,3.0,2009.0,00-0004811,K,left Hamstring,Probable
465,3.0,2009.0,00-0004091,K,right Calf,Questionable
504,3.0,2009.0,00-0025580,K,Abdomen,Probable
676,3.0,2009.0,00-0020962,K,right Ankle,Questionable
723,4.0,2009.0,00-0004091,K,right Calf,Doubtful
782,4.0,2009.0,00-0016919,K,,Active


In [256]:
injuries_df_kicker = (
    pd.merge(
        df_kicker_game_level.loc[:, 'game_id':'kicker_player_id'],
        injuries_df_clean,
        left_on=['kicker_player_id', 'season', 'week'],
        right_on=['gsis_id', 'season', 'week'],
        how='left'
    ).fillna({'position': 'K', 'report_primary_injury': 'None', 'report_status': 'Active'})
    .drop(columns=['gsis_id'])
)[["game_id", "kicker_player_id", "report_primary_injury", "report_status"]]

injuries_df_kicker


Unnamed: 0,game_id,kicker_player_id,report_primary_injury,report_status
0,1999_01_ARI_PHI,00-0008080,,Active
1,1999_01_ARI_PHI,00-0000108,,Active
2,1999_01_ARI_PHI,00-0008593,,Active
3,1999_01_BUF_IND,00-0002975,,Active
4,1999_01_BUF_IND,00-0016830,,Active
...,...,...,...,...
16319,2024_09_TB_KC,00-0033303,,Active
16320,2024_09_TB_KC,00-0035358,,Active
16321,2024_09_TB_KC,00-0037097,,Active
16322,2024_09_WAS_NYG,00-0039934,,Active


## Combine data

In [343]:
# Merge kicker aggregate stats with defensive team stats
df_combined = pd.merge(
    df_kicker_game_level_agg,
    df_kicker_game_level_agg_by_def,
    on=['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam'],
    how='left',
    suffixes=('', '_def')
)

# # Merge with offensive team stats NOT USEFUL
# df_combined = pd.merge(
#     df_combined,
#     df_kicker_game_level_agg_by_pos,
#     on=['game_id', 'game_date', 'week', 'season', 'posteam', 'defteam'],
#     how='left',
#     suffixes=('', '_pos')
# )

# Merge with stadium data
df_combined = pd.merge(
    df_combined,
    df_kicker_game_level_stadium,
    on=['game_id', 'game_date', 'week', 'season'],
    how='left'
)

# Merge with injury data
df_combined = pd.merge(
    df_combined,
    injuries_df_kicker,
    on=['game_id', 'kicker_player_id'],
    how='left'
)

# Merge with original kicker game level data to include 'fantasy_points'
df_combined = pd.merge(
    df_combined,
    df_kicker_game_level[['game_id', 'fantasy_points', 'kicker_player_id']],
    on=['game_id', 'kicker_player_id'],
    how='left'
)

# Drop redundant columns if necessary
columns_to_drop = ['home_team']
df_combined.drop(columns=columns_to_drop, inplace=True, errors='ignore')


# Reset index
df_combined.reset_index(drop=True, inplace=True)

# Log completion message
print("DataFrames merged successfully into 'df_combined'.")

DataFrames merged successfully into 'df_combined'.


In [344]:
# # Check a specific player-game combination
# sample_game = df_combined['game_id'].iloc[0]
# sample_player = df_combined['kicker_player_id'].iloc[0]

# print("Number of rows for this player-game:", 
#       len(df_combined[(df_combined['game_id'] == sample_game) & 
#                      (df_combined['kicker_player_id'] == sample_player)]))

# # Display these rows to see what's different between them
# print("\nSample rows:")
# print(df_combined[(df_combined['game_id'] == sample_game) & 
#                  (df_combined['kicker_player_id'] == sample_player)])

In [345]:
# df_combined.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

In [346]:
df_combined[df_combined['kicker_player_id'] == '00-0033303'].head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,n_games_career,n_games_season,fantasy_points_mean_career,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_career,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_career,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_career,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_career,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_career,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_career,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_career,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_career,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5,n_games_season_def,fantasy_points_mean_season_def,fantasy_points_mean_prior_season_def,fantasy_points_mean_last5_def,total_fg_made_mean_season_def,total_fg_made_mean_prior_season_def,total_fg_made_mean_last5_def,total_fg_missed_mean_season_def,total_fg_missed_mean_prior_season_def,total_fg_missed_mean_last5_def,50+_fg_made_mean_season_def,50+_fg_made_mean_prior_season_def,50+_fg_made_mean_last5_def,40-49_fg_made_mean_season_def,40-49_fg_made_mean_prior_season_def,40-49_fg_made_mean_last5_def,0-39_fg_made_mean_season_def,0-39_fg_made_mean_prior_season_def,0-39_fg_made_mean_last5_def,missed_fg_50+_mean_season_def,missed_fg_50+_mean_prior_season_def,missed_fg_50+_mean_last5_def,missed_fg_40-49_mean_season_def,missed_fg_40-49_mean_prior_season_def,missed_fg_40-49_mean_last5_def,missed_fg_0-39_mean_season_def,missed_fg_0-39_mean_prior_season_def,missed_fg_0-39_mean_last5_def,xp_attempt_19y_mean_season_def,xp_attempt_19y_mean_prior_season_def,xp_attempt_19y_mean_last5_def,xp_made_19y_mean_season_def,xp_made_19y_mean_prior_season_def,xp_made_19y_mean_last5_def,xp_attempt_33y_mean_season_def,xp_attempt_33y_mean_prior_season_def,xp_attempt_33y_mean_last5_def,xp_made_33y_mean_season_def,xp_made_33y_mean_prior_season_def,xp_made_33y_mean_last5_def,stadium,roof,temp,wind,report_primary_injury,report_status,fantasy_points
11558,2017_04_WAS_KC,2017-10-02,4,2017,KC,WAS,H.Butker,00-0033303,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,8.67,7.38,7.4,2.0,1.5,1.6,0.0,0.31,0.0,0.33,0.06,0.2,0.67,0.5,0.6,1.0,0.94,0.8,0.0,0.12,0.0,0.0,0.06,0.0,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.67,2.69,1.8,1.33,2.56,1.6,GEHA Field at Arrowhead Stadium,outdoors,82.0,14.0,,Active,11
11574,2017_05_KC_HOU,2017-10-08,5,2017,KC,HOU,H.Butker,00-0033303,1,1,11.0,11.0,,11.0,3.0,3.0,,3.0,1.0,1.0,,1.0,0.0,0.0,,0.0,1.0,1.0,,1.0,2.0,2.0,,2.0,0.0,0.0,,0.0,1.0,1.0,,1.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.0,2.0,,2.0,2.0,2.0,,2.0,4,6.25,7.39,7.2,1.5,1.5,1.6,0.25,0.22,0.2,0.0,0.17,0.0,0.25,0.39,0.4,1.25,0.94,1.2,0.0,0.11,0.0,0.0,0.11,0.0,0.25,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.25,2.28,2.6,2.0,2.28,2.4,NRG Stadium,closed,,,,Active,21
11620,2017_06_PIT_KC,2017-10-15,6,2017,KC,PIT,H.Butker,00-0033303,2,2,16.0,16.0,,16.0,4.0,4.0,,4.0,0.5,0.5,,0.5,0.0,0.0,,0.0,2.0,2.0,,2.0,2.0,2.0,,2.0,0.0,0.0,,0.0,0.5,0.5,,0.5,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.5,2.5,,2.5,2.5,2.5,,2.5,5,4.6,7.32,4.6,1.0,1.68,1.0,0.4,0.21,0.4,0.0,0.05,0.0,0.6,0.53,0.6,0.4,1.11,0.4,0.2,0.11,0.2,0.2,0.11,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.6,1.95,1.6,1.2,1.74,1.2,GEHA Field at Arrowhead Stadium,outdoors,60.0,14.0,,Active,8
11646,2017_07_KC_OAK,2017-10-19,7,2017,KC,LV,H.Butker,00-0033303,3,3,13.33,13.33,,13.33,3.33,3.33,,3.33,0.33,0.33,,0.33,0.0,0.0,,0.0,1.67,1.67,,1.67,1.67,1.67,,1.67,0.0,0.0,,0.0,0.33,0.33,,0.33,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.0,2.0,,2.0,2.0,2.0,,2.0,6,9.5,6.94,9.0,2.33,1.41,2.2,0.67,0.24,0.6,0.17,0.12,0.0,0.67,0.41,0.8,1.5,0.88,1.4,0.33,0.12,0.2,0.17,0.06,0.2,0.17,0.06,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.47,2.2,2.0,2.24,2.2,Oakland-Alameda County Stadium,outdoors,62.0,1.0,,Active,14
11670,2017_08_DEN_KC,2017-10-30,8,2017,KC,DEN,H.Butker,00-0033303,4,4,13.5,13.5,,13.5,3.25,3.25,,3.25,0.25,0.25,,0.25,0.25,0.25,,0.25,1.25,1.25,,1.25,1.75,1.75,,1.75,0.0,0.0,,0.0,0.25,0.25,,0.25,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.25,2.25,,2.25,2.25,2.25,,2.25,6,8.33,7.5,9.4,1.5,1.81,1.8,0.0,0.25,0.0,0.67,0.12,0.8,0.33,0.56,0.4,0.5,1.12,0.6,0.0,0.06,0.0,0.0,0.12,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.17,1.69,2.0,2.17,1.5,2.0,GEHA Field at Arrowhead Stadium,outdoors,44.0,12.0,,Active,21
11712,2017_09_KC_DAL,2017-11-05,9,2017,KC,DAL,H.Butker,00-0033303,5,5,15.0,15.0,,15.0,3.6,3.6,,3.6,0.2,0.2,,0.2,0.4,0.4,,0.4,1.4,1.4,,1.4,1.8,1.8,,1.8,0.0,0.0,,0.0,0.2,0.2,,0.2,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.2,2.2,,2.2,2.2,2.2,,2.2,7,8.0,6.88,9.6,1.86,1.59,2.4,0.29,0.41,0.2,0.0,0.18,0.0,0.86,0.18,1.2,1.0,1.24,1.2,0.0,0.18,0.0,0.14,0.24,0.0,0.14,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.43,1.88,2.2,2.0,1.82,1.6,AT&T Stadium,closed,,,,Active,5
11772,2017_11_KC_NYG,2017-11-19,11,2017,KC,NYG,H.Butker,00-0033303,6,6,13.33,13.33,,13.8,3.17,3.17,,3.2,0.17,0.17,,0.0,0.33,0.33,,0.4,1.17,1.17,,1.2,1.67,1.67,,1.6,0.0,0.0,,0.0,0.17,0.17,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,2.17,2.17,,2.2,2.17,2.17,,2.2,9,9.11,8.59,8.4,1.89,2.06,1.6,0.44,0.24,0.2,0.22,0.12,0.0,0.67,0.53,0.6,1.0,1.41,1.0,0.11,0.18,0.0,0.22,0.06,0.0,0.11,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.89,1.76,3.4,2.78,1.71,3.4,MetLife Stadium,outdoors,50.0,23.0,,Active,9
11788,2017_12_BUF_KC,2017-11-26,12,2017,KC,BUF,H.Butker,00-0033303,7,7,12.71,12.71,,11.4,3.14,3.14,,2.8,0.14,0.14,,0.0,0.29,0.29,,0.4,1.0,1.0,,0.6,1.86,1.86,,1.8,0.0,0.0,,0.0,0.14,0.14,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,1.86,1.86,,1.6,1.86,1.86,,1.6,10,9.5,8.81,10.8,2.1,1.88,2.0,0.1,0.12,0.2,0.3,0.31,0.4,0.2,0.44,0.2,1.6,1.12,1.4,0.0,0.0,0.0,0.1,0.12,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.6,2.38,4.2,2.5,2.25,4.0,GEHA Field at Arrowhead Stadium,outdoors,62.0,9.0,,Active,5
11839,2017_13_KC_NYJ,2017-12-03,13,2017,KC,NYJ,H.Butker,00-0033303,8,8,11.75,11.75,,10.8,2.88,2.88,,2.6,0.12,0.12,,0.0,0.25,0.25,,0.4,1.0,1.0,,0.6,1.62,1.62,,1.6,0.0,0.0,,0.0,0.12,0.12,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,1.75,1.75,,1.6,1.75,1.75,,1.6,11,6.27,8.38,9.0,1.36,1.69,2.2,0.45,0.25,0.0,0.0,0.19,0.0,0.36,0.44,0.8,1.0,1.06,1.4,0.18,0.06,0.0,0.18,0.12,0.0,0.09,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.36,2.81,1.8,2.18,2.75,1.6,MetLife Stadium,outdoors,48.0,0.0,,Active,5
11884,2017_14_OAK_KC,2017-12-10,14,2017,KC,LV,H.Butker,00-0033303,9,9,11.0,11.0,,9.0,2.67,2.67,,2.2,0.22,0.22,,0.2,0.22,0.22,,0.2,0.89,0.89,,0.6,1.56,1.56,,1.4,0.0,0.0,,0.0,0.11,0.11,,0.0,0.11,0.11,,0.2,0.0,0.0,,0.0,0.0,0.0,,0.0,2.0,2.0,,1.8,2.0,2.0,,1.8,12,9.67,6.94,9.0,2.08,1.41,1.6,0.33,0.24,0.0,0.42,0.12,0.6,0.58,0.41,0.6,1.08,0.88,0.4,0.17,0.12,0.0,0.08,0.06,0.0,0.08,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.33,2.47,2.6,2.25,2.24,2.4,GEHA Field at Arrowhead Stadium,outdoors,46.0,13.0,,Active,17


In [347]:
# (
#     df_combined[df_combined['wind'].isnull() | df_combined['temp'].isnull()]
#     .groupby(["season", "stadium", "roof"])
#     .count()
#     .sort_values(by='season', ascending=False)
#     .head(10)
# )


## Impute misising values

In [348]:
# Calculate the percentage of null values in each column
null_percentages = df_combined.isnull().mean() * 100

# Sort the percentages in descending order for better readability
null_percentages = null_percentages.sort_values(ascending=False)

# Format the output to display percentages with two decimal places
null_percentages_formatted = null_percentages.apply(lambda x: f"{x:.2f}%")

# Print the results
print("Percentage of Null Values in Each Column:")
print(null_percentages_formatted.head(30))

Percentage of Null Values in Each Column:
wind                                 36.00%
temp                                 36.00%
xp_attempt_19y_mean_prior_season     17.39%
total_fg_made_mean_prior_season      17.39%
missed_fg_40-49_mean_prior_season    17.39%
missed_fg_50+_mean_prior_season      17.39%
xp_made_19y_mean_prior_season        17.39%
0-39_fg_made_mean_prior_season       17.39%
xp_attempt_33y_mean_prior_season     17.39%
40-49_fg_made_mean_prior_season      17.39%
xp_made_33y_mean_prior_season        17.39%
50+_fg_made_mean_prior_season        17.39%
total_fg_missed_mean_prior_season    17.39%
missed_fg_0-39_mean_prior_season     17.39%
fantasy_points_mean_prior_season     17.39%
50+_fg_made_mean_season               9.53%
total_fg_made_mean_season             9.53%
0-39_fg_made_mean_season              9.53%
missed_fg_40-49_mean_season           9.53%
xp_attempt_19y_mean_season            9.53%
total_fg_missed_mean_season           9.53%
missed_fg_0-39_mean_season        

In [349]:
# Ensure 'temp' and 'wind' are numeric
df_combined['temp'] = pd.to_numeric(df_combined['temp'], errors='coerce')
df_combined['wind'] = pd.to_numeric(df_combined['wind'], errors='coerce')

# Calculate mean 'temp' and 'wind' by stadium
temp_wind_means = (
    df_combined.groupby('stadium')[['temp', 'wind']]
    .mean()
    .reset_index()
)

# Merge the mean values back to the original DataFrame
df_combined = pd.merge(
    df_combined,
    temp_wind_means,
    on='stadium',
    how='left',
    suffixes=('', '_mean')
)

# Impute missing 'temp' and 'wind' with the group mean values
df_combined['temp'].fillna(df_combined['temp_mean'], inplace=True)
df_combined['wind'].fillna(df_combined['wind_mean'], inplace=True)

# If any missing 'temp' or 'wind' values remain, fill them with the overall mean
df_combined['temp'].fillna(df_combined['temp'].mean(), inplace=True)
df_combined['wind'].fillna(df_combined['wind'].mean(), inplace=True)

# Drop the temporary mean columns
df_combined.drop(columns=['temp_mean', 'wind_mean'], inplace=True)

# For the rest of the columns, fill missing values with 0
# Exclude 'temp' and 'wind' as they've already been imputed
columns_to_fill = df_combined.columns.difference(['temp', 'wind'])
df_combined[columns_to_fill] = df_combined[columns_to_fill].fillna(0)

# Check if any missing values remain
remaining_nulls = df_combined.isnull().sum()
if remaining_nulls.sum() > 0:
    print("Remaining null values after imputation:")
    print(remaining_nulls[remaining_nulls > 0])
else:
    print("All missing values have been imputed.")

All missing values have been imputed.


In [350]:
df_combined.sort_values(by=['game_date', 'game_id'], ascending=False).head(10)

Unnamed: 0,game_id,game_date,week,season,posteam,defteam,kicker_player_name,kicker_player_id,n_games_career,n_games_season,fantasy_points_mean_career,fantasy_points_mean_season,fantasy_points_mean_prior_season,fantasy_points_mean_last5,total_fg_made_mean_career,total_fg_made_mean_season,total_fg_made_mean_prior_season,total_fg_made_mean_last5,total_fg_missed_mean_career,total_fg_missed_mean_season,total_fg_missed_mean_prior_season,total_fg_missed_mean_last5,50+_fg_made_mean_career,50+_fg_made_mean_season,50+_fg_made_mean_prior_season,50+_fg_made_mean_last5,40-49_fg_made_mean_career,40-49_fg_made_mean_season,40-49_fg_made_mean_prior_season,40-49_fg_made_mean_last5,0-39_fg_made_mean_career,0-39_fg_made_mean_season,0-39_fg_made_mean_prior_season,0-39_fg_made_mean_last5,missed_fg_50+_mean_career,missed_fg_50+_mean_season,missed_fg_50+_mean_prior_season,missed_fg_50+_mean_last5,missed_fg_40-49_mean_career,missed_fg_40-49_mean_season,missed_fg_40-49_mean_prior_season,missed_fg_40-49_mean_last5,missed_fg_0-39_mean_career,missed_fg_0-39_mean_season,missed_fg_0-39_mean_prior_season,missed_fg_0-39_mean_last5,xp_attempt_19y_mean_career,xp_attempt_19y_mean_season,xp_attempt_19y_mean_prior_season,xp_attempt_19y_mean_last5,xp_made_19y_mean_career,xp_made_19y_mean_season,xp_made_19y_mean_prior_season,xp_made_19y_mean_last5,xp_attempt_33y_mean_career,xp_attempt_33y_mean_season,xp_attempt_33y_mean_prior_season,xp_attempt_33y_mean_last5,xp_made_33y_mean_career,xp_made_33y_mean_season,xp_made_33y_mean_prior_season,xp_made_33y_mean_last5,n_games_season_def,fantasy_points_mean_season_def,fantasy_points_mean_prior_season_def,fantasy_points_mean_last5_def,total_fg_made_mean_season_def,total_fg_made_mean_prior_season_def,total_fg_made_mean_last5_def,total_fg_missed_mean_season_def,total_fg_missed_mean_prior_season_def,total_fg_missed_mean_last5_def,50+_fg_made_mean_season_def,50+_fg_made_mean_prior_season_def,50+_fg_made_mean_last5_def,40-49_fg_made_mean_season_def,40-49_fg_made_mean_prior_season_def,40-49_fg_made_mean_last5_def,0-39_fg_made_mean_season_def,0-39_fg_made_mean_prior_season_def,0-39_fg_made_mean_last5_def,missed_fg_50+_mean_season_def,missed_fg_50+_mean_prior_season_def,missed_fg_50+_mean_last5_def,missed_fg_40-49_mean_season_def,missed_fg_40-49_mean_prior_season_def,missed_fg_40-49_mean_last5_def,missed_fg_0-39_mean_season_def,missed_fg_0-39_mean_prior_season_def,missed_fg_0-39_mean_last5_def,xp_attempt_19y_mean_season_def,xp_attempt_19y_mean_prior_season_def,xp_attempt_19y_mean_last5_def,xp_made_19y_mean_season_def,xp_made_19y_mean_prior_season_def,xp_made_19y_mean_last5_def,xp_attempt_33y_mean_season_def,xp_attempt_33y_mean_prior_season_def,xp_attempt_33y_mean_last5_def,xp_made_33y_mean_season_def,xp_made_33y_mean_prior_season_def,xp_made_33y_mean_last5_def,stadium,roof,temp,wind,report_primary_injury,report_status,fantasy_points
16325,2024_09_TB_KC,2024-11-04,9,2024,KC,TB,H.Butker,00-0033303,133,7,9.2,9.29,9.33,9.2,1.83,2.0,2.1,2.0,0.21,0.29,0.1,0.4,0.27,0.29,0.33,0.2,0.42,0.29,0.38,0.4,1.14,1.43,1.38,1.4,0.1,0.29,0.0,0.4,0.07,0.0,0.0,0.0,0.05,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.08,2.57,2.19,2.6,2.92,2.43,2.19,2.4,8,9.25,7.05,9.4,1.88,1.63,1.6,0.5,0.37,0.4,0.38,0.16,0.6,0.5,0.26,0.4,1.0,1.21,0.6,0.12,0.37,0.0,0.38,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75,1.58,3.4,2.75,1.58,3.4,GEHA Field at Arrowhead Stadium,outdoors,51.290488,9.398458,,Active,7
16326,2024_09_TB_KC,2024-11-04,9,2024,TB,KC,C.McLaughlin,00-0035358,74,8,7.58,9.38,8.26,9.8,1.53,1.62,1.74,1.6,0.23,0.12,0.05,0.2,0.42,0.75,0.42,0.8,0.31,0.0,0.21,0.0,0.8,0.88,1.11,0.8,0.07,0.12,0.05,0.2,0.15,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.07,3.12,2.0,3.6,2.03,3.0,2.0,3.4,7,8.0,5.95,6.4,1.71,1.29,1.2,0.29,0.19,0.2,0.71,0.14,0.8,0.14,0.38,0.0,0.86,0.76,0.4,0.29,0.05,0.2,0.0,0.1,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.71,1.67,1.6,1.29,1.62,1.2,GEHA Field at Arrowhead Stadium,outdoors,51.290488,9.398458,,Active,7
16327,2024_09_TB_KC,2024-11-04,9,2024,TB,KC,T.Gill,00-0037097,15,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7,8.0,5.95,6.4,1.71,1.29,1.2,0.29,0.19,0.2,0.71,0.14,0.8,0.14,0.38,0.0,0.86,0.76,0.4,0.29,0.05,0.2,0.0,0.1,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.71,1.67,1.6,1.29,1.62,1.2,GEHA Field at Arrowhead Stadium,outdoors,51.290488,9.398458,,Active,0
16328,2024_09_WAS_NYG,2024-11-03,9,2024,NYG,WAS,J.McAtamney,00-0039934,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8,6.12,10.47,5.2,1.25,2.06,1.0,0.12,0.18,0.0,0.25,0.41,0.2,0.12,0.59,0.2,0.88,1.06,0.6,0.0,0.0,0.0,0.12,0.12,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.18,1.6,1.88,3.12,1.6,MetLife Stadium,outdoors,52.0,6.0,,Active,4
16329,2024_09_WAS_NYG,2024-11-03,9,2024,WAS,NYG,A.Seibert,00-0035145,38,7,7.97,13.86,4.0,13.2,1.79,3.29,1.0,3.0,0.32,0.14,0.0,0.2,0.08,0.14,0.0,0.2,0.63,1.0,0.0,1.0,1.08,2.14,1.0,1.8,0.13,0.14,0.0,0.2,0.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.16,2.71,1.0,2.8,2.0,2.71,1.0,2.8,8,8.88,7.29,8.8,2.0,1.47,1.8,0.25,0.35,0.2,0.12,0.06,0.2,0.5,0.47,0.6,1.38,0.94,1.0,0.25,0.24,0.2,0.0,0.06,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.12,2.71,2.4,2.12,2.47,2.4,MetLife Stadium,outdoors,52.0,6.0,,Active,9
16323,2024_09_NO_CAR,2024-11-03,9,2024,CAR,NO,E.Pineiro,00-0034173,61,8,7.02,5.38,7.13,4.4,1.61,1.12,1.67,0.8,0.2,0.12,0.27,0.2,0.16,0.0,0.33,0.0,0.49,0.5,0.47,0.4,0.95,0.62,0.87,0.4,0.03,0.0,0.13,0.0,0.13,0.12,0.13,0.2,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.7,1.62,1.33,1.8,1.57,1.62,1.13,1.8,8,11.62,7.71,14.0,2.5,1.76,3.0,0.25,0.29,0.2,0.5,0.29,0.6,0.88,0.35,0.8,1.12,1.12,1.6,0.25,0.06,0.2,0.0,0.24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.38,1.71,3.2,2.25,1.71,3.0,Bank of America Stadium,outdoors,66.0,7.0,,Active,6
16324,2024_09_NO_CAR,2024-11-03,9,2024,NO,CAR,B.Grupe,00-0038905,25,8,8.36,8.12,8.47,5.8,1.72,1.62,1.76,1.2,0.32,0.12,0.41,0.2,0.32,0.25,0.35,0.0,0.52,0.62,0.47,0.8,0.88,0.75,0.94,0.4,0.12,0.0,0.18,0.0,0.12,0.12,0.12,0.2,0.08,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4,2.5,2.35,1.8,2.32,2.25,2.35,1.6,8,10.75,7.65,10.4,2.0,1.47,2.0,0.12,0.06,0.0,0.38,0.29,0.2,0.62,0.12,0.4,1.0,1.06,1.4,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.62,2.76,3.8,3.38,2.65,3.6,Bank of America Stadium,outdoors,66.0,7.0,,Active,12
16321,2024_09_NE_TEN,2024-11-03,9,2024,NE,TEN,J.Slye,00-0035192,86,8,7.1,6.0,5.88,5.2,1.53,1.38,1.12,1.0,0.27,0.12,0.24,0.2,0.26,0.25,0.18,0.4,0.43,0.25,0.41,0.2,0.85,0.88,0.53,0.4,0.13,0.0,0.12,0.0,0.1,0.0,0.12,0.0,0.03,0.12,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.94,1.38,2.06,1.6,1.73,1.38,1.88,1.6,7,10.14,10.88,10.4,2.0,2.71,2.0,0.0,0.0,0.0,0.43,0.35,0.4,0.43,0.35,0.4,1.14,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.86,1.76,3.2,2.86,1.71,3.2,Nissan Stadium,outdoors,75.0,10.0,,Active,7
16322,2024_09_NE_TEN,2024-11-03,9,2024,TEN,NE,N.Folk,00-0025565,240,7,7.69,7.14,7.71,7.6,1.65,1.29,1.71,1.4,0.27,0.0,0.06,0.0,0.18,0.43,0.29,0.6,0.47,0.57,0.35,0.4,1.0,0.29,1.06,0.4,0.08,0.0,0.06,0.0,0.14,0.0,0.0,0.0,0.05,0.0,0.0,0.0,1.26,0.0,0.0,0.0,1.26,0.0,0.0,0.0,0.95,1.86,1.76,1.8,0.88,1.86,1.65,1.8,8,8.38,8.18,8.8,1.88,1.82,2.0,0.5,0.35,0.6,0.25,0.47,0.2,0.5,0.18,0.6,1.12,1.18,1.2,0.0,0.18,0.0,0.5,0.12,0.6,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.38,1.88,2.6,2.25,1.82,2.4,Nissan Stadium,outdoors,75.0,10.0,,Active,8
16319,2024_09_MIA_BUF,2024-11-03,9,2024,BUF,MIA,T.Bass,00-0036162,83,8,8.31,7.88,7.47,6.8,1.58,1.5,1.37,1.4,0.28,0.38,0.32,0.4,0.19,0.12,0.21,0.2,0.36,0.25,0.26,0.0,1.02,1.12,0.89,1.2,0.08,0.0,0.11,0.0,0.12,0.38,0.16,0.4,0.07,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.22,3.5,3.0,2.8,3.1,3.25,2.95,2.6,7,10.0,6.78,11.2,2.0,1.22,2.4,0.43,0.17,0.4,1.0,0.17,1.2,0.29,0.22,0.2,0.71,0.83,1.0,0.14,0.11,0.2,0.14,0.06,0.0,0.14,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,2.14,2.67,1.8,2.14,2.61,1.8,Highmark Stadium,outdoors,54.0,9.0,,Active,14


# Feature Selection

In [351]:
best_params = ['fantasy_points_mean_last5', 'fantasy_points_mean_career', 'n_games_career', 'fantasy_points_mean_prior_season', 'fantasy_points_mean_season', 'n_games_season', 'total_fg_missed_mean_career',
               'fantasy_points_mean_season_def', 'fantasy_points_mean_last5_def', 'fantasy_points_mean_prior_season_def',
               'temp', 'wind', 'roof', 'report_primary_injury', 'report_status']


In [352]:
df_combined = df_combined[best_params + ['fantasy_points']][df_combined['season']>=2009]


In [353]:
# import importlib
# import nfl_model_v2
# importlib.reload(nfl_model_v2)
from nfl_model_v2 import NFLModel

In [354]:

def get_dummy_variables(df, drop_first=True, dummy_na=False):
    """
    Converts non-numerical columns in a DataFrame to dummy variables.

    Parameters:
    - df: pandas DataFrame
        The input DataFrame containing the data.
    - drop_first: bool, default=False
        Whether to drop the first level of categorical variables to avoid the dummy variable trap.
    - dummy_na: bool, default=False
        Add a column to indicate NaNs, if False NaNs are ignored.

    Returns:
    - df_dummies: pandas DataFrame
        The DataFrame with non-numeric columns converted to dummy variables.
    """
    # Identify non-numeric columns
    non_numeric_cols = df.select_dtypes(exclude=['number', 'bool']).columns.tolist()

    # If there are no non-numeric columns, return the original DataFrame
    if not non_numeric_cols:
        print("No non-numerical columns to convert.")
        return df.copy()

    # Convert categorical variables to dummy variables
    df_dummies = pd.get_dummies(df, columns=non_numeric_cols, drop_first=drop_first, dummy_na=dummy_na)

    return df_dummies


In [355]:
columns_to_include = df_combined.columns.difference(['game_id', 'game_date', 'kicker_player_name', 'kicker_player_id', 'posteam', 'defteam', 'season', 'week'])
# final_df = df_combined[columns_to_include][df_combined['week'] > 5].copy()
final_df = df_combined[columns_to_include].copy()
final_df = get_dummy_variables(final_df)

y_var = 'fantasy_points'

In [356]:
final_df.columns

Index(['fantasy_points', 'fantasy_points_mean_career',
       'fantasy_points_mean_last5', 'fantasy_points_mean_last5_def',
       'fantasy_points_mean_prior_season',
       'fantasy_points_mean_prior_season_def', 'fantasy_points_mean_season',
       'fantasy_points_mean_season_def', 'n_games_career', 'n_games_season',
       'temp', 'total_fg_missed_mean_career', 'wind',
       'report_primary_injury_Ankle', 'report_primary_injury_Back',
       'report_primary_injury_Groin', 'report_primary_injury_Head',
       'report_primary_injury_Illness', 'report_primary_injury_Left Ankle',
       'report_primary_injury_Left Hamstring', 'report_primary_injury_None',
       'report_primary_injury_Not Injury Related',
       'report_primary_injury_Pelvis', 'report_primary_injury_Rib',
       'report_primary_injury_Ribs', 'report_primary_injury_Right Ankle',
       'report_primary_injury_Right Hamstring',
       'report_primary_injury_Shoulder', 'report_primary_injury_left Ankle',
       'report_pri

In [357]:
# # Initialize the model
# model = NFLModel(data=final_df, target_variable=y_var)

# # Preprocess data
# model.preprocess_data()

# # Perform feature selection
# model.feature_selection()

# # Evaluate models
# model.evaluate_models_train()
# print(model.get_results())
# model.evaluate_models()

# # Get and print the results
# results_df = model.get_results()
# print(results_df)

In [358]:
import importlib
import nfl_model_v3
importlib.reload(nfl_model_v3)
from nfl_model_v3 import NFLModel

In [359]:
# Initialize the model
model = NFLModel(data=final_df, target_variable=y_var)
# model = NFLModel(data=final_df, target_variable=y_var)

# Preprocess data
model.preprocess_data()

# Train all models
model.train_all_models()

# Get and print the results
results_df = model.get_results()
print(results_df)

Data preprocessing completed.
Random Forest hyperparams: {'max_depth': 5}
[('fantasy_points_mean_last5', 0.8346500531144511), ('fantasy_points_mean_career', 0.0435959301957776), ('n_games_career', 0.032417669186095836), ('fantasy_points_mean_season_def', 0.01444438691531518), ('fantasy_points_mean_prior_season', 0.01285932372349202), ('wind', 0.01095648363579341), ('fantasy_points_mean_last5_def', 0.010782850141512989), ('fantasy_points_mean_prior_season_def', 0.010310567339682282), ('fantasy_points_mean_season', 0.00966505530402894), ('temp', 0.009576295144321005), ('total_fg_missed_mean_career', 0.006957059912431885), ('n_games_season', 0.001534146068076382), ('roof_outdoors', 0.0005941661142995131), ('roof_open', 0.0005551057168089981), ('roof_dome', 0.0005317733668218839), ('report_primary_injury_left Knee', 0.00013304241375627825), ('report_status_Questionable', 0.0001235657121660222), ('report_primary_injury_None', 0.00012000670041120629), ('report_primary_injury_right Groin', 8.

In [360]:
model.results

{'Model': ['Random Forest', 'Random Forest'],
 'Data': ['Train', 'Test'],
 'MAE': [2.9484313511020863, 2.9697300616550355],
 'MSE': [15.887207334152748, 15.78832773337313],
 'R2': [0.35532266034177595, 0.33865977795500957]}