In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib as plt
import datetime as dt

# Display all columns
pd.set_option('display.max_columns', None)

# 1. Importing data from nflfastR

## Player stats from 1999-2021

In [171]:
# Player stats from 1999 - 2021
players = pd.read_csv('https://github.com/nflverse/nflfastR-data/blob/master/data/player_stats.csv.gz?raw=True', compression='gzip', low_memory=False)
# Clean dataset to relevant features
player_cols = ['player_id', 'player_name', 'recent_team', 'season', 'attempts', 'completions', 'passing_yards', 'passing_tds', 'interceptions', 'passing_epa', 'rushing_yards', 'rushing_tds', 'rushing_epa', 'receiving_yards', 'receiving_tds', 'receiving_epa']
players = players[player_cols]
players = players[players['season'] > 2003]
players = players.groupby(by=['player_id', 'player_name' , 'recent_team', 'season']).sum().reset_index()
players = players[players['attempts'] > 150]
# Combine passing, rushing and receiving yards and EPA.
players['total_yards'] = players['passing_yards'] + players['rushing_yards'] + players['receiving_yards']
players['total_tds'] = players['passing_tds'] + players['rushing_tds'] + players['receiving_tds']
players['total_epa'] = players['passing_epa'] + players['rushing_epa'] + players['receiving_epa']
players['completion_pct'] = players['completions'] / players['attempts']
# Combine passing, rushing and receiving yards
players.drop(columns=['passing_yards', 'rushing_yards', 'receiving_yards', 'passing_tds', 'rushing_tds', 'receiving_tds', 'passing_epa', 'rushing_epa', 'receiving_epa'], inplace=True)
players[['games_played', 'games_won']] = 0
# Reset index
players.reset_index(drop=True, inplace=True)

In [172]:
players

Unnamed: 0,player_id,player_name,recent_team,season,attempts,completions,interceptions,total_yards,total_tds,total_epa,completion_pct,games_played,games_won
0,00-0001361,D.Bledsoe,BUF,2004,450,256,16,2969,20,-31.915171482245,0.568888888889,0,0
1,00-0001361,D.Bledsoe,DAL,2005,499,300,17,3689,25,32.794140938287,0.601202404810,0,0
2,00-0001361,D.Bledsoe,DAL,2006,169,90,8,1192,9,-3.913389641803,0.532544378698,0,0
3,00-0001823,A.Brooks,LV,2006,192,110,8,1229,3,-49.179364933053,0.572916666667,0,0
4,00-0001823,A.Brooks,NO,2004,542,309,16,3984,25,18.590597648228,0.570110701107,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
689,00-0036898,D.Mills,HOU,2021,329,219,9,2228,12,-46.069905760991,0.665653495441,0,0
690,00-0036945,J.Fields,CHI,2021,270,159,10,2290,9,-58.200916894737,0.588888888889,0,0
691,00-0036971,T.Lawrence,JAX,2021,543,319,14,3526,11,-47.358391061321,0.587476979742,0,0
692,00-0036972,M.Jones,NE,2021,461,310,12,3426,18,37.201095363455,0.672451193059,0,0


## Define functions

In [4]:
# Add the player's team for that year
def add_teams(year, season_data):
    annex = players[players['season'] == year]
    annex = dict(zip(annex['player_id'], annex['recent_team']))
    season_data['recent_team'] = season_data['passer_player_id'].apply(lambda x: annex[x] if x in annex else ' ')
    season_data = season_data[season_data.recent_team != ' ']
    season_data.reset_index(drop=True, inplace=True)
    return season_data

In [5]:
# Match winning team to the player's team
def add_games_won(season_data):
    for i in range(len(season_data)):
        # If the player's team is the home team and the home team won
        if (season_data.iloc[i,10] == season_data.iloc[i,2]) and (season_data.iloc[i,3] > season_data.iloc[i,5]):
            # Count it as a win
            season_data.iloc[i, 9] = 1
        # Or if the player's team is the away team and the away team won
        elif (season_data.iloc[i,10] == season_data.iloc[i,4]) and (season_data.iloc[i,3] < season_data.iloc[i,5]):
            # Count it as a win
            season_data.iloc[i, 9] = 1
    return season_data

In [6]:
# Save the games won and played by each player in a dictionary
def get_games_dict(season_data):
    new_dict = {}
    id_list = season_data['passer_player_id'].unique().tolist()
    for i in id_list:
        new_dict[i] = {'games_played': season_data.loc[season_data['passer_player_id'] == i, 'games_played'].sum(), 'games_won': season_data.loc[season_data['passer_player_id'] == i, 'games_won'].sum()}
    return new_dict

In [7]:
# Add the games played and won to the dataframe
def update_games(games_dict, year):
    new = players[players['season'] == year]
    id_list = new['player_id'].unique().tolist()
    for i in id_list:
        new.loc[new['player_id'] == i, 'games_played'] = games_dict[i]['games_played']
        new.loc[new['player_id'] == i, 'games_won'] = games_dict[i]['games_won']
    return new

In [173]:
test = pd.read_csv('https://github.com/nflverse/nflfastR-data/blob/master/data/play_by_play_2004.csv.gz?raw=True', compression='gzip', low_memory=False)
test


Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,side_of_field,yardline_100,game_date,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,drive,sp,qtr,down,goal_to_go,time,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,shotgun,no_huddle,qb_dropback,qb_kneel,qb_spike,qb_scramble,pass_length,pass_location,air_yards,yards_after_catch,run_location,run_gap,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,home_timeouts_remaining,away_timeouts_remaining,timeout,timeout_team,td_team,td_player_name,td_player_id,posteam_timeouts_remaining,defteam_timeouts_remaining,total_home_score,total_away_score,posteam_score,defteam_score,score_differential,posteam_score_post,defteam_score_post,score_differential_post,no_score_prob,opp_fg_prob,opp_safety_prob,opp_td_prob,fg_prob,safety_prob,td_prob,extra_point_prob,two_point_conversion_prob,ep,epa,total_home_epa,total_away_epa,total_home_rush_epa,total_away_rush_epa,total_home_pass_epa,total_away_pass_epa,air_epa,yac_epa,comp_air_epa,comp_yac_epa,total_home_comp_air_epa,total_away_comp_air_epa,total_home_comp_yac_epa,total_away_comp_yac_epa,total_home_raw_air_epa,total_away_raw_air_epa,total_home_raw_yac_epa,total_away_raw_yac_epa,wp,def_wp,home_wp,away_wp,wpa,vegas_wpa,vegas_home_wpa,home_wp_post,away_wp_post,vegas_wp,vegas_home_wp,total_home_rush_wpa,total_away_rush_wpa,total_home_pass_wpa,total_away_pass_wpa,air_wpa,yac_wpa,comp_air_wpa,comp_yac_wpa,total_home_comp_air_wpa,total_away_comp_air_wpa,total_home_comp_yac_wpa,total_away_comp_yac_wpa,total_home_raw_air_wpa,total_away_raw_air_wpa,total_home_raw_yac_wpa,total_away_raw_yac_wpa,punt_blocked,first_down_rush,first_down_pass,first_down_penalty,third_down_converted,third_down_failed,fourth_down_converted,fourth_down_failed,incomplete_pass,touchback,interception,punt_inside_twenty,punt_in_endzone,punt_out_of_bounds,punt_downed,punt_fair_catch,kickoff_inside_twenty,kickoff_in_endzone,kickoff_out_of_bounds,kickoff_downed,kickoff_fair_catch,fumble_forced,fumble_not_forced,fumble_out_of_bounds,solo_tackle,safety,penalty,tackled_for_loss,fumble_lost,own_kickoff_recovery,own_kickoff_recovery_td,qb_hit,rush_attempt,pass_attempt,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,fumble,complete_pass,assist_tackle,lateral_reception,lateral_rush,lateral_return,lateral_recovery,passer_player_id,passer_player_name,passing_yards,receiver_player_id,receiver_player_name,receiving_yards,rusher_player_id,rusher_player_name,rushing_yards,lateral_receiver_player_id,lateral_receiver_player_name,lateral_receiving_yards,lateral_rusher_player_id,lateral_rusher_player_name,lateral_rushing_yards,lateral_sack_player_id,lateral_sack_player_name,interception_player_id,interception_player_name,lateral_interception_player_id,lateral_interception_player_name,punt_returner_player_id,punt_returner_player_name,lateral_punt_returner_player_id,lateral_punt_returner_player_name,kickoff_returner_player_name,kickoff_returner_player_id,lateral_kickoff_returner_player_id,lateral_kickoff_returner_player_name,punter_player_id,punter_player_name,kicker_player_name,kicker_player_id,own_kickoff_recovery_player_id,own_kickoff_recovery_player_name,blocked_player_id,blocked_player_name,tackle_for_loss_1_player_id,tackle_for_loss_1_player_name,tackle_for_loss_2_player_id,tackle_for_loss_2_player_name,qb_hit_1_player_id,qb_hit_1_player_name,qb_hit_2_player_id,qb_hit_2_player_name,forced_fumble_player_1_team,forced_fumble_player_1_player_id,forced_fumble_player_1_player_name,forced_fumble_player_2_team,forced_fumble_player_2_player_id,forced_fumble_player_2_player_name,solo_tackle_1_team,solo_tackle_2_team,solo_tackle_1_player_id,solo_tackle_2_player_id,solo_tackle_1_player_name,solo_tackle_2_player_name,assist_tackle_1_player_id,assist_tackle_1_player_name,assist_tackle_1_team,assist_tackle_2_player_id,assist_tackle_2_player_name,assist_tackle_2_team,assist_tackle_3_player_id,assist_tackle_3_player_name,assist_tackle_3_team,assist_tackle_4_player_id,assist_tackle_4_player_name,assist_tackle_4_team,tackle_with_assist,tackle_with_assist_1_player_id,tackle_with_assist_1_player_name,tackle_with_assist_1_team,tackle_with_assist_2_player_id,tackle_with_assist_2_player_name,tackle_with_assist_2_team,pass_defense_1_player_id,pass_defense_1_player_name,pass_defense_2_player_id,pass_defense_2_player_name,fumbled_1_team,fumbled_1_player_id,fumbled_1_player_name,fumbled_2_player_id,fumbled_2_player_name,fumbled_2_team,fumble_recovery_1_team,fumble_recovery_1_yards,fumble_recovery_1_player_id,fumble_recovery_1_player_name,fumble_recovery_2_team,fumble_recovery_2_yards,fumble_recovery_2_player_id,fumble_recovery_2_player_name,sack_player_id,sack_player_name,half_sack_1_player_id,half_sack_1_player_name,half_sack_2_player_id,half_sack_2_player_name,return_team,return_yards,penalty_team,penalty_player_id,penalty_player_name,penalty_yards,replay_or_challenge,replay_or_challenge_result,penalty_type,defensive_two_point_attempt,defensive_two_point_conv,defensive_extra_point_attempt,defensive_extra_point_conv,safety_player_name,safety_player_id,season,cp,cpoe,series,series_success,series_result,order_sequence,start_time,time_of_day,stadium,weather,nfl_api_id,play_clock,play_deleted,play_type_nfl,special_teams_play,st_play_type,end_clock_time,end_yard_line,fixed_drive,fixed_drive_result,drive_real_start_time,drive_play_count,drive_time_of_possession,drive_first_downs,drive_inside20,drive_ended_with_score,drive_quarter_start,drive_quarter_end,drive_yards_penalized,drive_start_transition,drive_end_transition,drive_game_clock_start,drive_game_clock_end,drive_start_yard_line,drive_end_yard_line,drive_play_id_started,drive_play_id_ended,away_score,home_score,location,result,total,spread_line,total_line,div_game,roof,surface,temp,wind,home_coach,away_coach,stadium_id,game_stadium,aborted_play,success,passer,passer_jersey_number,rusher,rusher_jersey_number,receiver,receiver_jersey_number,pass,rush,first_down,special,play,passer_id,rusher_id,receiver_id,name,jersey_number,id,fantasy_player_name,fantasy_player_id,fantasy,fantasy_id,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,,,,,,2004-09-12,900.0,1800.0,3600.0,Half1,0,,0,1,,0,15:00,LA 30,0,,GAME,,,0,0,,0,0,0,,,,,,,,,,,3,3,,,,,,,,0,0,,,,,,,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.0,0.0,,,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.000000000000,0.000000000000,,,,,,,,,,,,,0.422024160624,0.577975839376,0.577975839376,0.422024160624,-0.000000000000,-0.000000000000,0.000000000000,,,0.157771617174,0.842228382826,0.000000000000,0.000000000000,0.000000000000,0.000000000000,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,2004,,,1,1,First down,1.0,13:00:00,,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0.0,0,GAME_START,0,,,,1,Punt,,,,,,,,,,,,,,,,,,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0,,,,,,,,0,0,,0,0,,,,,,,,,,,0,0,,,,,,,,
1,35,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,ARI,away,LA,LA,30.0,2004-09-12,900.0,1800.0,3600.0,Half1,0,1.0,0,1,,0,15:00,LA 30,0,53.0,14-J.Wilkins kicks 62 yards from STL 30 to ARI...,kickoff,0.0,0,0,0.0,0,0,0,,,,,,,,62.0,,,3,3,0.0,,,,,3.0,3.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.008067391813,0.174223750830,0.006769760512,0.311387091875,0.183685600758,0.003610468237,0.312255948782,0.0,0.0,0.028148963582,-0.665945112705,0.665945112705,-0.665945112705,0.000000000000,0.000000000000,0.000000000000,0.000000000000,,,,,,,,,,,,,0.422024160624,0.577975839376,0.577975839376,0.422024160624,-0.010538876057,-0.015801221132,0.015801221132,0.588514715433,0.411485284567,0.157771617174,0.842228382826,0.000000000000,0.000000000000,0.000000000000,0.000000000000,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,J.Scobey,00-0021098,,,,,J.Wilkins,00-0017693,,,,,,,,,,,,,,,,,,,,,,,,,00-0021750,A.Harris,LA,,,,,,,,,,1.0,00-0018563,J.Lucas,LA,,,,,,,,,,,,,,,,,,,,,,,,,,,,ARI,10.0,ARI,00-0020383,M.Stone,9.0,0,,Offensive Holding,0.0,0.0,0.0,0.0,,,2004,,,1,1,First down,35.0,13:00:00,12:06:02,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0.0,0,KICK_OFF,1,,,ARI 9,1,Punt,,7.0,3:04,2.0,0.0,0.0,1.0,1.0,0.0,KICKOFF,PUNT,15:00,11:56,ARI 9,LA 38,35.0,276.0,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0,0.0,,,,,,,0,0,0.0,1,0,,,,,,,,,,,0,0,-0.665945112705,,,,,,,
2,68,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,ARI,away,LA,ARI,91.0,2004-09-12,895.0,1795.0,3595.0,Half1,0,1.0,0,1,1.0,0,14:55,ARI 9,10,53.0,(14:55) 12-J.McCown pass to 11-L.Fitzgerald to...,pass,37.0,0,0,1.0,0,0,0,,,,,,,,,,,3,3,0.0,,,,,3.0,3.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.008219416253,0.204906433821,0.016466934234,0.337530136108,0.161974817514,0.001934434986,0.268967807293,0.0,0.0,-0.637796149123,2.414031928871,-1.748086816166,1.748086816166,0.000000000000,0.000000000000,-2.414031928871,2.414031928871,,,,,,,,,,,,,0.411485284567,0.588514715433,0.588514715433,0.411485284567,0.050941765308,0.028402358294,-0.028402358294,0.537572950125,0.462427049875,0.141970396042,0.858029603958,0.000000000000,0.000000000000,-0.050941765308,0.050941765308,,,,,,,,,,,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,00-0021206,J.McCown,37.0,00-0022921,L.Fitzgerald,37.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,LA,,00-0017710,,A.Williams,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,1,1,First down,68.0,13:00:00,12:07:08,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0.0,0,PASS,0,,,ARI 46,1,Punt,,7.0,3:04,2.0,0.0,0.0,1.0,1.0,0.0,KICKOFF,PUNT,15:00,11:56,ARI 9,LA 38,35.0,276.0,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0,1.0,J.McCown,12.0,,,L.Fitzgerald,11.0,1,0,1.0,0,1,00-0021206,,00-0022921,J.McCown,12.0,00-0021206,L.Fitzgerald,00-0022921,L.Fitzgerald,00-0022921,0,0,2.414031928871,,,,,,,
3,93,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,ARI,away,LA,ARI,54.0,2004-09-12,855.0,1755.0,3555.0,Half1,0,1.0,0,1,1.0,0,14:15,ARI 46,10,53.0,(14:15) 12-J.McCown pass incomplete to 82-L.Di...,pass,0.0,0,0,1.0,0,0,0,,,,,,,,,,,3,3,0.0,,,,,3.0,3.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.009176832624,0.115002557635,0.002603607252,0.207282170653,0.264580100775,0.005159568973,0.396195203066,0.0,0.0,1.776235779747,-0.493423517793,-1.254663298372,1.254663298372,0.000000000000,0.000000000000,-1.920608411077,1.920608411077,,,,,,,,,,,,,0.462427049875,0.537572950125,0.537572950125,0.462427049875,-0.018173366785,-0.010430961847,0.010430961847,0.555746316910,0.444253683090,0.170372754335,0.829627245665,0.000000000000,0.000000000000,-0.032768398523,0.032768398523,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00-0021206,J.McCown,,00-0021554,L.Diamond,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,2,1,First down,93.0,13:00:00,12:07:48,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0.0,0,PASS,0,,,ARI 46,1,Punt,,7.0,3:04,2.0,0.0,0.0,1.0,1.0,0.0,KICKOFF,PUNT,15:00,11:56,ARI 9,LA 38,35.0,276.0,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0,0.0,J.McCown,12.0,,,L.Diamond,82.0,1,0,0.0,0,1,00-0021206,,00-0021554,J.McCown,12.0,00-0021206,L.Diamond,00-0021554,L.Diamond,00-0021554,0,0,-0.493423517793,,,,,,,
4,112,2004_01_ARI_STL,2004091208,LA,ARI,REG,1,ARI,away,LA,ARI,54.0,2004-09-12,849.0,1749.0,3549.0,Half1,0,1.0,0,1,2.0,0,14:09,ARI 46,10,53.0,(14:09) 12-J.McCown pass incomplete to 11-L.Fi...,pass,0.0,0,0,1.0,0,0,0,,,,,,,,,,,3,3,0.0,,,,,3.0,3.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.009287794121,0.132419720292,0.002724245656,0.235140010715,0.245933711529,0.005552557763,0.368941962719,0.0,0.0,1.282812261954,-0.904404168949,-0.350259129424,0.350259129424,0.000000000000,0.000000000000,-1.016204242129,1.016204242129,,,,,,,,,,,,,0.444253683090,0.555746316910,0.555746316910,0.444253683090,-0.031995266676,-0.016288012266,0.016288012266,0.587741583586,0.412258416414,0.159941792488,0.840058207512,0.000000000000,0.000000000000,-0.000773131847,0.000773131847,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00-0021206,J.McCown,,00-0022921,L.Fitzgerald,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,2,1,First down,112.0,13:00:00,12:08:34,Edward Jones Dome,"Temp: ° F, Wind: mph",10160000-0269-1882-ad83-9c5a60528a04,0.0,0,PASS,0,,,ARI 46,1,Punt,,7.0,3:04,2.0,0.0,0.0,1.0,1.0,0.0,KICKOFF,PUNT,15:00,11:56,ARI 9,LA 38,35.0,276.0,10,17,Home,7,27,11.0,46.0,1,dome,astroturf,,,Mike Martz,Dennis Green,STL00,Edward Jones Dome,0,0.0,J.McCown,12.0,,,L.Fitzgerald,11.0,1,0,0.0,0,1,00-0021206,,00-0022921,J.McCown,12.0,00-0021206,L.Fitzgerald,00-0022921,L.Fitzgerald,00-0022921,0,0,-0.904404168949,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47465,3998,2004_21_NE_PHI,2005020600,PHI,NE,POST,21,PHI,home,NE,PHI,96.0,2005-02-06,46.0,46.0,46.0,Half2,0,25.0,0,4,1.0,0,00:46,PHI 4,10,1.0,(:46) 5-D.McNabb pass to 36-B.Westbrook to PHI...,pass,1.0,0,0,1.0,0,0,0,,,,,,,,,,,0,1,0.0,,,,,0.0,1.0,21,24,21.0,24.0,-3.0,21.0,24.0,-3.0,0.899399101734,0.019752042368,0.006581085268,0.034162074327,0.027522115037,0.000255925028,0.012327714823,0.0,0.0,-0.142180619005,0.026583004976,-6.180778875307,6.180778875307,-5.136760335963,5.136760335963,-1.359328841965,1.359328841965,,,,,,,,,,,,,0.095929190516,0.904070809484,0.095929190516,0.904070809484,-0.036552712321,-0.040630146861,-0.040630146861,0.059376478195,0.940623521805,0.084676846862,0.084676846862,-0.493073142875,0.493073142875,-0.118591132416,0.118591132416,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,00-0011022,D.McNabb,1.0,00-0021216,B.Westbrook,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NE,,00-0012909,,R.Phifer,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,62,0,Turnover,3998.0,18:30:00,22:13:44,ALLTEL Stadium,"Clear Temp: 59° F, Humidity: 75%, Wind: North ...",10160000-0272-43cb-89a9-59e63349edce,0.0,0,PASS,0,,,PHI 5,25,Turnover,,3.0,0:37,0.0,0.0,0.0,4.0,4.0,0.0,PUNT,INTERCEPTION,00:46,00:09,PHI 4,PHI 5,3998.0,4038.0,24,21,Neutral,-3,45,-7.0,47.0,0,outdoors,grass,59.0,12.0,Andy Reid,Bill Belichick,JAX00,Alltel Stadium,0,1.0,D.McNabb,5.0,,,B.Westbrook,36.0,1,0,0.0,0,1,00-0011022,,00-0021216,D.McNabb,5.0,00-0011022,B.Westbrook,00-0021216,B.Westbrook,00-0021216,0,1,0.026583004976,,,,,,,
47466,4019,2004_21_NE_PHI,2005020600,PHI,NE,POST,21,PHI,home,NE,PHI,95.0,2005-02-06,22.0,22.0,22.0,Half2,0,25.0,0,4,2.0,0,00:22,PHI 5,9,1.0,(:22) 5-D.McNabb pass incomplete to 81-T.Owens.,pass,0.0,0,0,1.0,0,0,0,,,,,,,,,,,0,1,0.0,,,,,0.0,1.0,21,24,21.0,24.0,-3.0,21.0,24.0,-3.0,0.949289679527,0.007699886803,0.004897135310,0.021622158587,0.011484487914,0.000169702369,0.004836936947,0.0,0.0,-0.115597614029,-0.007420303387,-6.188199178694,6.188199178694,-5.136760335963,5.136760335963,-1.366749145353,1.366749145353,,,,,,,,,,,,,0.059376478195,0.940623521805,0.059376478195,0.940623521805,-0.003595039248,-0.012569852173,-0.012569852173,0.055781438947,0.944218561053,0.044046700001,0.044046700001,-0.493073142875,0.493073142875,-0.122186171665,0.122186171665,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00-0011022,D.McNabb,,00-0012478,T.Owens,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,62,0,Turnover,4019.0,18:30:00,22:14:08,ALLTEL Stadium,"Clear Temp: 59° F, Humidity: 75%, Wind: North ...",10160000-0272-43cb-89a9-59e63349edce,0.0,0,PASS,0,,,PHI 5,25,Turnover,,3.0,0:37,0.0,0.0,0.0,4.0,4.0,0.0,PUNT,INTERCEPTION,00:46,00:09,PHI 4,PHI 5,3998.0,4038.0,24,21,Neutral,-3,45,-7.0,47.0,0,outdoors,grass,59.0,12.0,Andy Reid,Bill Belichick,JAX00,Alltel Stadium,0,0.0,D.McNabb,5.0,,,T.Owens,81.0,1,0,0.0,0,1,00-0011022,,00-0012478,D.McNabb,5.0,00-0011022,T.Owens,00-0012478,T.Owens,00-0012478,0,1,-0.007420303387,,,,,,,
47467,4038,2004_21_NE_PHI,2005020600,PHI,NE,POST,21,PHI,home,NE,PHI,95.0,2005-02-06,17.0,17.0,17.0,Half2,0,25.0,0,4,3.0,0,00:17,PHI 5,9,1.0,(:17) (Shotgun) 5-D.McNabb pass intended for 8...,pass,0.0,1,0,1.0,0,0,0,,,,,,,,,,,0,1,0.0,,,,,0.0,1.0,21,24,21.0,24.0,-3.0,21.0,24.0,-3.0,0.953856706619,0.006094351411,0.005021292251,0.021686341614,0.008870205842,0.000159266623,0.004311851691,0.0,0.0,-0.123017917416,-1.888383560785,-8.076582739479,8.076582739479,-5.136760335963,5.136760335963,-3.255132706137,3.255132706137,,,,,,,,,,,,,0.055781438947,0.944218561053,0.055781438947,0.944218561053,-0.031101599336,-0.025290600955,-0.025290600955,0.024679839611,0.975320160389,0.031476847827,0.031476847827,-0.493073142875,0.493073142875,-0.153287771000,0.153287771000,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,00-0011022,D.McNabb,,00-0022126,L.Smith,,,,,,,,,,,,,00-0007030,R.Harrison,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,PHI,,00-0020497,,F.Mitchell,,,,,,,,,,,,,,0.0,,,,,,,00-0007030,R.Harrison,,,,,,,,,,,,,,,,,,,,,,,NE,6.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,62,0,Turnover,4038.0,18:30:00,22:14:51,ALLTEL Stadium,"Clear Temp: 59° F, Humidity: 75%, Wind: North ...",10160000-0272-43cb-89a9-59e63349edce,0.0,0,PASS,0,,00:09,PHI 22,25,Turnover,,3.0,0:37,0.0,0.0,0.0,4.0,4.0,0.0,PUNT,INTERCEPTION,00:46,00:09,PHI 4,PHI 5,3998.0,4038.0,24,21,Neutral,-3,45,-7.0,47.0,0,outdoors,grass,59.0,12.0,Andy Reid,Bill Belichick,JAX00,Alltel Stadium,0,0.0,D.McNabb,5.0,,,L.Smith,82.0,1,0,0.0,0,1,00-0011022,,00-0022126,D.McNabb,5.0,00-0011022,L.Smith,00-0022126,L.Smith,00-0022126,0,1,-1.888383560785,,,,,,,
47468,4062,2004_21_NE_PHI,2005020600,PHI,NE,POST,21,NE,away,PHI,PHI,22.0,2005-02-06,9.0,9.0,9.0,Half2,0,26.0,0,4,1.0,0,00:09,PHI 22,10,-1.0,(:09) 12-T.Brady to PHI 23 for -1 yards.,run,-1.0,0,0,0.0,0,0,0,,,,,,,,,,,0,1,0.0,,,,,1.0,0.0,21,24,24.0,21.0,3.0,24.0,21.0,3.0,0.356721937656,0.002400024561,0.000348376081,0.005148706492,0.597588062286,0.000394990057,0.037397868931,0.0,0.0,2.011401478201,-2.011401478201,-6.065181261278,6.065181261278,-3.125358857762,3.125358857762,-3.255132706137,3.255132706137,,,,,,,,,,,,,0.975320160389,0.024679839611,0.024679839611,0.975320160389,0.024679839611,0.006186246872,-0.006186246872,0.000000000000,1.000000000000,0.993813753128,0.006186246872,-0.517752982486,0.517752982486,-0.153287771000,0.153287771000,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,00-0019596,T.Brady,-1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,0,,,0.0,0.0,0.0,0.0,,,2004,,,63,0,End of half,4062.0,18:30:00,22:16:05,ALLTEL Stadium,"Clear Temp: 59° F, Humidity: 75%, Wind: North ...",10160000-0272-43cb-89a9-59e63349edce,0.0,0,RUSH,0,,,PHI 23,26,End of half,,1.0,0:09,0.0,0.0,0.0,4.0,4.0,0.0,INTERCEPTION,,00:09,00:00,PHI 22,PHI 22,4062.0,4083.0,24,21,Neutral,-3,45,-7.0,47.0,0,outdoors,grass,59.0,12.0,Andy Reid,Bill Belichick,JAX00,Alltel Stadium,0,0.0,,,T.Brady,,,,0,1,0.0,0,1,,00-0019596,,T.Brady,,00-0019596,T.Brady,00-0019596,T.Brady,00-0019596,0,1,-2.011401478201,,,,,,,


# 2. Clean season stats

In [208]:
# Play-by-play stats from 2004 - 2020
play_cols = ['game_id', 'game_date', 'home_team', 'home_score', 'away_team', 'away_score', 'passer_player_name', 'passer_player_id', 'qb_epa']
end_date = {'2004': '2005-01-02', '2005': '2006-01-01', '2006': '2006-12-31', '2007': '2007-12-30', '2008': '2008-12-28', '2009': '2010-01-03', '2010': '2011-01-02', '2011': '2012-01-01', '2012': '2012-12-30', '2013': '2013-12-29', '2014': '2014-12-28', '2015': '2016-01-03', '2016': '2017-01-01', '2017': '2017-12-31', '2018': '2018-12-30', '2019': '2019-12-29', '2020': '2021-01-03', '2021': '2022-01-09'}
plays = []
mvp = {'2004': '00-0010346', '2007': '00-0019596', '2008': '00-0010346', '2009': '00-0010346', '2010': '00-0019596', '2011': '00-0023459', '2013': '00-0010346', '2014': '00-0023459', '2015': '00-0027939', '2016': '00-0026143', '2017': '00-0019596', '2018': '00-0033873', '2019': '00-0034796', '2020': '00-0023459', '2021': '00-0023459'}
years = list(range(2004,2022))
years = [e for e in years if e not in (2005, 2006, 2012)]

for i in years:
    playoff_date = pd.to_datetime(end_date[str(i)])
    # low_memory=False eliminates a warning
    season_data = pd.read_csv('https://github.com/nflverse/nflfastR-data/blob/master/data/play_by_play_' + str(i) + '.csv.gz?raw=True', compression='gzip', low_memory=False)
    # Clean dataset to relevant features
    season_data = season_data[play_cols]
    season_data.dropna(axis=0, how='any', inplace=True)
    season_data.drop_duplicates(inplace=True)
    season_data[['games_played', 'games_won', 'recent_team']] = 1, 0, ' '
    # Filter out games after relevant end date
    season_data['game_date'] = pd.to_datetime(season_data['game_date'])
    season_data = season_data[season_data['game_date'] <= playoff_date]
    season_data = add_teams(i, season_data)
    season_data = add_games_won(season_data)
    games_dict = get_games_dict(season_data)
    new_plays = update_games(games_dict, i)
    # Add MVP
    new_plays[['prob_MVP', 'act_MVP']] = 0
    new_plays.loc[new_plays['player_id'] == mvp[str(i)], 'act_MVP'] = 1
    # Assign rankings
    new_plays = new_plays.groupby(['player_id', 'recent_team', 'season', 'games_played', 'games_won', 'prob_MVP', 'act_MVP'],as_index=False)[['attempts', 'interceptions', 'total_yards', 'total_tds', 'total_epa']].agg('sum')
    new_plays['total_tds_rank'] = new_plays['total_tds'].rank(method='average', ascending=False)
    new_plays['total_epa_rank'] = new_plays['total_epa'].rank(method='average', ascending=False)
    new_plays['total_yards_rank'] = new_plays['total_yards'].rank(method='average', ascending=False)
    new_plays['games_won_rank'] = new_plays['games_won'].rank(method='average', ascending=False)
    new_plays['int_rank'] = new_plays['interceptions'].rank(method='average', ascending=False)
    # Append dataframe to list
    plays.append(new_plays)

# Convert list to dataframe
df = pd.concat(plays)
df.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = value


# 3. Training the Model

In [209]:
# Omitting 'games_played'
# df = df[['player_name','recent_team', 'season', 'act_MVP', 'int_rank', 'total_tds_rank', 'total_epa_rank', 'total_yards_rank', 'games_won_rank']]

In [210]:
df

Unnamed: 0,player_id,recent_team,season,games_played,games_won,prob_MVP,act_MVP,attempts,interceptions,total_yards,total_tds,total_epa,total_tds_rank,total_epa_rank,total_yards_rank,games_won_rank,int_rank
0,00-0001361,BUF,2004,486,0,0,0,450,16,2969,20,-31.915171482245,15.5,28.0,21.0,19.0,9.0
1,00-0001823,NO,2004,585,0,0,0,542,16,3984,25,18.590597648228,11.5,16.0,9.0,19.0,9.0
2,00-0002110,WAS,2004,253,0,0,0,237,6,1256,7,-72.579912131891,33.0,35.0,33.0,19.0,35.0
3,00-0003292,LV,2004,539,0,0,0,513,20,3531,21,-9.758616451538,13.5,22.0,16.0,19.0,3.5
4,00-0003739,MIN,2004,597,0,0,0,623,13,5795,47,182.846453777346,2.0,2.0,1.0,19.0,16.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,00-0036898,HOU,2021,355,0,0,0,329,9,2228,12,-46.069905760991,28.0,31.0,30.0,18.5,22.0
568,00-0036945,CHI,2021,306,0,0,0,270,10,2290,9,-58.200916894737,33.0,35.0,29.0,18.5,18.5
569,00-0036971,JAX,2021,577,0,0,0,543,14,3526,11,-47.358391061321,30.5,32.0,17.0,18.5,3.0
570,00-0036972,NE,2021,489,0,0,0,461,12,3426,18,37.201095363455,22.0,15.0,18.0,18.5,10.0


In [211]:
df2 = df.groupby(['player_id', 'recent_team', 'season', 'games_played', 'games_won', 'prob_MVP', 'act_MVP'],as_index=False)[['attempts', 'interceptions', 'total_yards', 'total_tds', 'total_epa']].agg('sum')

In [212]:
df2 = df2[df2['season'] == 2021]
df2['total_tds_rank'] = df2['total_tds'].rank(method='max', ascending=False)
df2['total_epa_rank'] = df2['total_epa'].rank(method='max', ascending=False)
df2['total_yards_rank'] = df2['total_yards'].rank(method='max', ascending=False)
df2['games_won_rank'] = df2['games_won'].rank(method='max', ascending=False)
df2['int_rank'] = df2['interceptions'].rank(method='min', ascending=False)

In [213]:
df

Unnamed: 0,player_id,recent_team,season,games_played,games_won,prob_MVP,act_MVP,attempts,interceptions,total_yards,total_tds,total_epa,total_tds_rank,total_epa_rank,total_yards_rank,games_won_rank,int_rank
0,00-0001361,BUF,2004,486,0,0,0,450,16,2969,20,-31.915171482245,15.5,28.0,21.0,19.0,9.0
1,00-0001823,NO,2004,585,0,0,0,542,16,3984,25,18.590597648228,11.5,16.0,9.0,19.0,9.0
2,00-0002110,WAS,2004,253,0,0,0,237,6,1256,7,-72.579912131891,33.0,35.0,33.0,19.0,35.0
3,00-0003292,LV,2004,539,0,0,0,513,20,3531,21,-9.758616451538,13.5,22.0,16.0,19.0,3.5
4,00-0003739,MIN,2004,597,0,0,0,623,13,5795,47,182.846453777346,2.0,2.0,1.0,19.0,16.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,00-0036898,HOU,2021,355,0,0,0,329,9,2228,12,-46.069905760991,28.0,31.0,30.0,18.5,22.0
568,00-0036945,CHI,2021,306,0,0,0,270,10,2290,9,-58.200916894737,33.0,35.0,29.0,18.5,18.5
569,00-0036971,JAX,2021,577,0,0,0,543,14,3526,11,-47.358391061321,30.5,32.0,17.0,18.5,3.0
570,00-0036972,NE,2021,489,0,0,0,461,12,3426,18,37.201095363455,22.0,15.0,18.0,18.5,10.0


In [214]:
df2

Unnamed: 0,player_id,recent_team,season,games_played,games_won,prob_MVP,act_MVP,attempts,interceptions,total_yards,total_tds,total_epa,total_tds_rank,total_epa_rank,total_yards_rank,games_won_rank,int_rank
80,00-0019596,TB,2021,653,0,0,0,632,11,4661,39,109.756108475161,1.0,4.0,2.0,36.0,12.0
185,00-0022924,PIT,2021,554,0,0,0,515,8,3383,21,-29.296899262318,16.0,29.0,20.0,36.0,24.0
222,00-0023459,GB,2021,503,0,0,1,475,4,3771,36,117.953248972964,4.0,2.0,10.0,36.0,33.0
302,00-0026143,ATL,2021,538,0,0,0,504,11,3628,20,5.393295881533,19.0,22.0,14.0,36.0,12.0
328,00-0026498,LA,2021,560,0,0,0,534,13,4383,36,94.995887657996,4.0,6.0,5.0,36.0,6.0
367,00-0027973,CHI,2021,164,0,0,0,153,6,1072,6,-5.362241588968,35.0,24.0,35.0,36.0,30.0
400,00-0029263,SEA,2021,378,0,0,0,345,5,2793,19,-1.098593792028,20.0,23.0,25.0,36.0,32.0
410,00-0029604,MIN,2021,567,0,0,0,539,7,4087,31,62.764493194131,8.0,10.0,8.0,36.0,27.0
435,00-0029701,TEN,2021,527,0,0,0,481,14,3592,23,38.947240131966,13.0,14.0,15.0,36.0,1.0
438,00-0030520,NYG,2021,162,0,0,0,156,8,786,5,-50.089546216534,36.0,33.0,36.0,36.0,24.0


In [215]:
df.shape

(572, 17)

In [216]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from scipy.special import softmax

## Split data

In [217]:
feature_cols = ['int_rank', 'total_tds_rank', 'total_epa_rank', 'total_yards_rank', 'games_won_rank']

# Split into training and test sets
train = df[df['season'] < 2017]
test = df[df['season'] >= 2017]
x_train = train[feature_cols]
y_train = train['act_MVP']
x_test = test[feature_cols]
y_test = test['act_MVP']

In [218]:
# Fit the model
logr = LogisticRegression()
logr.fit(x_train, y_train)

LogisticRegression()

In [219]:
# Calculate the model accruacy
score = logr.score(x_test, y_test)
score

0.9736842105263158

## Gather coefficients

In [220]:
print("Features: ", feature_cols)
print("Coefficients: ", logr.coef_)
print("Intercept: ", logr.intercept_)

Features:  ['int_rank', 'total_tds_rank', 'total_epa_rank', 'total_yards_rank', 'games_won_rank']
Coefficients:  [[ 0.11190171 -0.1840512  -0.4738122  -0.19496197  0.14663249]]
Intercept:  [-3.88298092]


# 4. Testing the model

In [230]:
new = df[df['season'] == 2004]
new['prob_MVP'] = logr.predict_proba(new[feature_cols]).tolist()
new['prob_MVP'] = new['prob_MVP'].apply(lambda x: x[1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new['prob_MVP'] = logr.predict_proba(new[feature_cols]).tolist()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new['prob_MVP'] = new['prob_MVP'].apply(lambda x: x[1])


In [231]:
pd.set_option("display.precision", 12)

In [232]:
new = new[['player_id', 'recent_team', 'season', 'prob_MVP', 'act_MVP', 'int_rank', 'total_tds_rank', 'total_epa_rank', 'total_yards_rank', 'games_won_rank']]
new.sort_values(by=['prob_MVP'], ascending=False).head(15)

Unnamed: 0,player_id,recent_team,season,prob_MVP,act_MVP,int_rank,total_tds_rank,total_epa_rank,total_yards_rank,games_won_rank
12,00-0010346,IND,2004,0.495345523208,1,19.0,1.0,1.0,2.0,19.0
4,00-0003739,MIN,2004,0.318369697797,0,16.5,2.0,2.0,1.0,19.0
13,00-0011022,PHI,2004,0.158769223381,0,22.0,3.0,4.0,3.0,19.0
25,00-0020531,LAC,2004,0.036823354808,0,33.5,5.5,5.0,13.0,19.0
20,00-0019596,NE,2004,0.009822947453,0,14.5,4.0,6.0,8.0,19.0
9,00-0006355,KC,2004,0.009356237587,0,6.0,10.0,3.0,5.0,19.0
21,00-0019599,LA,2004,0.001802719656,0,9.0,9.0,8.0,4.0,19.0
6,00-0005106,GB,2004,0.00132792119,0,1.5,5.5,7.0,7.0,19.0
15,00-0013042,DEN,2004,0.000433330413,0,1.5,7.5,9.0,6.0,19.0
5,00-0004161,CAR,2004,0.000263780318,0,12.5,7.5,11.0,10.0,19.0
