In [51]:
import pandas as pd
import numpy as np

In [74]:
# Load the per-player game log; the `date` column is used as the index and
# parsed as dates.
pstats = pd.read_csv(
    './data/player_stats_2005.csv',
    index_col="date",
    parse_dates=True,
)

In [75]:
# Sort rows by the date index and remove the 'Unnamed: 0' column.
# Rebinding the name (instead of mutating in place) together with
# errors='ignore' keeps this cell safe to re-run: a second run no longer
# raises KeyError because the column is already gone.
pstats = pstats.sort_index().drop(columns='Unnamed: 0', errors='ignore')

In [76]:
# Keep only rows whose `year` is 2010 or later.
pstats = pstats.loc[pstats['year'].ge(2010)]

In [77]:
# Row/column count after restricting to year >= 2010.
pstats.shape

(181675, 45)

In [78]:
# Keep rows with game_number of 16 or less.
# NOTE(review): presumably this limits the data to regular-season games — confirm.
pstats = pstats.loc[pstats['game_number'].le(16)]

In [79]:
# Row/column count after dropping rows with game_number above 16.
pstats.shape

(174889, 45)

In [80]:
# Derive the home team for each row: the player's own team when
# game_location is 'H', otherwise the opponent.
# assign() builds a new frame, which avoids the SettingWithCopyWarning that
# writing a column into a boolean-filtered frame would trigger.
pstats = pstats.assign(
    home_team=np.where(
        pstats['game_location'] == 'H',
        pstats['team'],
        pstats['opponent'],
    )
)

In [81]:
# Move the `date` index back into an ordinary column.
pstats = pstats.reset_index()

In [82]:
# List the available columns before narrowing the frame.
pstats.columns

Index(['date', 'player_id', 'year', 'game_number', 'age', 'team',
       'game_location', 'opponent', 'game_won', 'player_team_score',
       'opponent_score', 'passing_attempts', 'passing_completions',
       'passing_yards', 'passing_rating', 'passing_touchdowns',
       'passing_interceptions', 'passing_sacks', 'passing_sacks_yards_lost',
       'rushing_attempts', 'rushing_yards', 'rushing_touchdowns',
       'receiving_targets', 'receiving_receptions', 'receiving_yards',
       'receiving_touchdowns', 'kick_return_attempts', 'kick_return_yards',
       'kick_return_touchdowns', 'punt_return_attempts', 'punt_return_yards',
       'punt_return_touchdowns', 'defense_sacks', 'defense_tackles',
       'defense_tackle_assists', 'defense_interceptions',
       'defense_interception_yards', 'defense_interception_touchdowns',
       'defense_safeties', 'point_after_attemps', 'point_after_makes',
       'field_goal_attempts', 'field_goal_makes', 'punting_attempts',
       'punting_yards', '

In [83]:
# Narrow the frame to the game identifiers, scores and yardage columns.
pstats = pstats.loc[:, [
    'date',
    'player_id',
    'game_number',
    'team',
    'home_team',
    'opponent',
    'game_won',
    'player_team_score',
    'opponent_score',
    'passing_yards',
    'rushing_yards',
    'receiving_yards',
]]

In [84]:
# Peek at the first row of the trimmed frame.
pstats.head(1)

Unnamed: 0,date,player_id,game_number,team,home_team,opponent,game_won,player_team_score,opponent_score,passing_yards,rushing_yards,receiving_yards
0,2010-09-09,3778,1,NOR,NOR,MIN,True,14,9,0,0,0


In [85]:
# Convert `date` to plain strings so it can later be concatenated with the
# team code into a join key.
# assign() returns a new frame, avoiding the SettingWithCopyWarning that
# in-place column assignment on a column-subset frame would raise.
pstats = pstats.assign(date=pstats['date'].astype(str))

In [86]:
# Total passing and rushing yards per (date, game_number, team).
# The two yardage columns are selected *before* summing so that unrelated
# columns (player ids, scores, string columns) are never aggregated, and
# reset_index() turns the group keys back into ordinary columns.
nfl_yards_agg = (
    pstats
    .groupby(['date', 'game_number', 'team'])[['passing_yards', 'rushing_yards']]
    .sum()
    .reset_index()
)

In [87]:
# Show the aggregated rows for team code 'STL'.
nfl_yards_agg[nfl_yards_agg['team']== 'STL']

Unnamed: 0,date,game_number,team,passing_yards,rushing_yards
24,2010-09-12,1,STL,253,85
58,2010-09-19,2,STL,167,75
90,2010-09-26,3,STL,235,133
119,2010-10-03,4,STL,289,88
147,2010-10-10,5,STL,215,128
...,...,...,...,...,...
2939,2015-12-06,12,STL,146,66
2970,2015-12-13,13,STL,124,203
2976,2015-12-17,14,STL,234,98
3035,2015-12-27,15,STL,103,102


In [92]:
# Count the distinct team codes.
# NOTE(review): this cell's execution count (92) is later than that of the
# cell applying desc_to_id (90), so the displayed value may reflect the
# already-mapped codes — re-run the notebook in order to confirm.
nfl_yards_agg['team'].nunique()

32

In [89]:
# Source team code -> short team id. Built once at definition time instead of
# on every call. Two pairs of source codes share an id:
# 'SDG'/'LAC' -> 'LAC' and 'STL'/'LAR' -> 'LAR'.
_TEAM_ID_BY_CODE = {
    'GNB': 'GB',
    'NOR': 'NO',
    'TAM': 'TB',
    'KAN': 'KC',
    'NWE': 'NE',
    'SFO': 'SF',
    'ARI': 'ARI',
    'ATL': 'ATL',
    'BAL': 'BAL',
    'BUF': 'BUF',
    'CAR': 'CAR',
    'CHI': 'CHI',
    'CIN': 'CIN',
    'CLE': 'CLE',
    'DAL': 'DAL',
    'DEN': 'DEN',
    'DET': 'DET',
    'HOU': 'HOU',
    'IND': 'IND',
    'JAX': 'JAX',
    'LAC': 'LAC',
    'LAR': 'LAR',
    'MIA': 'MIA',
    'MIN': 'MIN',
    'NYG': 'NYG',
    'NYJ': 'NYJ',
    'OAK': 'OAK',
    'PHI': 'PHI',
    'PIT': 'PIT',
    'SDG': 'LAC',
    'SEA': 'SEA',
    'STL': 'LAR',
    'TEN': 'TEN',
    'WAS': 'WAS',
}


def desc_to_id(id):
    """Translate a source team code into the short team id.

    Parameters
    ----------
    id : str
        Team code as it appears in the player-stats data (e.g. 'GNB').

    Returns
    -------
    str
        The mapped id (e.g. 'GB'), or the sentinel 'CHECK!' when the code is
        not in the lookup table (or is not hashable).

    Notes
    -----
    The function is not idempotent: outputs that differ from their source
    code (such as 'GB' or 'NO') are not keys of the table, so feeding a
    mapped value back in returns 'CHECK!'.
    """
    try:
        return _TEAM_ID_BY_CODE[id]
    except (KeyError, TypeError):
        # KeyError: unknown code; TypeError: unhashable input.
        # Anything else is a genuine error and is allowed to propagate
        # rather than being silently turned into the sentinel.
        return 'CHECK!'

In [90]:
# Translate every source team code to its short id, element by element.
# Caution: desc_to_id has no keys for outputs such as 'GB', so running this
# cell a second time would turn those values into 'CHECK!'.
nfl_yards_agg['team'] = nfl_yards_agg['team'].map(desc_to_id)

In [91]:
# Sanity check on the mapping: desc_to_id marks unknown codes with 'CHECK!',
# so fail loudly if any slipped through instead of relying on the distinct
# count alone.
assert not nfl_yards_agg['team'].eq('CHECK!').any(), "unmapped team code(s) found"
nfl_yards_agg['team'].nunique()

32

In [97]:
# Build the join key: team id immediately followed by the date string,
# e.g. 'MIN2010-09-09'.
nfl_yards_agg['id'] = nfl_yards_agg['team'].add(nfl_yards_agg['date'])

In [98]:
# Preview the aggregated yards together with the new `id` key.
nfl_yards_agg.head()

Unnamed: 0,date,game_number,team,passing_yards,rushing_yards,id
0,2010-09-09,1,MIN,171,91,MIN2010-09-09
1,2010-09-09,1,NO,237,79,NO2010-09-09
2,2010-09-12,1,ARI,297,112,ARI2010-09-12
3,2010-09-12,1,ATL,252,58,ATL2010-09-12
4,2010-09-12,1,BUF,139,50,BUF2010-09-12


#### Bringing in Scores data

In [102]:
# Load the per-team scores table; the first CSV column is used as the index.
nfl_scores = pd.read_csv(
    './data/nfl_scores_agg.csv',
    index_col=0,
)

In [103]:
# Preview the scores table.
nfl_scores.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,team,score,opponent_score,team_favorite_id,spread_favorite,over_under_line,margin,home_team,id
0,2010-09-09,2010,1,MIN,9.0,14.0,NO,-5.0,49.5,-5.0,NO,MIN2010-09-09
0,2010-09-09,2010,1,NO,14.0,9.0,NO,-5.0,49.5,5.0,NO,NO2010-09-09
10,2010-09-12,2010,1,ARI,17.0,13.0,ARI,-3.0,39.5,4.0,LAR,ARI2010-09-12
8,2010-09-12,2010,1,ATL,9.0,15.0,ATL,-1.5,39.5,-6.0,PIT,ATL2010-09-12
1,2010-09-12,2010,1,BUF,10.0,15.0,MIA,-3.0,39.0,-5.0,BUF,BUF2010-09-12


In [104]:
# Inner-join scores with the yardage totals on the shared `id` key.
# Both inputs carry a `team` column, so the result holds team_x / team_y.
nfl_agg_agg = pd.merge(
    left=nfl_scores,
    right=nfl_yards_agg,
    how='inner',
    on='id',
)

In [106]:
# Inspect the last 33 rows of the merged frame.
nfl_agg_agg.tail(33)

Unnamed: 0,schedule_date,schedule_season,schedule_week,team_x,score,opponent_score,team_favorite_id,spread_favorite,over_under_line,margin,home_team,id,date,game_number,team_y,passing_yards,rushing_yards
3905,2017-11-23,2017,12,DET,23.0,30.0,MIN,-3.0,44.5,-7.0,DET,DET2017-11-23,2017-11-23,11,DET,250,53
3906,2017-11-23,2017,12,LAC,28.0,6.0,LAC,-1.0,48.0,22.0,DAL,LAC2017-11-23,2017-11-23,11,LAC,434,81
3907,2017-11-23,2017,12,MIN,30.0,23.0,MIN,-3.0,44.5,7.0,DET,MIN2017-11-23,2017-11-23,11,MIN,282,136
3908,2017-11-23,2017,12,NYG,10.0,20.0,WAS,-7.5,45.0,-10.0,WAS,NYG2017-11-23,2017-11-23,11,NYG,113,84
3909,2017-11-23,2017,12,WAS,20.0,10.0,WAS,-7.5,45.0,10.0,WAS,WAS2017-11-23,2017-11-23,11,WAS,242,122
3910,2017-11-26,2017,12,ARI,27.0,24.0,JAX,-5.0,38.0,3.0,ARI,ARI2017-11-26,2017-11-26,11,ARI,241,108
3911,2017-11-26,2017,12,ATL,34.0,20.0,ATL,-10.0,48.5,14.0,ATL,ATL2017-11-26,2017-11-26,11,ATL,368,148
3912,2017-11-26,2017,12,BUF,16.0,10.0,KC,-10.0,46.0,6.0,KC,BUF2017-11-26,2017-11-26,11,BUF,183,104
3913,2017-11-26,2017,12,CAR,35.0,27.0,CAR,-5.0,40.0,8.0,NYJ,CAR2017-11-26,2017-11-26,11,CAR,168,145
3914,2017-11-26,2017,12,CHI,3.0,31.0,PHI,-13.5,44.0,-28.0,PHI,CHI2017-11-26,2017-11-26,11,CHI,147,6
