In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [2]:
qb=pd.read_csv('../input/qb/qb_loc_cleaned.csv')

In [3]:
qb.dtypes

year           int64
Name          object
Team          object
Week           int64
Opponent      object
att            int64
comp%        float64
yds            int64
td             int64
rat          float64
def_rk         int64
h/a_1        float64
surface_0    float64
log_att      float64
log_comp     float64
log_yds      float64
salary         int64
points       float64
dtype: object

In [4]:
qb['salary'] = qb['salary'].replace('null', 0)

In [159]:

def create_sorted_dict (df):
    df_sorted = df.sort_values(['year','Name','Week','Opponent'])
    sorted_dict = {}
    for player in df_sorted['Name']:
        sorted_dict['{}'.format(player)] = {'year':df_sorted[df_sorted['Name']==player]['year'],
                                            'points':df_sorted[df_sorted['Name']==player]['points'],
                                            'def_rk':df_sorted[df_sorted['Name']==player]['def_rk'],
                                            'surface_0':df_sorted[df_sorted['Name']==player]['surface_0'],
                                            'h/a_1':df_sorted[df_sorted['Name']== player]['h/a_1'],
                                            'salary':df_sorted[df_sorted['Name']==player]['salary']
                                           }
       
    return sorted_dict

In [6]:
qb_sorted_dict = create_sorted_dict(qb)

In [7]:
qb_sorted_dict.keys()

dict_keys(['AJ McCarron', 'Aaron Rodgers', 'Alex Smith', 'Alex Tanney', 'Andrew Luck', 'Andy Dalton', 'Austin Davis', 'BJ Daniels', 'Ben Roethlisberger', 'Blaine Gabbert', 'Blake Bortles', 'Brandon Weeden', 'Brian Hoyer', 'Brock Osweiler', 'Cam Newton', 'Carson Palmer', 'Case Keenum', 'Charlie Whitehurst', 'Chase Daniel', 'Colin Kaepernick', 'Colt McCoy', 'Dan Orlovsky', 'Derek Anderson', 'Derek Carr', 'Drew Brees', 'Drew Stanton', 'EJ Manuel', 'Eli Manning', 'Geno Smith', 'Jameis Winston', 'Jay Cutler', 'Jimmy Clausen', 'Jimmy Garoppolo', 'Joe Flacco', 'Johnny Manziel', 'Josh Freeman', 'Josh McCown', 'Kellen Clemens', 'Kellen Moore', 'Kirk Cousins', 'Landry Jones', 'Luke McCown', 'Marcus Mariota', 'Mark Sanchez', 'Matt Cassel', 'Matt Hasselbeck', 'Matt McGloin', 'Matt Moore', 'Matt Ryan', 'Matt Schaub', 'Matthew Stafford', 'Michael Vick', 'Nick Foles', 'Peyton Manning', 'Philip Rivers', 'Russell Wilson', 'Ryan Fitzpatrick', 'Ryan Lindley', 'Ryan Mallett', 'Ryan Nassib', 'Ryan Tannehil

In [90]:
qb_sorted_dict['Aaron Rodgers']['points']

3       23.06
38      22.26
72      37.92
107     16.26
138     18.54
168     19.60
229      6.18
259     36.96
288     25.12
321     19.08
349     14.38
386     26.62
421     19.42
457     11.06
493     10.94
529     14.84
569     23.56
601     19.42
637     26.40
702     17.86
732     14.46
765     27.74
797     33.84
827     27.18
858     32.54
886     32.34
916     26.12
950     21.24
984     11.98
1020    40.18
1058    37.20
1098    20.54
1130    23.52
1162    28.82
1196    23.06
1230    24.04
1261     0.72
1552    24.90
1657    24.94
1694    16.04
1726    19.90
1762    17.02
Name: points, dtype: float64

In [8]:
# Create a data frame to investigate the points attribute of the dataset. 


def get_points_df(df):
    total_points = df['points'].sum()
    total_games = df['points'].count()

    val_lst = []
    sal_lst = []
    games_lst = []
    wght_avg=[]
    players = []
    avg_lst=[]

    for name in list(df['Name'].unique()):
        players.append(name)
        player_games = df[df['Name'] ==name].describe().loc['count'][0]
        games_lst.append(player_games)
        avg_points = round(df[df['Name'] ==name].describe()['points'].mean(),2)
        avg_lst.append(avg_points)
        wght_avg.append((round(avg_points + (avg_points* (player_games/total_games)),2)))
        sal_avg= round(df[df['Name'] ==name].describe()['salary'].mean(),2)
        sal_lst.append(sal_avg)
        val = round(sal_avg/avg_points,2)
        val_lst.append(val)
     
    df_eng = pd.DataFrame()
    df_eng['player'] = (df['Name'].unique())
    df_eng['games'] = games_lst
    df_eng['avg_points']= avg_lst
    df_eng['wght_avg'] = wght_avg
    df_eng['sal_avg'] = sal_lst
    df_eng['val'] = val_lst
    df_eng.head()
    
    return df_eng

In [9]:
qb_points = get_points_df(qb)

In [11]:
qb_points.to_csv('../input/qb/qb_points.csv', index=False)

## Group Dataframes

In [12]:
# A function to create a dictonary of grouped data frames

def grouped_dict(csv):
    
    df= pd.read_csv(csv)
    

    yearly_dict = {}
    for year in list(df['year'].unique()):
        yearly_dict['player_{}'.format(year)] = (df[df['year']==year]).drop('year',axis=1).groupby(['Name','Week','Opponent']).sum()
        yearly_dict['player_{}'.format(year)]


    return yearly_dict

        
        

In [13]:
def conv_atts(df):
    df['salary'] = df['salary'].apply(lambda x: int(x))
    df['h/a_1'] = df['h/a_1'].astype('category')
    df['surface_0'] = df['surface_0'].astype('category')
    df['def_rk'] = df['def_rk'].astype('category')

    return df

In [14]:
qb_group = grouped_dict('../input/qb/qb_loc_cleaned.csv')
qb = conv_atts(qb)
qb.dtypes

year            int64
Name           object
Team           object
Week            int64
Opponent       object
att             int64
comp%         float64
yds             int64
td              int64
rat           float64
def_rk       category
h/a_1        category
surface_0    category
log_att       float64
log_comp      float64
log_yds       float64
salary          int64
points        float64
dtype: object

In [61]:
qb_group['player_2015'].columns

Index(['att', 'comp%', 'yds', 'td', 'rat', 'def_rk', 'h/a_1', 'surface_0',
       'log_att', 'log_comp', 'log_yds', 'salary', 'points'],
      dtype='object')

In [15]:
def drop_col (grouped_df):
    for group in grouped_df:
        grouped_df[group].drop('def_rk', axis=1,inplace=True)
        grouped_df[group].drop('h/a_1', axis=1, inplace=True)
        grouped_df[group].drop('surface_0',axis=1, inplace=True)
        grouped_df[group].drop('salary',axis=1, inplace=True)
        grouped_df[group].drop('points',axis=1, inplace=True)
    return grouped_df

In [16]:
temp = drop_col(qb_group)
temp['player_2015'].columns

Index(['att', 'comp%', 'yds', 'td', 'rat', 'log_att', 'log_comp', 'log_yds'], dtype='object')

In [17]:
qb_group['player_2015'].columns

Index(['att', 'comp%', 'yds', 'td', 'rat', 'log_att', 'log_comp', 'log_yds'], dtype='object')

## Career Average

In [18]:
# Week 1 stats are player's career average through 2014 season
# Populate current dataframe with current year averages of players

def career_average (grouped_df1, grouped_df2=None):
    new_grouped_df1 = pd.DataFrame(index= grouped_df1.index)
    for col in grouped_df1: # Iterate through every attribute in the team_stats dataframe
        points=[]  # Create an empty list to hold the season average of the current attribute

        try:
            for player in grouped_df1.index.levels[0]:  # Iterate through every player in the team_stats df
                if player in list(grouped_df2.index.levels[0].unique()):
                    total =[grouped_df2.loc[player][col][-1]] # If player not a rookie, use final average from 2015
                else:
                    total =[]  # create an empty list to hold the weekly attributes value
                
                for week in grouped_df1.loc[player].index: # Iterate throough every week for the current player
                    total.append(grouped_df1.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                    points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list
                    
            new_grouped_df1['{}_car'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe


                                
        except:
            for player in grouped_df1.index.levels[0]:  # Iterate through every player in the team_stats df
                total = []

                for week in grouped_df1.loc[player].index: # Iterate throough every week for the current player
                    total.append(grouped_df1.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                    points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list

            new_grouped_df1['{}_car'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe
    return new_grouped_df1

In [19]:
df_career_2015 = career_average(qb_group['player_2015'])
df_career_2016 = career_average(qb_group['player_2016'], qb_group['player_2015'])
df_career_2017 = career_average(qb_group['player_2017'], qb_group['player_2016'])
df_career_2018 = career_average(qb_group['player_2018'], qb_group['player_2017'])

In [20]:
player_career = pd.concat([df_career_2015, df_career_2016])
player_career = pd.concat([player_career, df_career_2017])
player_career = pd.concat([player_career, df_career_2018])

In [21]:
player_stats = pd.concat([qb_group['player_2015'],qb_group['player_2016']])
player_stats = pd.concat([player_stats,qb_group['player_2017']])
player_stats = pd.concat([player_stats,qb_group['player_2018']])
player_stats.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att,comp%,yds,td,rat,log_att,log_comp,log_yds
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Tom Brady,3,DET,26,53.8,133,1,65.06,1.414973,1.146128,2.123852
Tom Brady,4,MIA,35,65.7,274,3,94.23,1.544068,1.361728,2.437751
Tyrod Taylor,1,PIT,40,37.5,197,1,51.77,1.60206,1.176091,2.294466
Tyrod Taylor,2,NO,30,73.3,246,1,94.58,1.477121,1.342423,2.390935
Tyrod Taylor,3,NYJ,14,28.6,19,0,39.58,1.146128,0.60206,1.278754


In [23]:
qb_stats =  player_career

In [24]:
qb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_car,comp%_car,yds_car,td_car,rat_car,log_att_car,log_comp_car,log_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AJ McCarron,12,LAR,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48
AJ McCarron,13,CLE,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88
AJ McCarron,14,PIT,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4
AJ McCarron,15,SF,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62
AJ McCarron,16,DEN,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76


## Recent Player Stats
Calculate the 4 game moving average of all attributes for each player and insert resluts into positional dataframe

In [25]:
# A function designed to calcuate a player's 4 game moving average in seasons 2015,2016,2017,2018

def moving_average(grouped_df, games):
    new_grouped_df = pd.DataFrame(index = grouped_df.index)
    for col in grouped_df:  # Iterate through every attribute in the team_stats dataframe
        ma4 = []  # Create an empty list to hold the season average of the current attribute
        for player in grouped_df.index.levels[0]:  # Iterate through every player in the team_stats df
            total = [] # INstantiate a deque container with a max length of 3 to hold the three most current games for player
            count = 0 # Counter to track week number 
            for week in grouped_df.loc[player].index: # Iterate through every week for relevant player d
                if count < games: # check count value to start 3 game moving average
                    ma4.append(0)  # week 1 through 3 have 3 game movong average of 0
                    total.append(grouped_df.loc[player].loc[week][col])  # Add the value current player's attribute total from the left
                    count += 1  # Increase count
                else:  # Once 3 game moving average is avaliable
                    ma4.append(np.mean(total))  # Add the average of the 3 games held in totals 3 game moivng average list
                    total.append(grouped_df.loc[player].loc[week][col]) # Replace 3 set block with a first in first out process
        new_grouped_df['{}_ma'.format(col.lower())] = ma4  # Populate databse with 3 game moving average list
       

    return new_grouped_df   

In [26]:
player_ma_2015 = moving_average(qb_group['player_2015'],4)
player_ma_2016 = moving_average(qb_group['player_2016'],4)
player_ma_2017 = moving_average(qb_group['player_2017'],4)
player_ma_2018 = moving_average(qb_group['player_2018'],4)

In [27]:
player_yearly_ma = pd.concat([player_ma_2015, player_ma_2016])
player_yearly_ma = pd.concat([player_yearly_ma,player_ma_2017])
player_yearly_ma = pd.concat([player_yearly_ma,player_ma_2018])

In [28]:
qb_stats = pd.concat([player_yearly_ma,player_career], axis=1)
qb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_ma,comp%_ma,yds_ma,td_ma,rat_ma,log_att_ma,log_comp_ma,log_yds_ma,att_car,comp%_car,yds_car,td_car,rat_car,log_att_car,log_comp_car,log_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AJ McCarron,12,LAR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48
AJ McCarron,13,CLE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88
AJ McCarron,14,PIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4
AJ McCarron,15,SF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62
AJ McCarron,16,DEN,14.25,76.725,123.5,0.75,92.35,0.826123,0.704886,1.621584,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76


## Current Stats
Weekly moving average for every year from 2015 through 2018

In [29]:
# Week 1 stats are player's career average through 2014 season
# Populate current dataframe with current year averages of players

def insert_data (grouped_df):
    new_df = pd.DataFrame(index=grouped_df.index)
    for col in grouped_df: # Iterate through every attribute in the team_stats dataframe
        points=[]  # Create an empty list to hold the season average of the current attribute

        for player in grouped_df.index.levels[0]:  # Iterate through every player in the team_stats df
            total =[]# create an empty list to hold the weekly attributes valu3

            for week in grouped_df.loc[player].index: # Iterate throough every week for the current player
                total.append(grouped_df.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list

        new_df['{}_avg'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe
    return new_df

In [30]:
qb_week_2015 = insert_data(qb_group['player_2015'])
qb_week_2016 = insert_data(qb_group['player_2016'])
qb_week_2017 = insert_data(qb_group['player_2017'])
qb_week_2018 = insert_data(qb_group['player_2018'])

In [31]:
qb_week_2015.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_avg,comp%_avg,yds_avg,td_avg,rat_avg,log_att_avg,log_comp_avg,log_yds_avg
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AJ McCarron,12,LAR,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48
AJ McCarron,13,CLE,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88
AJ McCarron,14,PIT,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4
AJ McCarron,15,SF,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62
AJ McCarron,16,DEN,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76


In [136]:
qb_week_2015['year']=2015
qb_week_2016['year']=2016
qb_week_2017['year']=2017
qb_week_2018['year']=2018

In [137]:
qb_week = pd.concat([qb_week_2015,qb_week_2016])
qb_week = pd.concat([qb_week, qb_week_2017])
qb_week = pd.concat([qb_week, qb_week_2018])

In [141]:

year

Name              Week  Opponent
AJ McCarron       12    LAR         2015
                  13    CLE         2015
                  14    PIT         2015
                  15    SF          2015
                  16    DEN         2015
                  17    BAL         2015
Aaron Rodgers     1     CHI         2015
                  2     SEA         2015
                  3     KC          2015
                  4     SF          2015
                  5     LAR         2015
                  6     LAC         2015
                  8     DEN         2015
                  9     CAR         2015
                  10    DET         2015
                  11    MIN         2015
                  12    CHI         2015
                  13    DET         2015
                  14    DAL         2015
                  15    OAK         2015
                  16    ARI         2015
                  17    MIN         2015
Alex Smith        1     HOU         2015
                  2     

In [34]:
cols = qb_week.columns.tolist()
cols = cols[-1:] +cols[:-1]
qb_week = qb_week[cols]
qb_week.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,year,att_avg,comp%_avg,yds_avg,td_avg,rat_avg,log_att_avg,log_comp_avg,log_yds_avg
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AJ McCarron,12,LAR,2015,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48
AJ McCarron,13,CLE,2015,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88
AJ McCarron,14,PIT,2015,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4
AJ McCarron,15,SF,2015,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62
AJ McCarron,16,DEN,2015,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76


In [35]:
qb_stats = pd.concat([qb_week, qb_stats],axis=1)
qb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,year,att_avg,comp%_avg,yds_avg,td_avg,rat_avg,log_att_avg,log_comp_avg,log_yds_avg,att_ma,...,log_comp_ma,log_yds_ma,att_car,comp%_car,yds_car,td_car,rat_car,log_att_car,log_comp_car,log_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
AJ McCarron,12,LAR,2015,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48,0.0,...,0.0,0.0,1.0,100.0,3.0,0.0,79.17,0.0,0.0,0.48
AJ McCarron,13,CLE,2015,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88,0.0,...,0.0,0.0,2.0,83.35,11.0,0.0,81.6,0.24,0.15,0.88
AJ McCarron,14,PIT,2015,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4,0.0,...,0.0,0.0,12.0,78.5,100.67,0.67,84.61,0.66,0.55,1.4
AJ McCarron,15,SF,2015,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62,0.0,...,0.0,0.0,14.25,76.72,123.5,0.75,92.35,0.83,0.7,1.62
AJ McCarron,16,DEN,2015,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76,14.25,...,0.704886,1.621584,18.4,73.96,138.8,0.8,91.44,0.97,0.83,1.76


In [38]:
qb_stats.columns

Index(['year', 'att_avg', 'comp%_avg', 'yds_avg', 'td_avg', 'rat_avg',
       'log_att_avg', 'log_comp_avg', 'log_yds_avg', 'att_ma', 'comp%_ma',
       'yds_ma', 'td_ma', 'rat_ma', 'log_att_ma', 'log_comp_ma', 'log_yds_ma',
       'att_car', 'comp%_car', 'yds_car', 'td_car', 'rat_car', 'log_att_car',
       'log_comp_car', 'log_yds_car'],
      dtype='object')

In [120]:
qb_stats.to_csv('../input/qb/qb_stats.csv', index=True)

In [90]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

In [39]:
qb_stats.shape[1]

25

In [124]:
qb_stats = pd.read_csv('../input/qb/qb_stats.csv', index_col=[0,1,2])

In [40]:
#A function that will covert a time series database into a supervised learning database

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
            cols.append(df.shift(i))
            names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [41]:
qb_stats.loc['Aaron Rodgers']

Unnamed: 0_level_0,Unnamed: 1_level_0,year,att_avg,comp%_avg,yds_avg,td_avg,rat_avg,log_att_avg,log_comp_avg,log_yds_avg,att_ma,...,log_comp_ma,log_yds_ma,att_car,comp%_car,yds_car,td_car,rat_car,log_att_car,log_comp_car,log_yds_car
Week,Opponent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,CHI,2015,23.0,78.3,189.0,3.0,140.49,1.36,1.26,2.28,0.0,...,0.0,0.0,23.0,78.3,189.0,3.0,140.49,1.36,1.26,2.28
2,SEA,2015,28.0,77.05,219.0,2.5,128.68,1.44,1.33,2.34,0.0,...,0.0,0.0,28.0,77.05,219.0,2.5,128.68,1.44,1.33,2.34
3,KC,2015,30.33,74.23,257.0,3.33,131.93,1.47,1.34,2.4,0.0,...,0.0,0.0,30.33,74.23,257.0,3.33,131.93,1.47,1.34,2.4
4,SF,2015,30.75,72.88,248.75,2.75,123.69,1.48,1.34,2.39,0.0,...,0.0,0.0,30.75,72.88,248.75,2.75,123.69,1.48,1.34,2.39
5,LAR,2015,30.6,70.96,247.2,2.6,115.51,1.48,1.33,2.39,30.75,...,1.343962,2.386338,30.6,70.96,247.2,2.6,115.51,1.48,1.33,2.39
6,LAC,2015,30.33,68.33,248.5,2.5,114.2,1.48,1.31,2.39,30.6,...,1.33092,2.385474,30.33,68.33,248.5,2.5,114.2,1.48,1.31,2.39
8,DEN,2015,29.14,67.66,224.0,2.14,107.85,1.46,1.29,2.32,30.333333,...,1.309787,2.388985,29.14,67.66,224.0,2.14,107.85,1.46,1.29,2.32
9,CAR,2015,31.5,65.71,242.12,2.38,106.44,1.49,1.3,2.35,29.142857,...,1.286407,2.3172,31.5,65.71,242.12,2.38,106.44,1.49,1.3,2.35
10,DET,2015,34.78,64.79,252.22,2.33,103.9,1.52,1.33,2.37,31.5,...,1.300349,2.348428,34.78,64.79,252.22,2.33,103.9,1.52,1.33,2.37
11,MIN,2015,34.7,63.02,248.2,2.3,102.2,1.52,1.32,2.36,34.777778,...,1.327428,2.367764,34.7,63.02,248.2,2.3,102.2,1.52,1.32,2.36


In [44]:
points_dict = {}
reframed_dict = {}
for player in qb_stats.index.levels[0]:
    #points_dict['{}'.format(player)]['points'] = qb_stats.loc[player]['points'][0]
    #points_dict['{}'.format(player)]['def_rk'] = qb_stats.loc[player]['def_rk']
    #points_dict['{}'.format(player)]['h/a_1'] = qb_stats.loc[player]['h/a_1']
    #points_dict['{}'.format(player)]['surface_0']= qb_stats.loc[player]['surface_0']
    #points_dict['{}'.format(player)]['salary']=qb_stats.loc[player]['salary']
    #rop_list = ['points','def_rk','h/a_1','surface_0','salary']
    #qb_stats.drop(drop_list, axis=1, inplace=True)
    reframed_dict['{}_reframed'.format(player)]= series_to_supervised(qb_stats.loc[player])

In [46]:
reframed_dict['Aaron Rodgers_reframed'][['var1(t-1)','var1(t)']]

Unnamed: 0_level_0,Unnamed: 1_level_0,var1(t-1),var1(t)
Week,Opponent,Unnamed: 2_level_1,Unnamed: 3_level_1
2,SEA,23.0,28.0
3,KC,28.0,30.33
4,SF,30.33,30.75
5,LAR,30.75,30.6
6,LAC,30.6,30.33
8,DEN,30.33,29.14
9,CAR,29.14,31.5
10,DET,31.5,34.78
11,MIN,34.78,34.7
12,CHI,34.7,35.45


In [158]:
# A function to insert the non reframed columns back into the stats dataframe 

def _insert_non_reframed(df, col_dict):
    non_reframed = ['year','def_rk','h/a_1','surface_0','salary','points']
    for player in df.index.levels[0]:
        for col in non_reframed:
            non_reframed_dict['{}_reframed'.format(player)][col] = non_reframed_dict['{}'.format(player)][col][1:].values

In [109]:
reframed_dict['Aaron Rodgers_reframed']

Unnamed: 0_level_0,Unnamed: 1_level_0,var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var9(t-1),var10(t-1),...,var21(t),var22(t),var23(t),var24(t),def_rk,h/a_1,surface_0,salary,points,year
Week,Opponent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2,SEA,23.0,78.3,189.0,3.0,140.49,1.36,1.26,2.28,0.0,0.0,...,128.68,1.44,1.33,2.34,2,1.0,1.0,7100,22.26,2015
3,KC,28.0,77.05,219.0,2.5,128.68,1.44,1.33,2.34,0.0,0.0,...,131.93,1.47,1.34,2.4,9,1.0,1.0,6000,37.92,2015
4,SF,30.33,74.23,257.0,3.33,131.93,1.47,1.34,2.4,0.0,0.0,...,123.69,1.48,1.34,2.39,16,0.0,1.0,7200,16.26,2015
5,LAR,30.75,72.88,248.75,2.75,123.69,1.48,1.34,2.39,0.0,0.0,...,115.51,1.48,1.33,2.39,10,1.0,1.0,7900,18.54,2015
6,LAC,30.6,70.96,247.2,2.6,115.51,1.48,1.33,2.39,30.75,72.875,...,114.2,1.48,1.31,2.39,13,1.0,1.0,8100,19.6,2015
8,DEN,30.33,68.33,248.5,2.5,114.2,1.48,1.31,2.39,30.6,70.96,...,107.85,1.46,1.29,2.32,1,0.0,1.0,7500,6.18,2015
9,CAR,29.14,67.66,224.0,2.14,107.85,1.46,1.29,2.32,30.333333,68.333333,...,106.44,1.49,1.3,2.35,6,0.0,1.0,8500,36.96,2015
10,DET,31.5,65.71,242.12,2.38,106.44,1.49,1.3,2.35,29.142857,67.657143,...,103.9,1.52,1.33,2.37,23,1.0,1.0,6900,25.12,2015
11,MIN,34.78,64.79,252.22,2.33,103.9,1.52,1.33,2.37,31.5,65.7125,...,102.2,1.52,1.32,2.36,11,0.0,0.0,5000,19.08,2015
12,CHI,34.7,63.02,248.2,2.3,102.2,1.52,1.32,2.36,34.777778,64.788889,...,98.58,1.53,1.32,2.36,25,1.0,1.0,5300,14.38,2015


In [157]:
reframed_dict['Aaron Rodgers_reframed']['year']

Week  Opponent
2     SEA         2015
3     KC          2015
4     SF          2015
5     LAR         2015
6     LAC         2015
8     DEN         2015
9     CAR         2015
10    DET         2015
11    MIN         2015
12    CHI         2015
13    DET         2015
14    DAL         2015
15    OAK         2015
16    ARI         2015
17    MIN         2015
1     JAX         2016
2     MIN         2016
3     DET         2016
5     NYG         2016
6     DAL         2016
7     CHI         2016
8     ATL         2016
9     IND         2016
10    TEN         2016
11    WAS         2016
12    PHI         2016
14    SEA         2016
15    CHI         2016
16    MIN         2016
17    DET         2016
1     SEA         2017
2     ATL         2017
3     CIN         2017
4     CHI         2017
5     DAL         2017
6     MIN         2017
15    CAR         2017
1     CHI         2018
2     MIN         2018
3     WAS         2018
4     BUF         2018
Name: year, dtype: int64

In [162]:
qb_stats = pd.concat(reframed_dict)

In [71]:
train  = qb
test = week1

In [122]:
qb_stats.columns

Index(['year', 'att_avg', 'comp%_avg', 'yds_avg', 'td_avg', 'rat_avg',
       'log_att_avg', 'log_comp_avg', 'att_ma', 'comp%_ma', 'yds_ma', 'td_ma',
       'rat_ma', 'log_att_ma', 'log_comp_ma', 'log_yds_ma', 'att_car',
       'comp%_car', 'yds_car', 'td_car', 'rat_car', 'log_att_car',
       'log_comp_car', 'log_yds_car'],
      dtype='object')

In [73]:
week1.columns

Index(['Player', 'Team', 'Week', 'Opp', 'Comp', 'Att', 'Pct', 'Yds', 'Yds/Att',
       'TD', 'Int', 'ru_att', 'ru_yds', 'yds/ru_att', 'ru_td', 'points',
       'year', 'def_ru_rk', 'def_pass_rk'],
      dtype='object')

In [74]:
y_train = train['points']
y_test = test['points']
X_train = train.drop('points',axis=1)
X_test = test.drop('points', axis=1)

In [75]:
X_train = X_train[X_train.columns[4:]]
X_test = X_test[X_test.columns[4:]]

In [348]:

X_train = X_train[X_train.columns[4:11]]
X_test = X_test[X_test.columns[4:11]]

In [76]:
X_train.shape

(2729, 14)

In [77]:
X_test.shape

(57, 14)

In [34]:
X_train.columns

Index(['Comp', 'Att', 'Pct', 'Yds', 'Yds/Att', 'TD', 'Int', 'ru_att', 'ru_yds',
       'yds/ru_att', 'ru_td', 'year', 'def_ru_rk', 'def_pass_rk'],
      dtype='object')

In [78]:
reframed = series_to_supervised(X_train.values.astype('float32'),1,1)

In [79]:
lr = LinearRegression()
lr.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [80]:
coeff = pd.DataFrame(X_train.columns)
coeff['coefficients'] = lr.coef_
coeff



Unnamed: 0,0,coefficients
0,Comp,0.029938
1,Att,-0.076581
2,Pct,-0.013013
3,Yds,0.058688
4,Yds/Att,-0.179413
5,TD,4.002376
6,Int,-1.242807
7,ru_att,-0.086593
8,ru_yds,0.114284
9,yds/ru_att,-0.025317


In [81]:
yhat = lr.predict(X_test)
yhat.shape

(57,)

In [359]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

rmse_val = rmse(yhat, y_test)
print("RMSE error is: " + str(rmse_val))

RMSE error is: 1.68578446369


In [82]:
results = pd.DataFrame()
results['Player'] = week1['Player']
#results['score'] = y_test
results['Predicted'] = yhat
#results['Week'] = qb[qb['year']==2017]['Week']
#results.groupby('Player').mean()[['score','Predicted']]
results

Unnamed: 0,Player,Predicted
0,Aaron Rodgers,22.622505
1,Drew Brees,22.93539
2,Tom Brady,22.578469
3,Deshaun Watson,21.599166
4,Cam Newton,20.770403
5,Matthew Stafford,21.079952
6,Ben Roethlisberger,20.937474
7,Andrew Luck,20.630576
8,Philip Rivers,20.697301
9,Kirk Cousins,20.45094


In [323]:
results.shape

(300, 3)

In [84]:
results.to_csv('2018_week1_qb.csv', index= False)