In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [114]:
rb = pd.read_csv('../input/rb/rb_loc_cleaned.csv')

In [115]:
rb.dtypes

year           int64
Name          object
Team          object
Week           int64
Opponent      object
att            int64
yds            int64
td             int64
tar            int64
rec_yds        int64
def_rk         int64
h/a_1        float64
surface_0    float64
salary         int64
points       float64
dtype: object

In [116]:
rb['salary'] = rb['salary'].replace('null', 0)

In [117]:

def create_sorted_dict (df):
    df_sorted = df.sort_values(['year','Name','Week','Opponent'])
    sorted_dict = {}
    for player in df_sorted['Name']:
        sorted_dict['{}'.format(player)] = {'year':df_sorted[df_sorted['Name']==player]['year'],
                                            'points':df_sorted[df_sorted['Name']==player]['points'],
                                            'def_rk':df_sorted[df_sorted['Name']==player]['def_rk'],
                                            'surface_0':df_sorted[df_sorted['Name']==player]['surface_0'],
                                            'h/a_1':df_sorted[df_sorted['Name']== player]['h/a_1'],
                                            'salary':df_sorted[df_sorted['Name']==player]['salary']
                                           }
       
    return sorted_dict

In [118]:
rb_sorted_dict = create_sorted_dict(rb)

In [120]:
rb_sorted_dict.keys()

dict_keys(['Adrian Peterson', 'Ahmad Bradshaw', 'Akeem Hunt', 'Alfred Blue', 'Alfred Morris', 'Alonzo Harris', 'Ameer Abdullah', 'Andre Ellington', 'Andre Williams', 'Anthony Dixon', 'Anthony Sherman', 'Antone Smith', 'Antonio Andrews', 'Arian Foster', 'Austin Johnson', 'Benjamin Cunningham', 'Bernard Pierce', 'Bilal Powell', 'Bishop Sankey', 'Bobby Rainey', 'Branden Oliver', 'Brandon Bolden', 'Bruce Miller', 'Bryce Brown', 'CJ Anderson', 'CJ Spiller', 'Cameron Artis-Payne', 'Carlos Hyde', 'Charcandrick West', 'Charles Sims', 'Chris Ivory', 'Chris Johnson', 'Chris Polk', 'Chris Thompson', 'Christine Michael', 'Cierre Wood', 'Corey Grant', 'Damien Williams', 'Dan Herron', 'Danny Woodhead', 'Darrel Young', 'Darren McFadden', 'Darren Sproles', 'David Cobb', 'David Johnson', 'DeAngelo Williams', 'DeMarco Murray', 'Denard Robinson', 'Derrick Coleman', 'Devonta Freeman', 'Dexter McCluster', 'Dion Lewis', 'Donald Brown', 'Doug Martin', 'DuJuan Harris', 'Duke Johnson', 'Eddie Lacy', 'Fitzgeral

In [27]:
rb_sorted_dict['Jordan Howard']['points']

1374     5.1
1445    13.2
1506    19.2
1564    28.3
3903    12.0
1690     2.2
1742    33.2
1849    12.0
1905    10.9
1970    15.7
2031    32.7
4011    13.0
2161    21.3
4033    16.8
2296    16.5
2361    15.6
2433     0.7
2509    35.6
4078    14.3
2656     7.6
2721    21.6
2787     6.5
2842    18.1
2969     5.4
3029    21.5
3093     3.9
3159     4.3
3225    31.5
3295    10.3
3362    16.4
3427     2.4
3498    15.7
3573     9.8
3638    16.1
3704     2.5
Name: points, dtype: float64

In [13]:
# Create a data frame to investigate the points attribute of the dataset. 


def get_points_df(df):
    total_points = df['points'].sum()
    total_games = df['points'].count()

    val_lst = []
    sal_lst = []
    games_lst = []
    wght_avg=[]
    players = []
    avg_lst=[]

    for name in list(df['Name'].unique()):
        players.append(name)
        player_games = df[df['Name'] ==name].describe().loc['count'][0]
        games_lst.append(player_games)
        avg_points = round(df[df['Name'] ==name].describe()['points'].mean(),2)
        avg_lst.append(avg_points)
        wght_avg.append((round(avg_points + (avg_points* (player_games/total_games)),2)))
        sal_avg= round(df[df['Name'] ==name].describe()['salary'].mean(),2)
        sal_lst.append(sal_avg)
        val = round(sal_avg/avg_points,2)
        val_lst.append(val)
     
    df_eng = pd.DataFrame()
    df_eng['player'] = (df['Name'].unique())
    df_eng['games'] = games_lst
    df_eng['avg_points']= avg_lst
    df_eng['wght_avg'] = wght_avg
    df_eng['sal_avg'] = sal_lst
    df_eng['val'] = val_lst
    df_eng.head()
    
    return df_eng

In [14]:
rb_points = get_points_df(rb)

In [15]:
rb_points.to_csv('../input/rb/rb_points.csv', index=False)

## Group Dataframes

In [16]:
# A function to create a dictonary of grouped data frames

def grouped_dict(csv):
    
    df= pd.read_csv(csv)
    

    yearly_dict = {}
    for year in list(df['year'].unique()):
        yearly_dict['player_{}'.format(year)] = (df[df['year']==year]).drop('year',axis=1).groupby(['Name','Week','Opponent']).sum()
        yearly_dict['player_{}'.format(year)]


    return yearly_dict

        
        

In [17]:
def conv_atts(df):
    df['salary'] = df['salary'].apply(lambda x: int(x))
    df['h/a_1'] = df['h/a_1'].astype('category')
    df['surface_0'] = df['surface_0'].astype('category')
    df['def_rk'] = df['def_rk'].astype('category')

    return df

In [28]:
rb_group = grouped_dict('../input/rb/rb_loc_cleaned.csv')
rb = conv_atts(rb)
rb.dtypes

year            int64
Name           object
Team           object
Week            int64
Opponent       object
att             int64
yds             int64
td              int64
tar             int64
rec_yds         int64
def_rk       category
h/a_1        category
surface_0    category
salary          int64
points        float64
dtype: object

In [29]:
rb_group['player_2015'].columns

Index(['att', 'yds', 'td', 'tar', 'rec_yds', 'def_rk', 'h/a_1', 'surface_0',
       'salary', 'points'],
      dtype='object')

In [30]:
def drop_col (grouped_df):
    for group in grouped_df:
        grouped_df[group].drop('def_rk', axis=1,inplace=True)
        grouped_df[group].drop('h/a_1', axis=1, inplace=True)
        grouped_df[group].drop('surface_0',axis=1, inplace=True)
        grouped_df[group].drop('salary',axis=1, inplace=True)
        grouped_df[group].drop('points',axis=1, inplace=True)
    return grouped_df

In [31]:
rb = drop_col(rb_group)
rb['player_2015'].columns

Index(['att', 'yds', 'td', 'tar', 'rec_yds'], dtype='object')

## Career Average

In [32]:
# Week 1 stats are player's career average through 2014 season
# Populate current dataframe with current year averages of players

def career_average (grouped_df1, grouped_df2=None):
    new_grouped_df1 = pd.DataFrame(index= grouped_df1.index)
    for col in grouped_df1: # Iterate through every attribute in the team_stats dataframe
        points=[]  # Create an empty list to hold the season average of the current attribute

        try:
            for player in grouped_df1.index.levels[0]:  # Iterate through every player in the team_stats df
                if player in list(grouped_df2.index.levels[0].unique()):
                    total =[grouped_df2.loc[player][col][-1]] # If player not a rookie, use final average from 2015
                else:
                    total =[]  # create an empty list to hold the weekly attributes value
                
                for week in grouped_df1.loc[player].index: # Iterate throough every week for the current player
                    total.append(grouped_df1.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                    points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list
                    
            new_grouped_df1['{}_car'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe


                                
        except:
            for player in grouped_df1.index.levels[0]:  # Iterate through every player in the team_stats df
                total = []

                for week in grouped_df1.loc[player].index: # Iterate throough every week for the current player
                    total.append(grouped_df1.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                    points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list

            new_grouped_df1['{}_car'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe
    return new_grouped_df1

In [34]:
df_career_2015 = career_average(rb_group['player_2015'])
df_career_2016 = career_average(rb_group['player_2016'], rb_group['player_2015'])
df_career_2017 = career_average(rb_group['player_2017'], rb_group['player_2016'])
df_career_2018 = career_average(rb_group['player_2018'], rb_group['player_2017'])

In [35]:
player_career = pd.concat([df_career_2015, df_career_2016])
player_career = pd.concat([player_career, df_career_2017])
player_career = pd.concat([player_career, df_career_2018])

In [37]:
player_stats = pd.concat([rb_group['player_2015'],rb_group['player_2016']])
player_stats = pd.concat([player_stats,rb_group['player_2017']])
player_stats = pd.concat([player_stats,rb_group['player_2018']])
player_stats.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att,yds,td,tar,rec_yds
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Wayne Gallman,3,HOU,6,25,0,1,0
Wayne Gallman,4,NO,2,6,0,3,17
Wendell Smallwood,2,TB,7,28,0,1,2
Wendell Smallwood,3,IND,10,56,1,5,35
Wendell Smallwood,4,TEN,5,39,0,5,15


In [40]:
rb_stats =  player_career

In [41]:
rb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_car,yds_car,td_car,tar_car,rec_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adrian Peterson,1,SF,10.0,31.0,0.0,3.0,21.0
Adrian Peterson,2,DET,19.5,82.5,0.0,2.5,39.5
Adrian Peterson,3,LAC,19.67,97.0,0.67,1.67,26.33
Adrian Peterson,4,DEN,18.75,93.0,0.75,2.75,23.0
Adrian Peterson,6,KC,20.2,86.4,0.6,2.4,17.8


## Recent Player Stats
Calculate the 4 game moving average of all attributes for each player and insert resluts into positional dataframe

In [42]:
# A function designed to calcuate a player's 4 game moving average in seasons 2015,2016,2017,2018

def moving_average(grouped_df, games):
    new_grouped_df = pd.DataFrame(index = grouped_df.index)
    for col in grouped_df:  # Iterate through every attribute in the team_stats dataframe
        ma4 = []  # Create an empty list to hold the season average of the current attribute
        for player in grouped_df.index.levels[0]:  # Iterate through every player in the team_stats df
            total = [] # INstantiate a deque container with a max length of 3 to hold the three most current games for player
            count = 0 # Counter to track week number 
            for week in grouped_df.loc[player].index: # Iterate through every week for relevant player d
                if count < games: # check count value to start 3 game moving average
                    ma4.append(0)  # week 1 through 3 have 3 game movong average of 0
                    total.append(grouped_df.loc[player].loc[week][col])  # Add the value current player's attribute total from the left
                    count += 1  # Increase count
                else:  # Once 3 game moving average is avaliable
                    ma4.append(np.mean(total))  # Add the average of the 3 games held in totals 3 game moivng average list
                    total.append(grouped_df.loc[player].loc[week][col]) # Replace 3 set block with a first in first out process
        new_grouped_df['{}_ma'.format(col.lower())] = ma4  # Populate databse with 3 game moving average list
       

    return new_grouped_df   

In [43]:
player_ma_2015 = moving_average(rb_group['player_2015'],4)
player_ma_2016 = moving_average(rb_group['player_2016'],4)
player_ma_2017 = moving_average(rb_group['player_2017'],4)
player_ma_2018 = moving_average(rb_group['player_2018'],4)

In [44]:
player_yearly_ma = pd.concat([player_ma_2015, player_ma_2016])
player_yearly_ma = pd.concat([player_yearly_ma,player_ma_2017])
player_yearly_ma = pd.concat([player_yearly_ma,player_ma_2018])

In [45]:
rb_stats = pd.concat([player_yearly_ma,player_career], axis=1)
rb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_ma,yds_ma,td_ma,tar_ma,rec_yds_ma,att_car,yds_car,td_car,tar_car,rec_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Adrian Peterson,1,SF,0.0,0.0,0.0,0.0,0.0,10.0,31.0,0.0,3.0,21.0
Adrian Peterson,2,DET,0.0,0.0,0.0,0.0,0.0,19.5,82.5,0.0,2.5,39.5
Adrian Peterson,3,LAC,0.0,0.0,0.0,0.0,0.0,19.67,97.0,0.67,1.67,26.33
Adrian Peterson,4,DEN,0.0,0.0,0.0,0.0,0.0,18.75,93.0,0.75,2.75,23.0
Adrian Peterson,6,KC,18.75,93.0,0.75,2.75,23.0,20.2,86.4,0.6,2.4,17.8


## Current Stats
Weekly moving average for every year from 2015 through 2018

In [46]:
# Week 1 stats are player's career average through 2014 season
# Populate current dataframe with current year averages of players

def insert_data (grouped_df):
    new_df = pd.DataFrame(index=grouped_df.index)
    for col in grouped_df: # Iterate through every attribute in the team_stats dataframe
        points=[]  # Create an empty list to hold the season average of the current attribute

        for player in grouped_df.index.levels[0]:  # Iterate through every player in the team_stats df
            total =[]# create an empty list to hold the weekly attributes valu3

            for week in grouped_df.loc[player].index: # Iterate throough every week for the current player
                total.append(grouped_df.loc[player].loc[week][col]) # Add player's value for the current week's attribute to the total list
                points.append(round(np.mean(total),2))  # Add the average of the total list to the season average list

        new_df['{}_avg'.format(col.lower())] = points # Insert season average list for current attribute into the current stats dataframe
    return new_df

In [47]:
rb_week_2015 = insert_data(rb_group['player_2015'])
rb_week_2016 = insert_data(rb_group['player_2016'])
rb_week_2017 = insert_data(rb_group['player_2017'])
rb_week_2018 = insert_data(rb_group['player_2018'])

In [48]:
rb_week_2015.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,att_avg,yds_avg,td_avg,tar_avg,rec_yds_avg
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adrian Peterson,1,SF,10.0,31.0,0.0,3.0,21.0
Adrian Peterson,2,DET,19.5,82.5,0.0,2.5,39.5
Adrian Peterson,3,LAC,19.67,97.0,0.67,1.67,26.33
Adrian Peterson,4,DEN,18.75,93.0,0.75,2.75,23.0
Adrian Peterson,6,KC,20.2,86.4,0.6,2.4,17.8


In [49]:
rb_week_2015['year']=2015
rb_week_2016['year']=2016
rb_week_2017['year']=2017
rb_week_2018['year']=2018

In [50]:
rb_week = pd.concat([rb_week_2015,rb_week_2016])
rb_week = pd.concat([rb_week, rb_week_2017])
rb_week = pd.concat([rb_week, rb_week_2018])

In [59]:
year = rb_week['year']
year

Name               Week  Opponent
Adrian Peterson    1     SF          2015
                   2     DET         2015
                   3     LAC         2015
                   4     DEN         2015
                   6     KC          2015
                   7     DET         2015
                   8     CHI         2015
                   9     LAR         2015
                   10    OAK         2015
                   11    GB          2015
                   12    ATL         2015
                   13    SEA         2015
                   14    ARI         2015
                   15    CHI         2015
                   16    NYG         2015
                   17    GB          2015
Ahmad Bradshaw     6     NE          2015
                   7     NO          2015
                   8     CAR         2015
                   9     DEN         2015
                   11    ATL         2015
                   12    TB          2015
Akeem Hunt         11    NYJ         2015


In [52]:
cols = rb_week.columns.tolist()
cols = cols[-1:] +cols[:-1]
rb_week = rb_week[cols]
rb_week.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,year,att_avg,yds_avg,td_avg,tar_avg,rec_yds_avg
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adrian Peterson,1,SF,2015,10.0,31.0,0.0,3.0,21.0
Adrian Peterson,2,DET,2015,19.5,82.5,0.0,2.5,39.5
Adrian Peterson,3,LAC,2015,19.67,97.0,0.67,1.67,26.33
Adrian Peterson,4,DEN,2015,18.75,93.0,0.75,2.75,23.0
Adrian Peterson,6,KC,2015,20.2,86.4,0.6,2.4,17.8


In [53]:
rb_stats = pd.concat([rb_week, rb_stats],axis=1)
rb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,year,att_avg,yds_avg,td_avg,tar_avg,rec_yds_avg,att_ma,yds_ma,td_ma,tar_ma,rec_yds_ma,att_car,yds_car,td_car,tar_car,rec_yds_car
Name,Week,Opponent,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Adrian Peterson,1,SF,2015,10.0,31.0,0.0,3.0,21.0,0.0,0.0,0.0,0.0,0.0,10.0,31.0,0.0,3.0,21.0
Adrian Peterson,2,DET,2015,19.5,82.5,0.0,2.5,39.5,0.0,0.0,0.0,0.0,0.0,19.5,82.5,0.0,2.5,39.5
Adrian Peterson,3,LAC,2015,19.67,97.0,0.67,1.67,26.33,0.0,0.0,0.0,0.0,0.0,19.67,97.0,0.67,1.67,26.33
Adrian Peterson,4,DEN,2015,18.75,93.0,0.75,2.75,23.0,0.0,0.0,0.0,0.0,0.0,18.75,93.0,0.75,2.75,23.0
Adrian Peterson,6,KC,2015,20.2,86.4,0.6,2.4,17.8,18.75,93.0,0.75,2.75,23.0,20.2,86.4,0.6,2.4,17.8


In [60]:
rb_stats.drop('year', axis=1, inplace=True)

In [61]:
rb_stats.columns

Index(['att_avg', 'yds_avg', 'td_avg', 'tar_avg', 'rec_yds_avg', 'att_ma',
       'yds_ma', 'td_ma', 'tar_ma', 'rec_yds_ma', 'att_car', 'yds_car',
       'td_car', 'tar_car', 'rec_yds_car'],
      dtype='object')

In [54]:
#A function that will covert a time series database into a supervised learning database

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
            cols.append(df.shift(i))
            names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [72]:
rb_stats.loc['Jordan Howard'].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,att_avg,yds_avg,td_avg,tar_avg,rec_yds_avg,att_ma,yds_ma,td_ma,tar_ma,rec_yds_ma,att_car,yds_car,td_car,tar_car,rec_yds_car
Week,Opponent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2,PHI,3.0,22.0,0.0,2.0,9.0,0.0,0.0,0.0,0.0,0.0,3.0,22.0,0.0,2.0,9.0
3,DAL,6.0,33.5,0.0,4.0,28.0,0.0,0.0,0.0,0.0,0.0,6.0,33.5,0.0,4.0,28.0
4,DET,11.67,59.33,0.0,4.0,25.67,0.0,0.0,0.0,0.0,0.0,11.67,59.33,0.0,4.0,25.67
5,IND,12.75,74.0,0.0,3.75,30.5,0.0,0.0,0.0,0.0,0.0,12.75,74.0,0.0,3.75,30.5
6,JAX,13.2,66.0,0.2,3.8,25.6,12.75,74.0,0.0,3.75,30.5,13.2,66.0,0.2,3.8,25.6


In [62]:
points_dict = {}
reframed_dict = {}
for player in rb_stats.index.levels[0]:
    #points_dict['{}'.format(player)]['points'] = qb_stats.loc[player]['points'][0]
    #points_dict['{}'.format(player)]['def_rk'] = qb_stats.loc[player]['def_rk']
    #points_dict['{}'.format(player)]['h/a_1'] = qb_stats.loc[player]['h/a_1']
    #points_dict['{}'.format(player)]['surface_0']= qb_stats.loc[player]['surface_0']
    #points_dict['{}'.format(player)]['salary']=qb_stats.loc[player]['salary']
    #rop_list = ['points','def_rk','h/a_1','surface_0','salary']
    #qb_stats.drop(drop_list, axis=1, inplace=True)
    reframed_dict['{}_reframed'.format(player)]= series_to_supervised(rb_stats.loc[player])

In [77]:
reframed_dict.keys()

dict_keys(['Aaron Jones_reframed', 'Aaron Ripkowski_reframed', 'Adrian Peterson_reframed', 'Ahmad Bradshaw_reframed', 'Akeem Hunt_reframed', 'Alex Armah_reframed', 'Alex Collins_reframed', 'Alfred Blue_reframed', 'Alfred Morris_reframed', 'Alonzo Harris_reframed', 'Alvin Kamara_reframed', 'Ameer Abdullah_reframed', 'Andre Ellington_reframed', 'Andre Williams_reframed', 'Andy Janovich_reframed', 'Anthony Dixon_reframed', 'Anthony Sherman_reframed', 'Antone Smith_reframed', 'Antonio Andrews_reframed', 'Arian Foster_reframed', 'Austin Ekeler_reframed', 'Austin Johnson_reframed', 'Benjamin Cunningham_reframed', 'Bernard Pierce_reframed', 'Bilal Powell_reframed', 'Bishop Sankey_reframed', 'Bobby Rainey_reframed', 'Branden Oliver_reframed', 'Brandon Bolden_reframed', 'Brandon Burks_reframed', 'Brandon Wilds_reframed', 'Brian Hill_reframed', 'Bronson Hill_reframed', 'Bruce Miller_reframed', 'Bryce Brown_reframed', 'Byron Marshall_reframed', 'CJ Anderson_reframed', 'CJ Ham_reframed', 'CJ Prosi

In [150]:
# A function to insert the non reframed columns back into the stats dataframe 

def insert_non_reframed(df, col_dict):
    non_reframed = ['year','def_rk','h/a_1','surface_0','salary','points']
    for player in df.index.levels[0]:
        for col in non_reframed:
            reframed_dict['{}_reframed'.format(player)][col] = col_dict['{}'.format(player)][col][1:].val

In [139]:
non_reframed = ['year','def_rk','h/a_1','surface_0','salary','points']
for player in rb_stats.index.levels[0]:
    for col in non_reframed:
        reframed_dict['{}_reframed'.format(player)][col] = rb_sorted_dict['{}'.format(player)][col][1:].value

In [148]:
reframed_dict['Aaron Jones_reframed']

Unnamed: 0_level_0,Unnamed: 1_level_0,var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var9(t-1),var10(t-1),...,var6(t),var7(t),var8(t),var9(t),var10(t),var11(t),var12(t),var13(t),var14(t),var15(t)
Week,Opponent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
5,DAL,13.0,49.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,16.0,87.0,1.0,0.5,4.5
6,MIN,16.0,87.0,1.0,0.5,4.5,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,15.0,71.67,0.67,1.67,3.33
7,NO,15.0,71.67,0.67,1.67,3.33,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,15.5,86.5,0.75,2.5,4.25
9,DET,15.5,86.5,0.75,2.5,4.25,0.0,0.0,0.0,0.0,0.0,...,15.5,86.5,0.75,2.5,4.25,13.4,71.6,0.6,3.0,3.2
10,CHI,13.4,71.6,0.6,3.0,3.2,15.5,86.5,0.75,2.5,4.25,...,13.4,71.6,0.6,3.0,3.2,11.67,61.67,0.5,2.67,2.67
13,TB,11.67,61.67,0.5,2.67,2.67,13.4,71.6,0.6,3.0,3.2,...,11.666667,61.666667,0.5,2.666667,2.666667,10.14,55.71,0.57,2.29,2.29
14,CLE,10.14,55.71,0.57,2.29,2.29,11.666667,61.666667,0.5,2.666667,2.666667,...,10.142857,55.714286,0.571429,2.285714,2.285714,9.38,48.5,0.5,2.0,2.0
15,CAR,9.38,48.5,0.5,2.0,2.0,10.142857,55.714286,0.571429,2.285714,2.285714,...,9.375,48.5,0.5,2.0,2.0,8.67,48.33,0.44,2.0,2.44
16,MIN,8.67,48.33,0.44,2.0,2.44,9.375,48.5,0.5,2.0,2.0,...,8.666667,48.333333,0.444444,2.0,2.444444,8.1,44.8,0.4,1.8,2.2
3,WAS,8.1,44.8,0.4,1.8,2.2,8.666667,48.333333,0.444444,2.0,2.444444,...,0.0,0.0,0.0,0.0,0.0,4.5,27.5,0.0,0.5,2.5


In [122]:
for player in rb_stats.index.levels[0]:
    print('{}_reframed'.format(player))

Aaron Jones_reframed
Aaron Ripkowski_reframed
Adrian Peterson_reframed
Ahmad Bradshaw_reframed
Akeem Hunt_reframed
Alex Armah_reframed
Alex Collins_reframed
Alfred Blue_reframed
Alfred Morris_reframed
Alonzo Harris_reframed
Alvin Kamara_reframed
Ameer Abdullah_reframed
Andre Ellington_reframed
Andre Williams_reframed
Andy Janovich_reframed
Anthony Dixon_reframed
Anthony Sherman_reframed
Antone Smith_reframed
Antonio Andrews_reframed
Arian Foster_reframed
Austin Ekeler_reframed
Austin Johnson_reframed
Benjamin Cunningham_reframed
Bernard Pierce_reframed
Bilal Powell_reframed
Bishop Sankey_reframed
Bobby Rainey_reframed
Branden Oliver_reframed
Brandon Bolden_reframed
Brandon Burks_reframed
Brandon Wilds_reframed
Brian Hill_reframed
Bronson Hill_reframed
Bruce Miller_reframed
Bryce Brown_reframed
Byron Marshall_reframed
CJ Anderson_reframed
CJ Ham_reframed
CJ Prosise_reframed
CJ Spiller_reframed
Cameron Artis-Payne_reframed
Carlos Hyde_reframed
Cedric Peerman_reframed
Charcandrick West_re

In [142]:
rb_stats = pd.concat(reframed_dict)

In [146]:
rb_stats.columns

Index(['var1(t-1)', 'var2(t-1)', 'var3(t-1)', 'var4(t-1)', 'var5(t-1)',
       'var6(t-1)', 'var7(t-1)', 'var8(t-1)', 'var9(t-1)', 'var10(t-1)',
       'var11(t-1)', 'var12(t-1)', 'var13(t-1)', 'var14(t-1)', 'var15(t-1)',
       'var1(t)', 'var2(t)', 'var3(t)', 'var4(t)', 'var5(t)', 'var6(t)',
       'var7(t)', 'var8(t)', 'var9(t)', 'var10(t)', 'var11(t)', 'var12(t)',
       'var13(t)', 'var14(t)', 'var15(t)'],
      dtype='object')

In [164]:
qb_stats.to_csv('../input/qb/qb_stats.csv', index=True)

In [71]:
train  = qb
test = week1

In [122]:
qb_stats.columns

Index(['year', 'att_avg', 'comp%_avg', 'yds_avg', 'td_avg', 'rat_avg',
       'log_att_avg', 'log_comp_avg', 'att_ma', 'comp%_ma', 'yds_ma', 'td_ma',
       'rat_ma', 'log_att_ma', 'log_comp_ma', 'log_yds_ma', 'att_car',
       'comp%_car', 'yds_car', 'td_car', 'rat_car', 'log_att_car',
       'log_comp_car', 'log_yds_car'],
      dtype='object')

In [73]:
week1.columns

Index(['Player', 'Team', 'Week', 'Opp', 'Comp', 'Att', 'Pct', 'Yds', 'Yds/Att',
       'TD', 'Int', 'ru_att', 'ru_yds', 'yds/ru_att', 'ru_td', 'points',
       'year', 'def_ru_rk', 'def_pass_rk'],
      dtype='object')

In [74]:
y_train = train['points']
y_test = test['points']
X_train = train.drop('points',axis=1)
X_test = test.drop('points', axis=1)

In [75]:
X_train = X_train[X_train.columns[4:]]
X_test = X_test[X_test.columns[4:]]

In [348]:

X_train = X_train[X_train.columns[4:11]]
X_test = X_test[X_test.columns[4:11]]

In [76]:
X_train.shape

(2729, 14)

In [77]:
X_test.shape

(57, 14)

In [34]:
X_train.columns

Index(['Comp', 'Att', 'Pct', 'Yds', 'Yds/Att', 'TD', 'Int', 'ru_att', 'ru_yds',
       'yds/ru_att', 'ru_td', 'year', 'def_ru_rk', 'def_pass_rk'],
      dtype='object')

In [78]:
reframed = series_to_supervised(X_train.values.astype('float32'),1,1)

In [79]:
lr = LinearRegression()
lr.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [80]:
coeff = pd.DataFrame(X_train.columns)
coeff['coefficients'] = lr.coef_
coeff



Unnamed: 0,0,coefficients
0,Comp,0.029938
1,Att,-0.076581
2,Pct,-0.013013
3,Yds,0.058688
4,Yds/Att,-0.179413
5,TD,4.002376
6,Int,-1.242807
7,ru_att,-0.086593
8,ru_yds,0.114284
9,yds/ru_att,-0.025317


In [81]:
yhat = lr.predict(X_test)
yhat.shape

(57,)

In [359]:
def rmse(predictions, targets):
    return np.sqrt(((predictions - targets) ** 2).mean())

rmse_val = rmse(yhat, y_test)
print("RMSE error is: " + str(rmse_val))

RMSE error is: 1.68578446369


In [82]:
results = pd.DataFrame()
results['Player'] = week1['Player']
#results['score'] = y_test
results['Predicted'] = yhat
#results['Week'] = qb[qb['year']==2017]['Week']
#results.groupby('Player').mean()[['score','Predicted']]
results

Unnamed: 0,Player,Predicted
0,Aaron Rodgers,22.622505
1,Drew Brees,22.93539
2,Tom Brady,22.578469
3,Deshaun Watson,21.599166
4,Cam Newton,20.770403
5,Matthew Stafford,21.079952
6,Ben Roethlisberger,20.937474
7,Andrew Luck,20.630576
8,Philip Rivers,20.697301
9,Kirk Cousins,20.45094


In [323]:
results.shape

(300, 3)

In [84]:
results.to_csv('2018_week1_qb.csv', index= False)