In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the quarterback stats table from the CSV file into a DataFrame.
qb = pd.read_csv(filepath_or_buffer='../input/qb/qb_stats.csv')

In [3]:

# Show the dtype pandas inferred for every column of the loaded frame.
qb.dtypes

Unnamed: 0      int64
Name           object
Week          float64
Opponent       object
att_avg       float64
comp%_avg     float64
yds_avg       float64
td_avg        float64
rat_avg       float64
def_rk_avg    float64
att_ma        float64
comp%_ma      float64
yds_ma        float64
td_ma         float64
rat_ma        float64
def_rk_ma     float64
att_car       float64
comp%_car     float64
yds_car       float64
td_car        float64
rat_car       float64
def_rk_car    float64
games         float64
year          float64
def_rk        float64
h/a_1         float64
surface_0     float64
points        float64
dtype: object

In [6]:
def conv_atts(df):
    """Return a copy of ``df`` with the two indicator columns stored as categoricals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``'h/a_1'`` and ``'surface_0'``.

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``'h/a_1'`` and ``'surface_0'`` have dtype
        ``category``; every other column keeps its dtype.

    Raises
    ------
    KeyError
        If either column is missing from ``df``.
    """
    # ``astype`` with a column -> dtype mapping builds a new frame, so the
    # caller's frame is left untouched and re-running the calling cell gives
    # the same result every time.
    return df.astype({'h/a_1': 'category', 'surface_0': 'category'})

In [7]:
# Rebind qb to the frame returned by conv_atts ('h/a_1' and 'surface_0' become
# category columns), then display the dtypes to confirm the conversion.
qb = conv_atts(qb)
qb.dtypes


Unnamed: 0       int64
Name            object
Week           float64
Opponent        object
att_avg        float64
comp%_avg      float64
yds_avg        float64
td_avg         float64
rat_avg        float64
def_rk_avg     float64
att_ma         float64
comp%_ma       float64
yds_ma         float64
td_ma          float64
rat_ma         float64
def_rk_ma      float64
att_car        float64
comp%_car      float64
yds_car        float64
td_car         float64
rat_car        float64
def_rk_car     float64
games          float64
year           float64
def_rk         float64
h/a_1         category
surface_0     category
points         float64
dtype: object

In [9]:
# Count missing values per column.
qb.isnull().sum()

Unnamed: 0    0
Name          0
Week          0
Opponent      0
att_avg       0
comp%_avg     0
yds_avg       0
td_avg        0
rat_avg       0
def_rk_avg    0
att_ma        0
comp%_ma      0
yds_ma        0
td_ma         0
rat_ma        0
def_rk_ma     0
att_car       0
comp%_car     0
yds_car       0
td_car        0
rat_car       0
def_rk_car    0
games         0
year          0
def_rk        0
h/a_1         0
surface_0     0
points        0
dtype: int64

In [11]:
# Spot-check the rows for a single player.
qb.loc[qb['Name'].eq('Patrick Mahomes')]

Unnamed: 0.1,Unnamed: 0,Name,Week,Opponent,att_avg,comp%_avg,yds_avg,td_avg,rat_avg,def_rk_avg,...,yds_car,td_car,rat_car,def_rk_car,games,year,def_rk,h/a_1,surface_0,points
1472,1654,Patrick Mahomes,17.0,LAC,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,2018.0,24.0,0.0,1.0,28.34
1473,1655,Patrick Mahomes,1.0,PIT,0.0,0.0,0.0,0.0,0.0,0.0,...,216.79,1.43,102.24,17.64,3.0,2018.0,31.0,0.0,1.0,41.84
1474,1656,Patrick Mahomes,2.0,SF,27.0,55.6,256.0,4.0,127.47,24.0,...,146.5,0.0,81.33,12.5,4.0,2018.0,26.0,1.0,1.0,28.26
1475,1657,Patrick Mahomes,3.0,DEN,27.5,68.85,291.0,5.0,141.12,27.5,...,196.67,1.0,91.81,14.67,5.0,2018.0,19.0,0.0,1.0,25.86
1476,1658,Patrick Mahomes,4.0,DEN,31.0,66.97,298.67,4.33,132.56,27.0,...,177.25,0.75,81.84,14.0,1.0,2017.0,10.0,0.0,1.0,11.36


In [68]:
# Training rows: every row that is neither from the 2018 season nor from week 4
# (week 4 is dropped for all seasons, not only 2018).
train = qb[qb['year'].ne(2018) & qb['Week'].ne(4)]
# Seasons that remain in the training split.
train['year'].unique()

array([2015, 2017, 2016], dtype=int64)

In [69]:
# (rows, columns) of the training split.
train.shape

(1554, 30)

In [70]:
# Hold-out rows: week 4 of the 2018 season only.
test = qb[qb['year'].eq(2018) & qb['Week'].eq(4)]
test.shape

(29, 30)

In [71]:
# List the column labels; the positional slicing of predictors depends on this order.
test.columns

Index(['year', 'player', 'Week', 'Opponent', 'games_avg', 'att_avg',
       'comp%_avg', 'yds_avg', 'td_avg', 'rat_avg', 'def_rk_avg', 'games_ma',
       'att_ma', 'comp%_ma', 'yds_ma', 'td_ma', 'rat_ma', 'def_rk_ma',
       'games_car', 'att_car', 'comp%_car', 'yds_car', 'td_car', 'rat_car',
       'def_rk_car', 'def_rk', 'h/a_1', 'surface_0', 'salary', 'points'],
      dtype='object')

In [72]:
# Predictors: a positional window that skips the first four columns and the
# last two. NOTE(review): going by the column listing displayed for `test`,
# the last two are 'salary' and 'points', so this drops salary as well as the
# target -- confirm the column order before relying on these offsets.
X_train, X_test = (frame.iloc[:, 4:-2] for frame in (train, test))

# Targets: the 'points' column of each split.
y_train, y_test = (frame['points'] for frame in (train, test))

In [73]:

# Confirm which columns the positional slice kept as predictors.
X_test.columns

Index(['games_avg', 'att_avg', 'comp%_avg', 'yds_avg', 'td_avg', 'rat_avg',
       'def_rk_avg', 'games_ma', 'att_ma', 'comp%_ma', 'yds_ma', 'td_ma',
       'rat_ma', 'def_rk_ma', 'games_car', 'att_car', 'comp%_car', 'yds_car',
       'td_car', 'rat_car', 'def_rk_car', 'def_rk', 'h/a_1', 'surface_0'],
      dtype='object')

In [90]:
# Min-max scale the predictors. The scaler is fitted on the training rows only
# and then applied unchanged to the test rows, so no test information leaks
# into the scaling parameters. The results are bound to names so they can be
# used later; previously the transformed array was displayed and then lost.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# NOTE(review): the regression cells are called with X_train / X_test, not with
# these scaled arrays -- switch them over if scaling is meant to affect the model.
X_train_scaled

array([[ 0.        ,  1.        ,  0.0070922 , ...,  0.        ,
         0.51998093,  0.14457831],
       [ 0.01960784,  0.8335    ,  0.02600473, ...,  0.19142857,
         0.5486886 ,  0.39759036],
       [ 0.21568627,  0.785     ,  0.23799054, ...,  0.21428571,
         0.62250835,  0.5       ],
       ..., 
       [ 0.46411765,  0.6547    ,  0.33174941, ...,  0.21428571,
         0.48526466,  0.81325301],
       [ 0.48039216,  0.6695    ,  0.39184397, ...,  0.22857143,
         0.45808298,  0.85542169],
       [ 0.58039216,  0.6414    ,  0.42411348, ...,  0.19142857,
         0.36185026,  0.8253012 ]])

In [74]:
# (rows, predictor columns) of the test matrix.
X_test.shape

(29, 24)

In [75]:
# (rows, predictor columns) of the training matrix; the column count must match X_test.
X_train.shape

(1554, 24)

In [16]:
# Renumber the training rows 0..n-1. Rebinding the names (instead of
# `inplace=True`) avoids mutating objects that were sliced out of `train`,
# which can trigger SettingWithCopyWarning, and keeps the cell safe to re-run.
y_train = y_train.reset_index(drop=True)
X_train = X_train.reset_index(drop=True)

In [76]:
# Linear regression estimator constructed with its default settings.
lr = LinearRegression()

In [77]:
# Fit the model on the training predictors and targets (X_train is passed as-is).
lr.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [78]:
# Pair each predictor name (column 0) with the coefficient the model learned for it.
coeff = pd.DataFrame(X_train.columns).assign(coefficients=lr.coef_)
coeff



Unnamed: 0,0,coefficients
0,games_avg,-1.557618
1,att_avg,0.06954
2,comp%_avg,0.094842
3,yds_avg,0.011553
4,td_avg,1.067263
5,rat_avg,-0.096874
6,def_rk_avg,0.040532
7,games_ma,-0.086614
8,att_ma,-0.216187
9,comp%_ma,0.118289


In [79]:
# Re-check the training matrix dimensions before predicting.
X_train.shape

(1554, 24)

In [80]:
# Predict the target for the held-out rows with the fitted model.
yhat = lr.predict(X_test)

In [81]:
def rmse(predictions, targets):
    """Root-mean-squared error between predictions and targets, compared by position.

    Parameters
    ----------
    predictions, targets : array-like of numbers
        Lists, NumPy arrays, or pandas Series. Element ``i`` of one is compared
        with element ``i`` of the other; any pandas index labels are ignored.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((predictions - targets) ** 2))``. NaN in either input
        propagates to the result.
    """
    # Convert to plain float arrays first. Subtracting two pandas Series aligns
    # them by index label, which silently yields NaN (or pairs the wrong rows)
    # when the labels differ, e.g. a filtered test split vs. fresh predictions.
    pred_arr = np.asarray(predictions, dtype=float)
    target_arr = np.asarray(targets, dtype=float)
    return np.sqrt(np.mean((pred_arr - target_arr) ** 2))

# Score the hold-out predictions and report the error.
rmse_val = rmse(predictions=yhat, targets=y_test)
print("RMSE error is: " + str(rmse_val))

RMSE error is: 11.886112724


In [82]:
# Side-by-side table of each test-row player, the model's prediction, and the
# actual points. The row index is inherited from `test`; `yhat` is positional
# and must have one entry per test row.
results = pd.DataFrame({
    'Player': test['player'],
    'Predicted': yhat,
    'points': test['points'],
})
results

Unnamed: 0,Player,Predicted,points
49,Aaron Rodgers_reframed,20.573807,17.02
123,Andrew Luck_reframed,19.606521,39.66
171,Andy Dalton_reframed,24.064387,28.78
177,Baker Mayfield_reframed,14.830017,16.8
221,Ben Roethlisberger_reframed,20.109991,15.86
293,Blake Bortles_reframed,17.671645,28.32
497,Carson Wentz_reframed,18.251134,24.72
531,Case Keenum_reframed,22.272994,8.8
606,Dak Prescott_reframed,14.274891,18.4
666,Derek Carr_reframed,22.945548,36.58


In [83]:
# Summary statistics for predicted vs. actual points.
# NOTE(review): the original cell was an unfinished expression (`results.`) that
# raised SyntaxError and broke Run All; swap in whatever inspection was intended.
results.describe()

In [24]:
# Persist the prediction table without the row index.
results.to_csv(path_or_buf='../output/qb_test.csv', index=False)