## Quarterbacks

In [32]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import pandas as pd
pd.options.display.max_columns = None  # No column limit: wide frames render every column
import warnings
# NOTE: this installs a blanket "ignore" filter, so every warning raised from
# here on (including library deprecation notices) is hidden.
warnings.filterwarnings('ignore')
from fantasy_football import Fantasy

# Instantiate the project's Fantasy helper; later cells call its
# getAdvancedStats method to fetch the advanced stat tables.
football = Fantasy()

##### First, let's grab our QB data from our data-collection notebook

In [34]:
# Restore QB_data from IPython's %store database. It must have been saved
# beforehand with `%store QB_data`, otherwise the name stays undefined.
%store -r QB_data

##### Next, we pull more advanced data for rushing and passing

In [35]:
# Advanced rushing stats: request the "rushing" table for "2020" from the Fantasy helper.
rushing_advanced = football.getAdvancedStats("rushing", "2020")

In [40]:
# Advanced passing stats (not receiving): request the "passing" table for "2020".
passing_advanced = football.getAdvancedStats("passing", "2020")

In [41]:
# Preview the first rows of the advanced passing table to check its columns.
passing_advanced.head()

Unnamed: 0,Rk,Player,Age,IAY/PA,CAY/Cmp,CAY/PA,YAC/Cmp
0,1,D. Watson,25,8.9,7.5,5.2,5.2
1,2,P. Mahomes,25,8.4,6.3,4.2,5.8
2,3,T. Brady,43,9.1,7.1,4.6,4.5
3,4,M. Ryan,35,8.5,7.2,4.7,4.0
4,5,J. Allen,24,8.5,6.8,4.7,4.7


##### Let's merge our advanced data

In [42]:
# Player names are abbreviated ("J. Allen"), so a stat table can contain the
# same name more than once. A left join would then emit one output row per
# match, and those extra rows would skew any column statistics computed on
# QB_advanced before duplicates are removed. Keep only the first occurrence
# of each name in the right-hand tables so each QB row joins to at most one
# row from each of them.
rushing_unique = rushing_advanced.drop_duplicates(subset='Player', keep='first')
passing_unique = passing_advanced.drop_duplicates(subset='Player', keep='first')

# Left joins keep every quarterback from QB_data; players with no match in a
# stat table get NaN in that table's columns.
QB_advanced = (
    QB_data
    .merge(rushing_unique, on='Player', how='left')
    .merge(passing_unique, on='Player', how='left')
)

In [43]:
# Display the merged table to inspect the joined columns and spot missing values.
QB_advanced

Unnamed: 0,Player,Pos,Tm,GP,Comp,Inc,Passing_Yds,Passing_Td,Int,Pic6,Sks,Passing_1st,Att,Rushing_Yds,Rushing_Td,Rushing_1st,Total,Lost,PPG,Rank,YBC/Att,YAC/Att,Att/Br,Rk,Age,IAY/PA,CAY/Cmp,CAY/PA,YAC/Cmp
0,L. Jackson,QB,Bal,15,16.133333,8.933333,183.8,1.733333,0.6,0.066667,1.933333,9.2,10.6,67.0,0.466667,3.733333,0.666667,0.266667,21.081667,7.0,5.0,1.3,19.9,22,23,8.6,6.6,4.2,4.8
1,J. Allen,QB,Buf,16,24.75,11.0,284.0,2.3125,0.625,0.0,1.625,14.25,6.375,26.3125,0.5,2.375,0.5625,0.375,24.563529,31.0,2.8,1.3,25.5,5,24,8.5,6.8,4.7,4.7
2,R. Wilson,QB,Sea,16,24.0,10.875,263.25,2.5,0.8125,0.0625,2.9375,13.3125,5.1875,32.0625,0.125,1.5625,0.4375,0.25,17.772857,3.0,5.9,0.3,,9,32,8.6,6.2,4.3,4.8
3,K. Murray,QB,Ari,16,23.4375,11.4375,248.1875,1.625,0.75,0.0625,1.6875,12.8125,8.3125,51.1875,0.6875,3.25,0.5,0.25,22.177143,20.0,5.2,1.0,26.6,13,23,7.8,5.9,4.0,4.7
4,P. Mahomes,QB,Kc,15,26.0,13.2,316.0,2.533333,0.4,0.0,1.466667,15.866667,4.133333,20.533333,0.133333,1.4,0.333333,0.133333,22.038824,29.0,4.2,0.8,31.0,2,25,8.4,6.3,4.2,5.8
5,D. Prescott,QB,Dal,5,30.2,14.2,371.2,1.8,0.8,0.2,2.0,18.6,3.6,18.6,0.6,1.6,0.6,0.6,20.66,5.0,3.6,1.6,9.0,32,27,7.9,6.9,4.7,5.4
6,J. Herbert,QB,Lac,15,26.4,13.266667,289.066667,2.066667,0.666667,0.0,2.133333,14.4,3.666667,15.6,0.333333,1.333333,0.533333,0.066667,23.28,6.0,3.2,1.0,27.5,6,22,7.4,5.4,3.6,5.6
7,T. Brady,QB,Tb,16,25.0625,13.0625,289.5625,2.5,0.75,0.125,1.3125,14.5625,1.875,0.375,0.1875,0.375,0.25,0.0625,22.749412,11.0,-0.1,0.3,30.0,3,43,9.1,7.1,4.6,4.5
8,A. Rodgers,QB,Gb,16,23.25,9.625,268.6875,3.0,0.3125,0.0625,1.25,13.5,2.375,9.3125,0.1875,0.9375,0.25,0.125,21.01875,13.0,3.8,0.2,,7,37,7.9,5.5,3.9,6.0
9,R. Tannehill,QB,Ten,16,19.6875,10.375,238.6875,2.0625,0.4375,0.0,1.5,12.625,2.6875,16.625,0.4375,1.375,0.375,0.0625,16.609412,25.0,5.1,1.0,,15,32,8.4,7.3,4.8,4.8


##### Some players did not match, but we still want to include them! Let's fill in the resulting NaN values with column averages.

First, let's convert the relevant columns to float types

In [44]:
# Columns that hold numbers but may contain empty strings for missing
# entries, which keeps them from being treated as numeric.
cols_to_change = ['Att/Br']

# Turn empty strings into NaN, then cast to float. float('nan') replaces the
# `pd.np.nan` alias: `pandas.np` was deprecated in pandas 1.0 and removed in
# 2.0, where it raises AttributeError.
QB_advanced[cols_to_change] = (
    QB_advanced[cols_to_change]
    .replace('', float('nan'))
    .astype(float)
)


In [45]:
# Impute missing numeric values with the mean of their own column.
numeric_cols = QB_advanced.select_dtypes(include=['float64', 'int64']).columns
numeric_frame = QB_advanced[numeric_cols]
column_means = numeric_frame.mean()  # per-column mean; NaN entries are skipped
QB_advanced[numeric_cols] = numeric_frame.fillna(column_means)

##### Finally, let's make sure to drop any duplicates

In [46]:
# Keep only the first row for each player name; later repeats are discarded.
is_repeat = QB_advanced.duplicated(subset='Player', keep='first')
QB_advanced = QB_advanced[~is_repeat]

##### Let's check out our merged dataset!

In [47]:
# Display the table after imputation and de-duplication.
QB_advanced

Unnamed: 0,Player,Pos,Tm,GP,Comp,Inc,Passing_Yds,Passing_Td,Int,Pic6,Sks,Passing_1st,Att,Rushing_Yds,Rushing_Td,Rushing_1st,Total,Lost,PPG,Rank,YBC/Att,YAC/Att,Att/Br,Rk,Age,IAY/PA,CAY/Cmp,CAY/PA,YAC/Cmp
0,L. Jackson,QB,Bal,15,16.133333,8.933333,183.8,1.733333,0.6,0.066667,1.933333,9.2,10.6,67.0,0.466667,3.733333,0.666667,0.266667,21.081667,7.0,5.0,1.3,19.9,22,23,8.6,6.6,4.2,4.8
1,J. Allen,QB,Buf,16,24.75,11.0,284.0,2.3125,0.625,0.0,1.625,14.25,6.375,26.3125,0.5,2.375,0.5625,0.375,24.563529,31.0,2.8,1.3,25.5,5,24,8.5,6.8,4.7,4.7
2,R. Wilson,QB,Sea,16,24.0,10.875,263.25,2.5,0.8125,0.0625,2.9375,13.3125,5.1875,32.0625,0.125,1.5625,0.4375,0.25,17.772857,3.0,5.9,0.3,25.70625,9,32,8.6,6.2,4.3,4.8
3,K. Murray,QB,Ari,16,23.4375,11.4375,248.1875,1.625,0.75,0.0625,1.6875,12.8125,8.3125,51.1875,0.6875,3.25,0.5,0.25,22.177143,20.0,5.2,1.0,26.6,13,23,7.8,5.9,4.0,4.7
4,P. Mahomes,QB,Kc,15,26.0,13.2,316.0,2.533333,0.4,0.0,1.466667,15.866667,4.133333,20.533333,0.133333,1.4,0.333333,0.133333,22.038824,29.0,4.2,0.8,31.0,2,25,8.4,6.3,4.2,5.8
5,D. Prescott,QB,Dal,5,30.2,14.2,371.2,1.8,0.8,0.2,2.0,18.6,3.6,18.6,0.6,1.6,0.6,0.6,20.66,5.0,3.6,1.6,9.0,32,27,7.9,6.9,4.7,5.4
6,J. Herbert,QB,Lac,15,26.4,13.266667,289.066667,2.066667,0.666667,0.0,2.133333,14.4,3.666667,15.6,0.333333,1.333333,0.533333,0.066667,23.28,6.0,3.2,1.0,27.5,6,22,7.4,5.4,3.6,5.6
7,T. Brady,QB,Tb,16,25.0625,13.0625,289.5625,2.5,0.75,0.125,1.3125,14.5625,1.875,0.375,0.1875,0.375,0.25,0.0625,22.749412,11.0,-0.1,0.3,30.0,3,43,9.1,7.1,4.6,4.5
8,A. Rodgers,QB,Gb,16,23.25,9.625,268.6875,3.0,0.3125,0.0625,1.25,13.5,2.375,9.3125,0.1875,0.9375,0.25,0.125,21.01875,13.0,3.8,0.2,25.70625,7,37,7.9,5.5,3.9,6.0
9,R. Tannehill,QB,Ten,16,19.6875,10.375,238.6875,2.0625,0.4375,0.0,1.5,12.625,2.6875,16.625,0.4375,1.375,0.375,0.0625,16.609412,25.0,5.1,1.0,25.70625,15,32,8.4,7.3,4.8,4.8


### Next step, normalize values