### QB using Fantasy Pros stats

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
pd.options.display.max_columns = None # See all columns
import warnings
warnings.filterwarnings('ignore')
from fantasy_football import Fantasy

# Import fantasy class which will be used for building datasets
football = Fantasy()

The bulk of the preprocessing logic can be found in the source code.

##### To make our dataset, lets pull quarterbacks from the last ~10 years

In [3]:
QB_2021 = football.prepare_QB("2021", True)
QB_2020 = football.prepare_QB("2020", False)
QB_2019 = football.prepare_QB("2019", False)
QB_2018 = football.prepare_QB("2018", False)
QB_2017 = football.prepare_QB("2017", False)
QB_2016 = football.prepare_QB("2016", False)
QB_2015 = football.prepare_QB("2015", False)
QB_2014 = football.prepare_QB("2014", False)
QB_2013 = football.prepare_QB("2013", False)

In [4]:
all_QB = [QB_2020, QB_2019, QB_2018, QB_2017, 
          QB_2016, QB_2015, QB_2014, QB_2013]

QB = pd.concat(all_QB, ignore_index=True)
QB

Unnamed: 0,CMP,Passing_att,PCT,Passing_Yds,Y/A,Passing_Td,INT,SACKS,Rushing_att,Rushing_Yds,Rushing_Td,FPTS/G,y,AIR/A,10+ YDS,20+ YDS,30+ YDS,40+ YDS,50+ YDS,PKT TIME,KNCK,HRRY,BLITZ,POOR,DROP,RZ ATT,RTG
0,0.929972,0.879834,0.967177,0.898236,0.852941,0.770833,0.500000,0.362167,0.564433,0.397222,0.625000,0.995745,24.6,0.901961,0.884352,0.867537,0.666667,0.571429,0.234375,0.833333,0.459906,0.654093,1.000000,0.710974,0.586504,0.971747,0.694444
1,0.880291,0.857852,0.923414,0.784349,0.735294,0.541667,0.600000,0.377852,0.764175,0.765741,0.859375,0.957447,23.3,0.764706,0.673501,0.615672,0.888889,1.000000,0.585938,0.666667,0.271226,0.557008,0.632777,0.710974,0.287591,0.712757,0.610501
2,0.873193,0.807609,1.000000,0.849541,0.897059,1.000000,0.250000,0.268061,0.152062,0.145370,0.234375,0.948936,22.7,0.745098,0.740590,0.797575,1.000000,1.000000,0.585938,0.666667,0.306604,0.319691,0.682853,0.549459,0.636322,0.948202,0.811966
3,0.977288,0.966504,0.903720,1.000000,0.882353,0.844444,0.320000,0.322433,0.333333,0.311605,0.166667,1.000000,22.0,0.823529,1.000000,1.000000,0.869136,0.609524,0.250000,0.833333,0.691824,0.677824,0.604957,0.869655,0.707729,0.912100,0.780627
4,0.896851,0.835871,0.989059,0.953690,1.000000,0.687500,0.350000,0.722909,0.487113,0.418519,0.234375,0.919149,20.4,1.000000,0.913104,0.965485,1.000000,0.785714,0.585938,0.833333,0.518868,0.794325,0.682853,0.634467,0.437047,0.983519,0.744811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225,0.701414,0.772520,0.892086,0.651717,0.769231,0.370248,0.646465,0.526646,0.563473,0.566129,0.727273,0.589844,14.7,0.811321,0.645740,0.684492,0.749311,0.636364,0.545455,0.000000,0.000000,0.000000,0.837084,0.000000,0.652038,0.542149,0.894349
226,0.722963,0.814163,0.871942,0.631197,0.703297,0.252121,0.553086,0.698851,0.259459,0.178769,0.000000,0.421875,12.0,0.622642,0.626607,0.580392,0.387879,0.333333,0.400000,0.000000,0.000000,0.000000,0.801980,0.000000,0.551724,0.659394,0.601201
227,0.675556,0.776935,0.854676,0.586059,0.692308,0.425175,0.410256,0.848806,0.299376,0.120943,0.000000,0.476562,11.2,0.830189,0.562953,0.506787,0.410256,0.538462,0.615385,0.000000,0.000000,0.000000,0.868241,0.000000,0.445623,0.525874,0.774775
228,0.640000,0.742944,0.846043,0.576082,0.703297,0.320000,0.533333,0.772414,0.763964,0.519242,0.533333,0.523438,10.8,0.716981,0.530942,0.658824,0.484848,0.400000,0.000000,0.000000,0.000000,0.000000,0.623762,0.000000,0.524138,0.305455,0.936937


In [5]:
# Dean's feature selection:

y = QB['y']
X = QB.drop(['y', 'FPTS/G'], axis=1)

info = football.getBestFeatures(X, y)
info

{'mean mse': 9.589303594717714,
 'model': LinearRegression(),
 'features': Index(['Passing_Yds', 'Passing_Td', '20+ YDS', '30+ YDS', '40+ YDS'], dtype='object'),
 'coefficients': array([-4.60484691,  6.68898043,  1.44877185,  5.04781461, -0.06748839])}

In [6]:
coefficients = info["coefficients"]
features = info['features']
test_y = QB_2021['y']
test_x = QB_2021[features]

In [7]:
values = test_x.mul(coefficients)
predictions = values.sum(axis = 1)

In [8]:
results = pd.DataFrame()
results['actual'] = test_y
results['predictions'] = predictions
results['difference'] = results['actual'] - results['predictions']

In [9]:
results.loc[:, 'difference'].mean()

12.797572610941982