In [67]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import np_utils
from keras.optimizers import Adam

In [68]:
avg_players = pd.read_csv('../data/avg_modeling_data.csv').drop(columns='Unnamed: 0')
final_year_players = pd.read_csv('../data/final_year_modeling_data.csv').drop(columns='Unnamed: 0')

### Preprocessing

In [69]:
avg_players.head()

Unnamed: 0,draft_overall,draft_round,draft_year,player,position,school,conference,wins,losses,rank,...,fumbles_forced,punt_ret,punt_ret_yds,punt_ret_yds_per_ret,punt_ret_td,kick_ret,kick_ret_yds,kick_ret_yds_per_ret,kick_ret_td,avg_grade
0,1,1,2006,Mario Williams,DE,North Carolina St.,ACC,7.0,5.0,37.0,...,0.0,1.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,66.8
1,2,1,2006,Reggie Bush,RB,USC,Pac-10,12.333333,0.666667,2.333333,...,0.0,14.666667,186.333333,9.2,1.0,22.333333,507.333333,23.5,0.333333,61.325
2,3,1,2006,Vince Young,QB,Texas,Big 12,11.333333,1.333333,6.333333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59.2
3,5,1,2006,A.J. Hawk,LB,Ohio St.,Big Ten,10.75,2.0,10.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.275
4,6,1,2006,Vernon Davis,TE,Maryland,ACC,6.666667,5.0,47.0,...,0.0,0.0,0.0,0.0,0.0,1.0,22.666667,7.566667,0.0,67.45


In [70]:
avg_players.columns

Index(['draft_overall', 'draft_round', 'draft_year', 'player', 'position',
       'school', 'conference', 'wins', 'losses', 'rank', 'nfl_team',
       'nfl_team_win_perc', 'bench_reps', 'broad_jump', 'cone', 'forty_yd',
       'height', 'shuttle', 'vertical', 'weight', 'games', 'pass_cmp',
       'pass_att', 'pass_cmp_pct', 'pass_yds', 'pass_yds_per_att',
       'adj_pass_yds_per_att', 'pass_td', 'pass_int', 'pass_rating',
       'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td', 'rec', 'rec_yds',
       'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended', 'fumbles_rec',
       'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced', 'punt_ret',
       'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td', 'kick_ret',
       'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_

In [71]:
avg_players = pd.get_dummies(avg_players, columns=['conference', 'nfl_team'])

In [72]:
len(avg_players['player'].unique())

2325

In [73]:
len(avg_players)

2370

In [74]:
avg_players['player'].value_counts()

Brandon Williams        9
D.J. Moore              4
Marcus Smith            4
C.J. Wilson             4
Aaron Brown             4
Mike Thomas             4
Will Davis              4
Marcus Thomas           4
Mike Williams           4
Josh Robinson           4
Michael Johnson         4
Kyle Williams           4
Chris Jones             4
Corey Wootton           2
Jared Abbrederis        1
Laquon Treadwell        1
Phillip Merling         1
Dalton Schultz          1
Derek Fine              1
Ben Heeney              1
C.J. Uzomah             1
Korey Hall              1
Carlos Henderson        1
Mike Daniels            1
Christian Kirk          1
Howard Wilson           1
Oscar Lua               1
Olivier Vernon          1
Arthur Lynch            1
Marcus Davenport        1
                       ..
Will Clarke             1
Michael Egnew           1
Will Dissly             1
Tom Savage              1
Carl Bradford           1
Dominique Barber        1
Adrian Arrington        1
Mike Gesicki

In [75]:
avg_players[avg_players['player']=='Brandon Williams']

Unnamed: 0,draft_overall,draft_round,draft_year,player,position,school,wins,losses,rank,nfl_team_win_perc,...,nfl_team_Raiders,nfl_team_Rams,nfl_team_Ravens,nfl_team_Redskins,nfl_team_Saints,nfl_team_Seahawks,nfl_team_Steelers,nfl_team_Texans,nfl_team_Titans,nfl_team_Vikings
63,84,3,2006,Brandon Williams,WR,Wisconsin,8.818182,4.181818,29.636364,0.25,...,0,0,0,0,0,0,0,0,0,0
64,84,3,2006,Brandon Williams,WR,Wisconsin,8.818182,4.181818,29.636364,0.25,...,0,0,0,0,0,0,0,0,0,0
65,84,3,2006,Brandon Williams,WR,Wisconsin,8.818182,4.181818,29.636364,0.25,...,0,0,0,0,0,0,0,0,0,0
642,120,4,2009,Brandon Williams,DE,Texas Tech,8.818182,4.181818,29.636364,0.563,...,0,0,0,0,0,0,0,0,0,0
643,120,4,2009,Brandon Williams,DE,Texas Tech,8.818182,4.181818,29.636364,0.563,...,0,0,0,0,0,0,0,0,0,0
644,120,4,2009,Brandon Williams,DE,Texas Tech,8.818182,4.181818,29.636364,0.563,...,0,0,0,0,0,0,0,0,0,0
1866,92,3,2016,Brandon Williams,CB,Texas A&M,8.818182,4.181818,29.636364,0.813,...,0,0,0,0,0,0,0,0,0,0
1867,92,3,2016,Brandon Williams,CB,Texas A&M,8.818182,4.181818,29.636364,0.813,...,0,0,0,0,0,0,0,0,0,0
1868,92,3,2016,Brandon Williams,CB,Texas A&M,8.818182,4.181818,29.636364,0.813,...,0,0,0,0,0,0,0,0,0,0


In [76]:
avg_players.drop_duplicates(subset='player', inplace=True)

In [77]:
final_year_players.head()

Unnamed: 0,draft_overall,draft_round,draft_year,player,position,school,conference,wins,losses,rank,...,fumbles_forced,punt_ret,punt_ret_yds,punt_ret_yds_per_ret,punt_ret_td,kick_ret,kick_ret_yds,kick_ret_yds_per_ret,kick_ret_td,avg_grade
0,1,1,2006,Mario Williams,DE,North Carolina St.,ACC,7.0,5.0,37.0,...,0.0,1.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,66.8
1,2,1,2006,Reggie Bush,RB,USC,Pac-10,12.0,1.0,2.0,...,0.0,18.0,179.0,9.9,1.0,28.0,493.0,17.6,0.0,61.325
2,3,1,2006,Vince Young,QB,Texas,Big 12,13.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,59.2
3,5,1,2006,A.J. Hawk,LB,Ohio St.,Big Ten,10.0,2.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.275
4,6,1,2006,Vernon Davis,TE,Maryland,ACC,5.0,6.0,65.0,...,0.0,0.0,0.0,0.0,0.0,3.0,68.0,22.7,0.0,67.45


In [78]:
len(final_year_players)

2340

In [79]:
len(final_year_players['player'].unique())

2325

In [80]:
final_year_players['player'].value_counts()

Brandon Williams        3
D.J. Moore              2
Marcus Smith            2
C.J. Wilson             2
Aaron Brown             2
Corey Wootton           2
Mike Thomas             2
Will Davis              2
Marcus Thomas           2
Mike Williams           2
Josh Robinson           2
Michael Johnson         2
Chris Jones             2
Kyle Williams           2
Arthur Lynch            1
Laquon Treadwell        1
Phillip Merling         1
Dalton Schultz          1
Derek Fine              1
Ben Heeney              1
C.J. Uzomah             1
Korey Hall              1
Carlos Henderson        1
Mike Daniels            1
Christian Kirk          1
Howard Wilson           1
Oscar Lua               1
Marcus Davenport        1
Ernie Sims              1
Olivier Vernon          1
                       ..
Will Clarke             1
Michael Egnew           1
Will Dissly             1
Tom Savage              1
Carl Bradford           1
Dominique Barber        1
Adrian Arrington        1
Mike Gesicki

In [81]:
final_year_players.drop_duplicates(subset='player', inplace=True)

In [82]:
final_year_players = pd.get_dummies(final_year_players, columns=['conference', 'nfl_team'])

# QuarterBacks

In [600]:
avg_qbs = avg_players[avg_players['position']=='QB']
final_year_qbs = final_year_players[final_year_players['position']=='QB']

## Average Stats

In [601]:
len(avg_qbs)

134

In [602]:
avg_qbs.isnull().sum()

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                     4
losses                   4
rank                     4
nfl_team_win_perc        0
bench_reps              28
broad_jump              14
cone                    16
forty_yd                15
height                  11
shuttle                 16
vertical                13
weight                  11
games                    3
pass_cmp                 3
pass_att                 3
pass_cmp_pct             3
pass_yds                 3
pass_yds_per_att         3
adj_pass_yds_per_att     3
pass_td                  3
pass_int                 3
pass_rating              3
rush_att                 3
rush_yds                 3
                        ..
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
nfl_team_Chargers        0
n

In [603]:
avg_qbs.dropna(subset=['games'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [604]:
avg_qbs.isnull().sum()[:50]

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                     1
losses                   1
rank                     1
nfl_team_win_perc        0
bench_reps              27
broad_jump              14
cone                    16
forty_yd                15
height                  11
shuttle                 16
vertical                13
weight                  11
games                    0
pass_cmp                 0
pass_att                 0
pass_cmp_pct             0
pass_yds                 0
pass_yds_per_att         0
adj_pass_yds_per_att     0
pass_td                  0
pass_int                 0
pass_rating              0
rush_att                 0
rush_yds                 0
rush_yds_per_att         0
rush_td                  0
rec                      0
rec_yds                  0
rec_yds_per_rec          0
rec_td                   0
scrim_att                0
s

In [605]:
avg_qbs[avg_qbs['wins'].isnull()==True]

Unnamed: 0,draft_overall,draft_round,draft_year,player,position,school,wins,losses,rank,nfl_team_win_perc,...,nfl_team_Rams,nfl_team_Ravens,nfl_team_Redskins,nfl_team_Saints,nfl_team_Seahawks,nfl_team_Steelers,nfl_team_Texans,nfl_team_Titans,nfl_team_Vikings,defense
1792,2,1,2016,Carson Wentz,QB,North Dakota St.,,,,0.438,...,0,0,0,0,0,0,0,0,0,0


In [606]:
avg_qbs.dropna(subset=['wins'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [607]:
len(avg_qbs)

130

In [608]:
avg_qbs.drop(columns='bench_reps', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [609]:
avg_qbs[['player', 'draft_year', 'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight']]

Unnamed: 0,player,draft_year,broad_jump,cone,forty_yd,height,shuttle,vertical,weight
2,Vince Young,2006,0.0,0.00,4.48,77.0,0.00,0.0,229.0
8,Matt Leinart,2006,0.0,0.00,4.90,77.0,0.00,0.0,223.0
9,Jay Cutler,2006,0.0,7.10,4.77,75.0,4.26,0.0,226.0
38,Kellen Clemens,2006,0.0,0.00,4.89,74.0,0.00,0.0,224.0
48,Tarvaris Jackson,2006,107.0,7.40,4.69,74.0,4.38,31.5,226.0
60,Charlie Whitehurst,2006,112.0,7.00,4.75,77.0,4.11,33.0,223.0
66,Brodie Croyle,2006,104.0,7.34,4.88,74.0,4.25,30.5,205.0
79,Brad Smith,2006,128.0,7.00,4.46,74.0,4.33,39.5,213.0
124,Omar Jacobs,2006,,,,,,,
143,Reggie McNeal,2006,121.0,6.96,4.40,74.0,4.25,32.0,198.0


In [610]:
avg_qbs.dropna(subset= ['broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight'], how='all').isnull().sum()

draft_overall           0
draft_round             0
draft_year              0
player                  0
position                0
school                  0
wins                    0
losses                  0
rank                    0
nfl_team_win_perc       0
broad_jump              3
cone                    5
forty_yd                4
height                  0
shuttle                 5
vertical                2
weight                  0
games                   0
pass_cmp                0
pass_att                0
pass_cmp_pct            0
pass_yds                0
pass_yds_per_att        0
adj_pass_yds_per_att    0
pass_td                 0
pass_int                0
pass_rating             0
rush_att                0
rush_yds                0
rush_yds_per_att        0
                       ..
nfl_team_Bills          0
nfl_team_Broncos        0
nfl_team_Browns         0
nfl_team_Buccaneers     0
nfl_team_Cardinals      0
nfl_team_Chargers       0
nfl_team_Chiefs         0
nfl_team_Col

In [611]:
avg_qbs.dropna(subset= ['broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight'], how='all', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [612]:
avg_qbs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [613]:
avg_qbs.isnull().sum().sum()

0

In [614]:
avg_qbs.columns[50:]

Index(['fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced', 'punt_ret',
       'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td', 'kick_ret',
       'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td', 'avg_grade',
       'conference_ACC', 'conference_American', 'conference_Big 12',
       'conference_Big East', 'conference_Big Ten', 'conference_CUSA',
       'conference_Ind', 'conference_MAC', 'conference_MVC', 'conference_MWC',
       'conference_Pac-10', 'conference_Pac-12', 'conference_SEC',
       'conference_Sun Belt', 'conference_WAC', 'nfl_team_49ers',
       'nfl_team_Bears', 'nfl_team_Bengals', 'nfl_team_Bills',
       'nfl_team_Broncos', 'nfl_team_Browns', 'nfl_team_Buccaneers',
       'nfl_team_Cardinals', 'nfl_team_Chargers', 'nfl_team_Chiefs',
       'nfl_team_Colts', 'nfl_team_Cowboys', 'nfl_team_Dolphins',
       'nfl_team_Eagles', 'nfl_team_Falcons', 'nfl_team_Giants',
       'nfl_team_Jaguars', 'nfl_team_Jets', 'nfl_team_Lions',
       'nfl_team_Packers', 'nfl

In [615]:
X_train_avg_qbs = avg_qbs[avg_qbs['draft_year']<2015]
y_train_avg_qbs = avg_qbs[avg_qbs['draft_year']<2015]['avg_grade']

X_test_avg_qbs = avg_qbs[avg_qbs['draft_year']>=2015]
y_test_avg_qbs = avg_qbs[avg_qbs['draft_year']>=2015]['avg_grade']

In [623]:
features = [
#     'draft_overall', 'draft_round', 'draft_year', 
            'wins', 'losses', 
            'rank', 'nfl_team_win_perc', 'broad_jump',
           'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight', 'games',
       'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds', 'pass_yds_per_att',
       'adj_pass_yds_per_att', 'pass_td', 'pass_int', 'pass_rating',
       'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
           'conference_ACC', 'conference_American', 'conference_Big 12',
       'conference_Big East', 'conference_Big Ten', 'conference_CUSA',
       'conference_Ind', 'conference_MAC', 'conference_MVC', 'conference_MWC',
       'conference_Pac-10', 'conference_Pac-12', 'conference_SEC',
       'conference_Sun Belt', 'conference_WAC', 'nfl_team_49ers',
       'nfl_team_Bears', 'nfl_team_Bengals', 'nfl_team_Bills',
       'nfl_team_Broncos', 'nfl_team_Browns', 'nfl_team_Buccaneers',
       'nfl_team_Cardinals', 'nfl_team_Chargers', 'nfl_team_Chiefs',
       'nfl_team_Colts', 'nfl_team_Cowboys', 'nfl_team_Dolphins',
       'nfl_team_Eagles', 'nfl_team_Falcons', 'nfl_team_Giants',
       'nfl_team_Jaguars', 'nfl_team_Jets', 'nfl_team_Lions',
       'nfl_team_Packers', 'nfl_team_Panthers', 'nfl_team_Patriots',
       'nfl_team_Raiders', 'nfl_team_Rams', 'nfl_team_Ravens',
       'nfl_team_Redskins', 'nfl_team_Saints', 'nfl_team_Seahawks',
       'nfl_team_Steelers', 'nfl_team_Texans', 'nfl_team_Titans',
       'nfl_team_Vikings']

#### Linear Regression

In [624]:
lr = LinearRegression()
lr.fit(X_train_avg_qbs[features], y_train_avg_qbs)
lr_avg_qbs_preds = lr.predict(X_test_avg_qbs[features])
r2_score(y_test_avg_qbs, lr_avg_qbs_preds)

-3.15197084660329

In [625]:
mean_squared_error(y_test_avg_qbs, lr_avg_qbs_preds)

4896.953557864026

#### Random Forest

In [626]:
rf = RandomForestRegressor()
rf.fit(X_train_avg_qbs[features], y_train_avg_qbs)
rf_avg_qbs_preds = rf.predict(X_test_avg_qbs[features])
r2_score(y_test_avg_qbs, rf_avg_qbs_preds)

-0.1847084918181141

In [627]:
mean_squared_error(y_test_avg_qbs, rf_avg_qbs_preds)

1397.2791906249995

#### Neural Net

In [664]:
np.random.seed(42)
qb_model = Sequential()
qb_model.add(Dense(X_train_avg_qbs[features].shape[1], input_shape=(X_train_avg_qbs[features].shape[1],), activation='relu'))
qb_model.add(Dropout(0.5))
qb_model.add(Dense(12, activation='relu'))
qb_model.add(Dense(8, activation='relu'))
qb_model.add(Dense(1))

adam = Adam(lr=0.01)
qb_model.compile(loss='mean_squared_error', optimizer=adam)

In [665]:
qb_model.fit(X_train_avg_qbs[features], y_train_avg_qbs, validation_data=(X_test_avg_qbs[features], y_test_avg_qbs), epochs=60, batch_size=32)

Train on 84 samples, validate on 30 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x1ef33f9d470>

## Final Year Stats

In [666]:
final_year_qbs.isnull().sum()

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                     4
losses                   4
rank                     4
nfl_team_win_perc        0
bench_reps              28
broad_jump              14
cone                    16
forty_yd                15
height                  11
shuttle                 16
vertical                13
weight                  11
games                    4
pass_cmp                 3
pass_att                 3
pass_cmp_pct             3
pass_yds                 3
pass_yds_per_att         3
adj_pass_yds_per_att     3
pass_td                  3
pass_int                 3
pass_rating              3
rush_att                 3
rush_yds                 3
                        ..
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
nfl_team_Chargers        0
n

In [667]:
final_year_qbs.dropna(subset=['games'], inplace=True)
final_year_qbs.dropna(subset=['wins'], inplace=True)
final_year_qbs.drop(columns='bench_reps', inplace=True)
final_year_qbs.dropna(subset= ['broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight'], how='all', inplace=True)
final_year_qbs.dropna(inplace=True)
final_year_qbs.isnull().sum().sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pand

0

In [668]:
X_train_final_year_qbs = final_year_qbs[final_year_qbs['draft_year']<2015]
y_train_final_year_qbs = final_year_qbs[final_year_qbs['draft_year']<2015]['avg_grade']

X_test_final_year_qbs = final_year_qbs[final_year_qbs['draft_year']>=2015]
y_test_final_year_qbs = final_year_qbs[final_year_qbs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [669]:
lr = LinearRegression()
lr.fit(X_train_final_year_qbs[features], y_train_final_year_qbs)
lr_final_year_qbs_preds = lr.predict(X_test_final_year_qbs[features])
r2_score(y_test_final_year_qbs, lr_final_year_qbs_preds)

-0.6537896554744433

In [670]:
mean_squared_error(y_test_final_year_qbs, lr_final_year_qbs_preds)

1821.135413967135

#### Random Forest

In [679]:
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train_final_year_qbs[features], y_train_final_year_qbs)
rf_final_year_qbs_preds = rf_reg.predict(X_test_final_year_qbs[features])

r2_score(y_test_final_year_qbs, rf_final_year_qbs_preds)

0.018063295850573757

In [681]:
mean_squared_error(y_test_final_year_qbs, rf_final_year_qbs_preds)

1081.298156800766

#### Neural Net

In [688]:
np.random.seed(42)
qb_model = Sequential()
qb_model.add(Dense(X_train_final_year_qbs[features].shape[1], input_shape=(X_train_final_year_qbs[features].shape[1],), activation='relu'))
qb_model.add(Dropout(0.5))
qb_model.add(Dense(12, activation='relu'))
qb_model.add(Dense(6, activation='relu'))
qb_model.add(Dense(1))

adam = Adam(lr=0.01)
qb_model.compile(loss='mean_squared_error', optimizer=adam)

In [689]:
qb_model.fit(X_train_final_year_qbs[features], y_train_final_year_qbs, validation_data=(X_test_final_year_qbs[features], y_test_final_year_qbs), epochs=100, batch_size=32)

Train on 84 samples, validate on 29 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1ef37289c88>

In [698]:
qb_model.predict(final_year_qbs[final_year_qbs['player']=='Dak Prescott'][features])

array([[34.112915]], dtype=float32)

In [699]:
final_year_qbs[final_year_qbs['player']=='Dak Prescott']['avg_grade']

1873    75.725
Name: avg_grade, dtype: float64

In [694]:
final_year_qbs['player']

2                Vince Young
8               Matt Leinart
9                 Jay Cutler
38            Kellen Clemens
48          Tarvaris Jackson
60        Charlie Whitehurst
64             Brodie Croyle
77                Brad Smith
140            Reggie McNeal
141         Bruce Gradkowski
176         JaMarcus Russell
195              Brady Quinn
206               Kevin Kolb
209                John Beck
211             Drew Stanton
249            Trent Edwards
257          Isaiah Stanback
306               Troy Smith
322            Jordan Palmer
359                Matt Ryan
369               Joe Flacco
402              Brian Brohm
403               Chad Henne
433          Kevin O'Connell
477             Dennis Dixon
481               Erik Ainge
496             Colt Brennan
505            Andre Woodson
510               Matt Flynn
520               Alex Brink
                ...         
1575               Tajh Boyd
1598          Jameis Winston
1599          Marcus Mariota
1652         G

# Running Back

In [337]:
avg_rbs = avg_players[(avg_players['position']=='RB') | (avg_players['position']=='FB')]
fy_rbs = final_year_players[(final_year_players['position']=='RB') | (final_year_players['position']=='FB')]

## Average Statistics

In [338]:
len(avg_rbs)

269

In [339]:
avg_rbs.isnull().sum()[:50]

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                     9
losses                   9
rank                     9
nfl_team_win_perc        0
bench_reps              38
broad_jump              40
cone                    50
forty_yd                39
height                  34
shuttle                 48
vertical                37
weight                  34
games                    9
pass_cmp                 9
pass_att                 9
pass_cmp_pct             9
pass_yds                 9
pass_yds_per_att         9
adj_pass_yds_per_att     9
pass_td                  9
pass_int                 9
pass_rating              9
rush_att                 9
rush_yds                 9
rush_yds_per_att         9
rush_td                  9
rec                      9
rec_yds                  9
rec_yds_per_rec          9
rec_td                   9
scrim_att                9
s

In [340]:
avg_rbs.isnull().sum()[50:]

fumbles_rec             9
fumbles_rec_yds         9
fumbles_rec_td          9
fumbles_forced          9
punt_ret                9
punt_ret_yds            9
punt_ret_yds_per_ret    9
punt_ret_td             9
kick_ret                9
kick_ret_yds            9
kick_ret_yds_per_ret    9
kick_ret_td             9
avg_grade               0
conference_ACC          0
conference_American     0
conference_Big 12       0
conference_Big East     0
conference_Big Ten      0
conference_CUSA         0
conference_Ind          0
conference_MAC          0
conference_MVC          0
conference_MWC          0
conference_Pac-10       0
conference_Pac-12       0
conference_SEC          0
conference_Sun Belt     0
conference_WAC          0
nfl_team_49ers          0
nfl_team_Bears          0
nfl_team_Bengals        0
nfl_team_Bills          0
nfl_team_Broncos        0
nfl_team_Browns         0
nfl_team_Buccaneers     0
nfl_team_Cardinals      0
nfl_team_Chargers       0
nfl_team_Chiefs         0
nfl_team_Col

In [341]:
avg_rbs.columns[:50]

Index(['draft_overall', 'draft_round', 'draft_year', 'player', 'position',
       'school', 'wins', 'losses', 'rank', 'nfl_team_win_perc', 'bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended'],
      dtype='object')

In [342]:
avg_rbs.columns[50:]

Index(['fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td',
       'avg_grade', 'conference_ACC', 'conference_American',
       'conference_Big 12', 'conference_Big East', 'conference_Big Ten',
       'conference_CUSA', 'conference_Ind', 'conference_MAC', 'conference_MVC',
       'conference_MWC', 'conference_Pac-10', 'conference_Pac-12',
       'conference_SEC', 'conference_Sun Belt', 'conference_WAC',
       'nfl_team_49ers', 'nfl_team_Bears', 'nfl_team_Bengals',
       'nfl_team_Bills', 'nfl_team_Broncos', 'nfl_team_Browns',
       'nfl_team_Buccaneers', 'nfl_team_Cardinals', 'nfl_team_Chargers',
       'nfl_team_Chiefs', 'nfl_team_Colts', 'nfl_team_Cowboys',
       'nfl_team_Dolphins', 'nfl_team_Eagles', 'nfl_team_Falcons',
       'nfl_team_Giants', 'nfl_team_Jaguars', 'nfl_team_Jets',
       'nfl_team_Lions', 'nfl_team

In [343]:
avg_rbs.dropna(subset=['wins', 'losses', 'rank','pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended',
        'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

avg_rbs.dropna(subset=['bench_reps', 'broad_jump', 'cone', 'forty_yd',
                       'height', 'shuttle', 'vertical','weight'], how='all', inplace=True)

avg_rbs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [344]:
len(avg_rbs)

205

In [371]:
features = [
#     'draft_overall', 'draft_round', 'draft_year', 
            'wins', 'losses', 'rank', 'nfl_team_win_perc', 
    'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight', 'games',
       'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 
#     'scrim_att', 'scrim_yds', 'scrim_yds_per_att', 'scrim_td',
    'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td',
           'conference_ACC', 'conference_American', 'conference_Big 12',
       'conference_Big East', 'conference_Big Ten', 'conference_CUSA',
       'conference_Ind', 'conference_MAC', 'conference_MVC', 'conference_MWC',
       'conference_Pac-10', 'conference_Pac-12', 'conference_SEC',
       'conference_Sun Belt', 'conference_WAC', 'nfl_team_49ers',
       'nfl_team_Bears', 'nfl_team_Bengals', 'nfl_team_Bills',
       'nfl_team_Broncos', 'nfl_team_Browns', 'nfl_team_Buccaneers',
       'nfl_team_Cardinals', 'nfl_team_Chargers', 'nfl_team_Chiefs',
       'nfl_team_Colts', 'nfl_team_Cowboys', 'nfl_team_Dolphins',
       'nfl_team_Eagles', 'nfl_team_Falcons', 'nfl_team_Giants',
       'nfl_team_Jaguars', 'nfl_team_Jets', 'nfl_team_Lions',
       'nfl_team_Packers', 'nfl_team_Panthers', 'nfl_team_Patriots',
       'nfl_team_Raiders', 'nfl_team_Rams', 'nfl_team_Ravens',
       'nfl_team_Redskins', 'nfl_team_Saints', 'nfl_team_Seahawks',
       'nfl_team_Steelers', 'nfl_team_Texans', 'nfl_team_Titans',
       'nfl_team_Vikings']

In [354]:
X_train_avg_rbs = avg_rbs[avg_rbs['draft_year']<2015]
y_train_avg_rbs = avg_rbs[avg_rbs['draft_year']<2015]['avg_grade']
X_test_avg_rbs = avg_rbs[avg_rbs['draft_year']>=2015]
y_test_avg_rbs = avg_rbs[avg_rbs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [355]:
lr = LinearRegression()
lr.fit(X_train_avg_rbs[features], y_train_avg_rbs)
lr_avg_rbs_preds = lr.predict(X_test_avg_rbs[features])

r2_score(y_test_avg_rbs, lr_avg_rbs_preds)

0.006493266700320133

In [356]:
mean_squared_error(y_test_avg_rbs, lr_avg_rbs_preds)

717.2949569511742

#### Random Forest

In [360]:
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train_avg_rbs[features], y_train_avg_rbs)
rf_avg_rbs_preds = rf_reg.predict(X_test_avg_rbs[features])

r2_score(y_test_avg_rbs, rf_avg_rbs_preds)

0.10731353539001831

In [361]:
mean_squared_error(y_test_avg_rbs, rf_avg_rbs_preds)

644.5044384114584

#### Neural Net

In [362]:
np.random.seed(42)
rb_model = Sequential()
rb_model.add(Dense(X_train_avg_rbs[features].shape[1], input_shape=(X_train_avg_rbs[features].shape[1],), activation='relu'))
rb_model.add(Dropout(0.5))
rb_model.add(Dense(12, activation='relu'))
# rb_model.add(Dense(10, activation='relu'))
rb_model.add(Dense(1))

adam = Adam(lr=0.01)
rb_model.compile(loss='mean_squared_error', optimizer=adam)

In [363]:
rb_model.fit(X_train_avg_rbs[features], y_train_avg_rbs, validation_data=(X_test_avg_rbs[features], y_test_avg_rbs), epochs=100, batch_size=32)

Train on 157 samples, validate on 48 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1ef7de4d7f0>

## Final Year Stats

In [365]:
fy_rbs.isnull().sum()

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                     9
losses                   9
rank                     9
nfl_team_win_perc        0
bench_reps              38
broad_jump              40
cone                    50
forty_yd                39
height                  34
shuttle                 48
vertical                37
weight                  34
games                   11
pass_cmp                 9
pass_att                 9
pass_cmp_pct             9
pass_yds                 9
pass_yds_per_att         9
adj_pass_yds_per_att     9
pass_td                  9
pass_int                 9
pass_rating              9
rush_att                 9
rush_yds                 9
                        ..
nfl_team_Bengals         0
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
n

In [366]:
fy_rbs.dropna(subset=['wins', 'losses', 'rank','pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended',
        'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

fy_rbs.dropna(subset=['bench_reps', 'broad_jump', 'cone', 'forty_yd',
                       'height', 'shuttle', 'vertical','weight'], how='all', inplace=True)

fy_rbs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [367]:
fy_rbs.isnull().sum().sum()

0

In [372]:
X_train_fy_rbs = fy_rbs[fy_rbs['draft_year']<2015]
y_train_fy_rbs = fy_rbs[fy_rbs['draft_year']<2015]['avg_grade']
X_test_fy_rbs = fy_rbs[fy_rbs['draft_year']>=2015]
y_test_fy_rbs = fy_rbs[fy_rbs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [373]:
lr = LinearRegression()
lr.fit(X_train_fy_rbs[features], y_train_fy_rbs)
lr_fy_rbs_preds = lr.predict(X_test_fy_rbs[features])

r2_score(y_test_fy_rbs, lr_fy_rbs_preds)

-0.47333388764098006

In [374]:
mean_squared_error(y_test_fy_rbs, lr_fy_rbs_preds)

1085.3671247246527

#### Random Forest

In [380]:
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train_fy_rbs[features], y_train_fy_rbs)
rf_fy_rbs_preds = rf_reg.predict(X_test_fy_rbs[features])

r2_score(y_test_fy_rbs, rf_fy_rbs_preds)

0.05639500191769775

In [382]:
mean_squared_error(y_test_fy_rbs, rf_fy_rbs_preds)

695.1294966032609

#### Neural Net

In [397]:
np.random.seed(42)
rb_model = Sequential()
rb_model.add(Dense(X_train_fy_rbs[features].shape[1], input_shape=(X_train_fy_rbs[features].shape[1],), activation='relu'))
rb_model.add(Dropout(0.5))
rb_model.add(Dense(12, activation='relu'))
rb_model.add(Dense(6, activation='relu'))
rb_model.add(Dense(1))

adam = Adam(lr=0.01)
rb_model.compile(loss='mean_squared_error', optimizer=adam)

In [398]:
rb_model.fit(X_train_fy_rbs[features], y_train_fy_rbs, validation_data=(X_test_fy_rbs[features], y_test_fy_rbs), epochs=75, batch_size=32)

Train on 157 samples, validate on 46 samples
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.callbacks.History at 0x1ef04338198>

# Receiver

In [316]:
avg_recs = avg_players[(avg_players['position']=='WR') | (avg_players['position']=='TE')]
fy_recs = final_year_players[(final_year_players['position']=='WR') | (final_year_players['position']=='TE')]

## Average Statistics

In [321]:
avg_recs.isnull().sum()[:50]

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                    22
losses                  22
rank                    22
nfl_team_win_perc        0
bench_reps              92
broad_jump              86
cone                    97
forty_yd                86
height                  75
shuttle                 95
vertical                86
weight                  75
games                   22
pass_cmp                22
pass_att                22
pass_cmp_pct            22
pass_yds                22
pass_yds_per_att        22
adj_pass_yds_per_att    22
pass_td                 22
pass_int                22
pass_rating             22
rush_att                22
rush_yds                22
rush_yds_per_att        22
rush_td                 22
rec                     22
rec_yds                 22
rec_yds_per_rec         22
rec_td                  22
scrim_att               22
s

In [322]:
avg_recs.isnull().sum()[50:]

fumbles_rec             22
fumbles_rec_yds         22
fumbles_rec_td          22
fumbles_forced          22
punt_ret                22
punt_ret_yds            22
punt_ret_yds_per_ret    22
punt_ret_td             22
kick_ret                22
kick_ret_yds            22
kick_ret_yds_per_ret    22
kick_ret_td             22
avg_grade                0
conference_ACC           0
conference_American      0
conference_Big 12        0
conference_Big East      0
conference_Big Ten       0
conference_CUSA          0
conference_Ind           0
conference_MAC           0
conference_MVC           0
conference_MWC           0
conference_Pac-10        0
conference_Pac-12        0
conference_SEC           0
conference_Sun Belt      0
conference_WAC           0
nfl_team_49ers           0
nfl_team_Bears           0
nfl_team_Bengals         0
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
nfl_team_Chargers        0
n

In [318]:
len(avg_recs)

530

In [319]:
avg_recs.columns[:50]

Index(['draft_overall', 'draft_round', 'draft_year', 'player', 'position',
       'school', 'wins', 'losses', 'rank', 'nfl_team_win_perc', 'bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended'],
      dtype='object')

In [320]:
avg_recs.columns[50:]

Index(['fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td',
       'avg_grade', 'conference_ACC', 'conference_American',
       'conference_Big 12', 'conference_Big East', 'conference_Big Ten',
       'conference_CUSA', 'conference_Ind', 'conference_MAC', 'conference_MVC',
       'conference_MWC', 'conference_Pac-10', 'conference_Pac-12',
       'conference_SEC', 'conference_Sun Belt', 'conference_WAC',
       'nfl_team_49ers', 'nfl_team_Bears', 'nfl_team_Bengals',
       'nfl_team_Bills', 'nfl_team_Broncos', 'nfl_team_Browns',
       'nfl_team_Buccaneers', 'nfl_team_Cardinals', 'nfl_team_Chargers',
       'nfl_team_Chiefs', 'nfl_team_Colts', 'nfl_team_Cowboys',
       'nfl_team_Dolphins', 'nfl_team_Eagles', 'nfl_team_Falcons',
       'nfl_team_Giants', 'nfl_team_Jaguars', 'nfl_team_Jets',
       'nfl_team_Lions', 'nfl_team

In [325]:
avg_recs.dropna(subset=['wins', 'losses', 'rank', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

avg_recs.dropna(subset=['bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight'], how='all', inplace=True)

avg_recs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [330]:
avg_recs.isnull().sum().sum()

0

In [480]:
features = [
#     'draft_overall', 'draft_round', 'draft_year', 
            'wins', 'losses', 'rank', 'nfl_team_win_perc', 
    'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight', 'games',
       'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 
#     'scrim_att', 'scrim_yds', 'scrim_yds_per_att', 'scrim_td',
    'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td',
           'conference_ACC', 'conference_American', 'conference_Big 12',
       'conference_Big East', 'conference_Big Ten', 'conference_CUSA',
       'conference_Ind', 'conference_MAC', 'conference_MVC', 'conference_MWC',
       'conference_Pac-10', 'conference_Pac-12', 'conference_SEC',
       'conference_Sun Belt', 'conference_WAC', 'nfl_team_49ers',
       'nfl_team_Bears', 'nfl_team_Bengals', 'nfl_team_Bills',
       'nfl_team_Broncos', 'nfl_team_Browns', 'nfl_team_Buccaneers',
       'nfl_team_Cardinals', 'nfl_team_Chargers', 'nfl_team_Chiefs',
       'nfl_team_Colts', 'nfl_team_Cowboys', 'nfl_team_Dolphins',
       'nfl_team_Eagles', 'nfl_team_Falcons', 'nfl_team_Giants',
       'nfl_team_Jaguars', 'nfl_team_Jets', 'nfl_team_Lions',
       'nfl_team_Packers', 'nfl_team_Panthers', 'nfl_team_Patriots',
       'nfl_team_Raiders', 'nfl_team_Rams', 'nfl_team_Ravens',
       'nfl_team_Redskins', 'nfl_team_Saints', 'nfl_team_Seahawks',
       'nfl_team_Steelers', 'nfl_team_Texans', 'nfl_team_Titans',
       'nfl_team_Vikings']

In [481]:
X_train_avg_recs = avg_recs[avg_recs['draft_year']<2015]
y_train_avg_recs = avg_recs[avg_recs['draft_year']<2015]['avg_grade']
X_test_avg_recs = avg_recs[avg_recs['draft_year']>=2015]
y_test_avg_recs = avg_recs[avg_recs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [482]:
lr = LinearRegression()
lr.fit(X_train_avg_recs[features], y_train_avg_recs)
lr_avg_recs_preds = lr.predict(X_test_avg_recs[features])

r2_score(y_test_avg_recs, lr_avg_recs_preds)

-0.4734360540290099

In [489]:
mean_squared_error(y_test_avg_recs, lr_avg_recs_preds)

1039.1182544364533

#### Random Forest

In [493]:
rf = RandomForestRegressor()
rf.fit(X_train_avg_recs[features], y_train_avg_recs)
rf_avg_recs_preds = rf.predict(X_test_avg_recs[features])

r2_score(y_test_avg_recs, rf_avg_recs_preds)

-0.07230940968804167

In [494]:
mean_squared_error(y_test_avg_recs, rf_avg_recs_preds)

756.2298200617284

#### Neural Net

In [495]:
np.random.seed(42)
rec_model = Sequential()
rec_model.add(Dense(X_train_avg_recs[features].shape[1], input_shape=(X_train_avg_recs[features].shape[1],), activation='relu'))
rec_model.add(Dropout(0.5))
rec_model.add(Dense(12, activation='relu'))
rec_model.add(Dense(6, activation='relu'))
rec_model.add(Dense(1))

adam = Adam(lr=0.01)
rec_model.compile(loss='mean_squared_error', optimizer=adam)

In [496]:
rec_model.fit(X_train_avg_recs[features], y_train_avg_recs, validation_data=(X_test_avg_recs[features], y_test_avg_recs), epochs=50, batch_size=32)

Train on 309 samples, validate on 99 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1ef14ed4358>

## Final Year Stats

In [497]:
fy_recs.isnull().sum()

draft_overall            0
draft_round              0
draft_year               0
player                   0
position                 0
school                   0
wins                    23
losses                  23
rank                    23
nfl_team_win_perc        0
bench_reps              92
broad_jump              86
cone                    97
forty_yd                86
height                  75
shuttle                 95
vertical                86
weight                  75
games                   25
pass_cmp                22
pass_att                22
pass_cmp_pct            22
pass_yds                22
pass_yds_per_att        22
adj_pass_yds_per_att    22
pass_td                 22
pass_int                22
pass_rating             22
rush_att                22
rush_yds                22
                        ..
nfl_team_Bengals         0
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
n

In [498]:
fy_recs.dropna(subset=['wins', 'losses', 'rank', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

fy_recs.dropna(subset=['bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight'], how='all', inplace=True)

fy_recs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


In [500]:
fy_recs.isnull().sum().sum()

0

In [501]:
X_train_fy_recs = fy_recs[fy_recs['draft_year']<2015]
y_train_fy_recs = fy_recs[fy_recs['draft_year']<2015]['avg_grade']
X_test_fy_recs = fy_recs[fy_recs['draft_year']>=2015]
y_test_fy_recs = fy_recs[fy_recs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [502]:
lr = LinearRegression()
lr.fit(X_train_fy_recs[features], y_train_fy_recs)
lr_fy_recs_preds = lr.predict(X_test_fy_recs[features])

r2_score(y_test_fy_recs, lr_fy_recs_preds)

-0.7245357211595806

In [503]:
mean_squared_error(y_test_fy_recs, lr_fy_recs_preds)

1199.9069294024655

#### Random Forest

In [505]:
rf = RandomForestRegressor()
rf.fit(X_train_fy_recs[features], y_train_fy_recs)
rf_fy_recs_preds = rf.predict(X_test_fy_recs[features])

r2_score(y_test_fy_recs, rf_fy_recs_preds)

-0.1740290583130415

In [506]:
mean_squared_error(y_test_fy_recs, rf_fy_recs_preds)

816.8723820011337

In [509]:
np.random.seed(42)
rec_model = Sequential()
rec_model.add(Dense(X_train_fy_recs[features].shape[1], input_shape=(X_train_fy_recs[features].shape[1],), activation='relu'))
rec_model.add(Dropout(0.5))
rec_model.add(Dense(12, activation='relu'))
rec_model.add(Dense(6, activation='relu'))
rec_model.add(Dense(1))

adam = Adam(lr=0.01)
rec_model.compile(loss='mean_squared_error', optimizer=adam)

In [511]:
rec_model.fit(X_train_fy_recs[features], y_train_fy_recs, validation_data=(X_test_fy_recs[features], y_test_fy_recs), epochs=50, batch_size=32)

Train on 309 samples, validate on 98 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1ef05c08c50>

# Defense

## Average Stats

In [543]:
def_pos = ['CB', 'DE', 'ILB', 'S', 'DT', 'OLB', 'LB', 'DB', 'DL', 'NT']
avg_players['defense'] = avg_players['position'].map(lambda x: 1 if x in def_pos else 0)
final_year_players['defense'] = final_year_players['position'].map(lambda x: 1 if x in def_pos else 0)

In [544]:
avg_defs = avg_players[avg_players['defense']==1]
fy_defs = final_year_players[final_year_players['defense']==1]

In [545]:
len(avg_defs)

1392

In [546]:
avg_defs.columns[:50]

Index(['draft_overall', 'draft_round', 'draft_year', 'player', 'position',
       'school', 'wins', 'losses', 'rank', 'nfl_team_win_perc', 'bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended'],
      dtype='object')

In [547]:
avg_defs.columns[50:]

Index(['fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td',
       'avg_grade', 'conference_ACC', 'conference_American',
       'conference_Big 12', 'conference_Big East', 'conference_Big Ten',
       'conference_CUSA', 'conference_Ind', 'conference_MAC', 'conference_MVC',
       'conference_MWC', 'conference_Pac-10', 'conference_Pac-12',
       'conference_SEC', 'conference_Sun Belt', 'conference_WAC',
       'nfl_team_49ers', 'nfl_team_Bears', 'nfl_team_Bengals',
       'nfl_team_Bills', 'nfl_team_Broncos', 'nfl_team_Browns',
       'nfl_team_Buccaneers', 'nfl_team_Cardinals', 'nfl_team_Chargers',
       'nfl_team_Chiefs', 'nfl_team_Colts', 'nfl_team_Cowboys',
       'nfl_team_Dolphins', 'nfl_team_Eagles', 'nfl_team_Falcons',
       'nfl_team_Giants', 'nfl_team_Jaguars', 'nfl_team_Jets',
       'nfl_team_Lions', 'nfl_team

In [548]:
avg_defs.isnull().sum()[50:]

fumbles_rec             67
fumbles_rec_yds         67
fumbles_rec_td          67
fumbles_forced          67
punt_ret                67
punt_ret_yds            67
punt_ret_yds_per_ret    67
punt_ret_td             67
kick_ret                67
kick_ret_yds            67
kick_ret_yds_per_ret    67
kick_ret_td             67
avg_grade                0
conference_ACC           0
conference_American      0
conference_Big 12        0
conference_Big East      0
conference_Big Ten       0
conference_CUSA          0
conference_Ind           0
conference_MAC           0
conference_MVC           0
conference_MWC           0
conference_Pac-10        0
conference_Pac-12        0
conference_SEC           0
conference_Sun Belt      0
conference_WAC           0
nfl_team_49ers           0
nfl_team_Bears           0
                        ..
nfl_team_Bills           0
nfl_team_Broncos         0
nfl_team_Browns          0
nfl_team_Buccaneers      0
nfl_team_Cardinals       0
nfl_team_Chargers        0
n

In [549]:
avg_defs.dropna(subset=['wins', 'losses', 'rank', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended', 'fumbles_rec', 
        'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

avg_defs.dropna(subset=['bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight'], how='all', inplace=True)
avg_defs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [550]:
avg_defs.isnull().sum().sum()

0

In [551]:
features = [#     'draft_overall', 'draft_round', 'draft_year', 
            'wins', 'losses', 'rank', 'nfl_team_win_perc', 
        'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical', 'weight', 'games',
       'tackles_solo', 'tackles_assists', 'tackles_total', 'tackles_loss', 
        'sacks', 'def_int', 'def_int_yds', 'fumbles_rec', 'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended',
           'conference_ACC', 'conference_American', 'conference_Big 12',
       'conference_Big East', 'conference_Big Ten', 'conference_CUSA',
       'conference_Ind', 'conference_MAC', 'conference_MVC', 'conference_MWC',
       'conference_Pac-10', 'conference_Pac-12', 'conference_SEC',
       'conference_Sun Belt', 'conference_WAC', 'nfl_team_49ers',
       'nfl_team_Bears', 'nfl_team_Bengals', 'nfl_team_Bills',
       'nfl_team_Broncos', 'nfl_team_Browns', 'nfl_team_Buccaneers',
       'nfl_team_Cardinals', 'nfl_team_Chargers', 'nfl_team_Chiefs',
       'nfl_team_Colts', 'nfl_team_Cowboys', 'nfl_team_Dolphins',
       'nfl_team_Eagles', 'nfl_team_Falcons', 'nfl_team_Giants',
       'nfl_team_Jaguars', 'nfl_team_Jets', 'nfl_team_Lions',
       'nfl_team_Packers', 'nfl_team_Panthers', 'nfl_team_Patriots',
       'nfl_team_Raiders', 'nfl_team_Rams', 'nfl_team_Ravens',
       'nfl_team_Redskins', 'nfl_team_Saints', 'nfl_team_Seahawks',
       'nfl_team_Steelers', 'nfl_team_Texans', 'nfl_team_Titans',
       'nfl_team_Vikings']

In [552]:
X_train_avg_defs = avg_defs[avg_defs['draft_year']<2015]
y_train_avg_defs = avg_defs[avg_defs['draft_year']<2015]['avg_grade']
X_test_avg_defs = avg_defs[avg_defs['draft_year']>=2015]
y_test_avg_defs = avg_defs[avg_defs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [553]:
lr = LinearRegression()
lr.fit(X_train_avg_defs[features], y_train_avg_defs)
lr_avg_defs_preds = lr.predict(X_test_avg_defs[features])

r2_score(y_test_avg_defs, lr_avg_defs_preds)

-0.015830513602204555

In [556]:
mean_squared_error(y_test_avg_defs, lr_avg_defs_preds)

624.8085228025868

#### Random Forest

In [555]:
rf = RandomForestRegressor()
rf.fit(X_train_avg_defs[features], y_train_avg_defs)
rf_avg_defs_preds = rf.predict(X_test_avg_defs[features])

r2_score(y_test_avg_defs, rf_avg_defs_preds)

-0.07769684901674023

In [557]:
mean_squared_error(y_test_avg_defs, rf_avg_defs_preds)

662.8607501416667

#### Neural Net

In [580]:
np.random.seed(42)
def_model = Sequential()
def_model.add(Dense(X_train_avg_defs[features].shape[1], input_shape=(X_train_avg_defs[features].shape[1],), activation='relu'))
def_model.add(Dropout(0.5))
def_model.add(Dense(8, activation='relu'))
def_model.add(Dense(4, activation='relu'))
def_model.add(Dense(1))

adam = Adam(lr=0.01)
def_model.compile(loss='mean_squared_error', optimizer=adam)

In [581]:
def_model.fit(X_train_avg_defs[features], y_train_avg_defs, validation_data=(X_test_avg_defs[features], y_test_avg_defs), epochs=25, batch_size=32)

Train on 780 samples, validate on 250 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1ef1cf65dd8>

## Final Year Stats

In [558]:
fy_defs.isnull().sum()

draft_overall             0
draft_round               0
draft_year                0
player                    0
position                  0
school                    0
wins                     67
losses                   67
rank                     67
nfl_team_win_perc         0
bench_reps              257
broad_jump              247
cone                    292
forty_yd                241
height                  215
shuttle                 291
vertical                250
weight                  215
games                   107
pass_cmp                 67
pass_att                 67
pass_cmp_pct             67
pass_yds                 67
pass_yds_per_att         67
adj_pass_yds_per_att     67
pass_td                  67
pass_int                 67
pass_rating              67
rush_att                 67
rush_yds                 67
                       ... 
nfl_team_Bills            0
nfl_team_Broncos          0
nfl_team_Browns           0
nfl_team_Buccaneers       0
nfl_team_Cardinals  

In [559]:
fy_defs.dropna(subset=['wins', 'losses', 'rank', 'games', 'pass_cmp', 'pass_att', 'pass_cmp_pct', 'pass_yds',
       'pass_yds_per_att', 'adj_pass_yds_per_att', 'pass_td', 'pass_int',
       'pass_rating', 'rush_att', 'rush_yds', 'rush_yds_per_att', 'rush_td',
       'rec', 'rec_yds', 'rec_yds_per_rec', 'rec_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'tackles_solo', 'tackles_assists',
       'tackles_total', 'tackles_loss', 'sacks', 'def_int', 'def_int_yds',
       'def_int_yds_per_int', 'def_int_td', 'pass_defended', 'fumbles_rec', 
        'fumbles_rec_yds', 'fumbles_rec_td', 'fumbles_forced',
       'punt_ret', 'punt_ret_yds', 'punt_ret_yds_per_ret', 'punt_ret_td',
       'kick_ret', 'kick_ret_yds', 'kick_ret_yds_per_ret', 'kick_ret_td'], how='all', inplace=True)

fy_defs.dropna(subset=['bench_reps',
       'broad_jump', 'cone', 'forty_yd', 'height', 'shuttle', 'vertical',
       'weight'], how='all', inplace=True)

fy_defs.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [561]:
fy_defs.isnull().sum().sum()

0

In [562]:
X_train_fy_defs = fy_defs[fy_defs['draft_year']<2015]
y_train_fy_defs = fy_defs[fy_defs['draft_year']<2015]['avg_grade']
X_test_fy_defs = fy_defs[fy_defs['draft_year']>=2015]
y_test_fy_defs = fy_defs[fy_defs['draft_year']>=2015]['avg_grade']

#### Linear Regression

In [582]:
lr = LinearRegression()
lr.fit(X_train_fy_defs[features], y_train_fy_defs)
lr_fy_defs_preds = lr.predict(X_test_fy_defs[features])

r2_score(y_test_fy_defs, lr_fy_defs_preds)

-0.027808425064910436

In [583]:
mean_squared_error(y_test_fy_defs, lr_fy_defs_preds)

632.1757962473813

#### Random Forest

In [584]:
rf = RandomForestRegressor()
rf.fit(X_train_fy_defs[features], y_train_fy_defs)
rf_fy_defs_preds = rf.predict(X_test_fy_defs[features])

r2_score(y_test_fy_defs, rf_fy_defs_preds)

-0.11304343604630485

In [585]:
mean_squared_error(y_test_fy_defs, rf_fy_defs_preds)

684.6014327972222

#### Neural Net

In [594]:
np.random.seed(42)
def_model = Sequential()
def_model.add(Dense(X_train_fy_defs[features].shape[1], input_shape=(X_train_fy_defs[features].shape[1],), activation='relu'))
def_model.add(Dropout(0.5))
def_model.add(Dense(8, activation='relu'))
def_model.add(Dense(4, activation='relu'))
def_model.add(Dense(1))

adam = Adam(lr=0.01)
def_model.compile(loss='mean_squared_error', optimizer=adam)

In [595]:
def_model.fit(X_train_fy_defs[features], y_train_fy_defs, validation_data=(X_test_fy_defs[features], y_test_fy_defs), epochs=25, batch_size=32)

Train on 744 samples, validate on 250 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1ef219578d0>

In [596]:
avg_players[avg_players['player']=='Patrick Mahomes']

Unnamed: 0,draft_overall,draft_round,draft_year,player,position,school,wins,losses,rank,nfl_team_win_perc,...,nfl_team_Rams,nfl_team_Ravens,nfl_team_Redskins,nfl_team_Saints,nfl_team_Seahawks,nfl_team_Steelers,nfl_team_Texans,nfl_team_Titans,nfl_team_Vikings,defense
1990,10,1,2017,Patrick Mahomes,QB,Texas Tech,5.5,7.0,77.5,0.75,...,0,0,0,0,0,0,0,0,0,0
