# General Assembly DSI - Denver 2018
## Capstone Project - DFS Model
This is my capstone project for General Assembly's fifth [Data Science Immersive](https://generalassemb.ly/education/data-science-immersive) cohort in 2018. I am developing a model to help optimize NFL lineups on the daily fantasy sports (DFS) platforms [DraftKings](https://www.draftkings.com/) and [FanDuel](https://www.fanduel.com/).

### Problem Statement

Can we build a model that predicts a football player's fantasy performance, use those predictions to estimate each player's value, and pair the model with a daily fantasy strategy in order to be profitable?

In [57]:
import pandas as pd

In [58]:
# Read the QB stat lines first, then the QB salary / matchup table, from the same relative paths.
qb_stats, quarterbacks = map(pd.read_csv, ('../data/qb_stats.csv', '../data/quarterbacks.csv'))

In [59]:
# Preview the first five rows of the raw QB stat lines.
qb_stats.head()

Unnamed: 0,Age,Attempts,Completions,Interceptions,Name,Oppt,Rating,TDs,Team,Week,Y/A,Yards,Year,Month
0,28.291,25,17,0,"Fitzpatrick, Ryan",KC,133.0,4,BUF,1,8.32,208,2011,9
1,27.28,35,27,0,"Rodgers, Aaron",NO,132.1,3,GB,1,8.91,312,2011,9
2,34.04,48,32,1,"Brady, Tom",MIA,121.6,4,NE,1,10.77,517,2011,9
3,23.216,33,24,1,"Stafford, Matthew",TB,118.9,3,DET,1,9.24,305,2011,9
4,26.238,29,17,0,"Flacco, Joe",PIT,117.6,3,BAL,1,7.72,224,2011,9


In [60]:
# Preview the first five rows of the QB salary / matchup table.
quarterbacks.head()

Unnamed: 0,Name,Week,Year,Team,Oppt,h/a,FD points,FD salary,Opp_Avg_Att_Allowed,Opp_Avg_Comp_Allowed,...,Temperature,Wind,Weather_DOME,Weather_Fog,Weather_Rain,Weather_Rain | Fog,Weather_Snow,Weather_Snow | Fog,Weather_Snow | Freezing Rain,Weather_Sunny
0,"Batch, Charlie",2,2011,PIT,SEA,1,0.0,5000.0,29.0,17.0,...,63.0,1.0,0,0,0,0,0,0,0,1
1,"Bradford, Sam",2,2011,LAR,NYG,0,16.74,7300.0,32.0,14.0,...,68.0,6.0,0,0,0,0,0,0,0,1
2,"Brady, Tom",2,2011,NE,LAC,1,29.22,9100.0,49.0,30.0,...,63.0,12.0,0,0,0,0,0,0,0,1
3,"Brees, Drew",2,2011,NO,CHI,1,23.0,9300.0,35.0,27.0,...,72.0,0.0,1,0,0,0,0,0,0,0
4,"Brunell, Mark",2,2011,NYJ,JAX,1,0.0,5000.0,36.0,23.0,...,71.0,1.0,0,0,0,0,0,0,0,1


In [61]:
# Row / column count of the salary table before it is merged with the stat lines.
quarterbacks.shape

(4136, 27)

In [62]:
# Dry run of the inner join: only the resulting shape is displayed, nothing is assigned.
quarterbacks.merge(
    qb_stats,
    how='inner',
    on=['Name', 'Team', 'Week', 'Year', 'Oppt'],
).shape

(2664, 36)

> Because player stats were scraped only for players who were active, the inner merge drops every row without a matching stat line: 4,136 rows shrink to 2,664.

In [63]:
# Keep only the player-weeks that appear in both tables (inner join on the shared keys).
quarterbacks = quarterbacks.merge(
    qb_stats,
    how='inner',
    on=['Name', 'Team', 'Week', 'Year', 'Oppt'],
)

In [64]:
# Preview the merged table: salary / matchup columns followed by the stat-line columns.
quarterbacks.head()

Unnamed: 0,Name,Week,Year,Team,Oppt,h/a,FD points,FD salary,Opp_Avg_Att_Allowed,Opp_Avg_Comp_Allowed,...,Weather_Sunny,Age,Attempts,Completions,Interceptions,Rating,TDs,Y/A,Yards,Month
0,"Bradford, Sam",2,2011,LAR,NYG,0,16.74,7300.0,32.0,14.0,...,1,23.315,46,22,0,79.2,1,7.2,331,9
1,"Brady, Tom",2,2011,NE,LAC,1,29.22,9100.0,49.0,30.0,...,1,34.046,40,31,0,135.7,3,10.58,423,9
2,"Brees, Drew",2,2011,NO,CHI,1,23.0,9300.0,35.0,27.0,...,0,32.246,37,26,0,118.1,3,7.3,270,9
3,"Cassel, Matt",2,2011,KC,DET,0,0.32,7700.0,25.0,17.0,...,0,29.124,22,15,3,44.5,0,6.05,133,9
4,"Dalton, Andy",2,2011,CIN,DEN,0,21.58,5100.0,40.0,19.0,...,1,23.324,41,27,0,107.0,2,8.1,332,9


In [78]:
# Per player / season / week totals of the passing stats, first ten groups only.
(
    quarterbacks
    .groupby(['Name', 'Year', 'Week'])[
        ['Attempts', 'Completions', 'Interceptions', 'Rating', 'TDs', 'Y/A', 'Yards']
    ]
    .sum()
    .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Attempts,Completions,Interceptions,Rating,TDs,Y/A,Yards
Name,Year,Week,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Barkley, Matt",2013,7,20,11,3,35.2,0,6.45,129
"Barkley, Matt",2013,8,26,17,1,65.9,0,6.08,158
"Barkley, Matt",2013,9,3,2,0,75.7,0,4.33,13
"Barkley, Matt",2014,10,1,0,0,39.6,0,0.0,0
"Barkley, Matt",2016,7,15,6,2,18.3,0,5.4,81
"Barkley, Matt",2016,12,54,28,2,72.8,3,5.85,316
"Barkley, Matt",2016,13,18,11,0,97.5,0,10.67,192
"Barkley, Matt",2016,14,32,20,0,92.2,1,6.63,212
"Barkley, Matt",2016,15,43,30,3,81.7,2,8.42,362
"Barkley, Matt",2016,16,40,24,5,62.8,2,8.08,323


In [73]:
# Passing-stat columns that get replaced by lagged rolling averages further down.
features = [
    'Attempts',
    'Completions',
    'Interceptions',
    'Rating',
    'TDs',
    'Y/A',
    'Yards',
]

In [81]:
# Rolling 3-game mean of each stat within a player-season.
# Sort chronologically first: rolling windows follow row order inside each group, so without
# the sort the result silently depends on how the CSV rows happened to be ordered.
# The original row label survives as the third index level, so later lookups by
# (Name, Year, row label) are unaffected by the sort.
rolling_qb_stats = (
    quarterbacks
    .sort_values(['Name', 'Year', 'Week'])
    .groupby(['Name', 'Year'])[features]
    .rolling(window = 3, min_periods = 0)
    .mean()
)

In [82]:
# Lag the rolling averages by one game inside each (Name, Year) group so a row only sees
# games played before it; the first game of every player-season becomes NaN.
shifted_qb_stats = rolling_qb_stats.groupby(level = ['Name', 'Year']).shift(periods = 1)

In [84]:
# Preview the lagged rolling averages; the third index level is the original row label.
shifted_qb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Attempts,Completions,Interceptions,Rating,TDs,Y/A,Yards
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Barkley, Matt",2013,882,,,,,,,
"Barkley, Matt",2013,1048,20.0,11.0,3.0,35.2,0.0,6.45,129.0
"Barkley, Matt",2013,1203,23.0,14.0,2.0,50.55,0.0,6.265,143.5
"Barkley, Matt",2014,1385,,,,,,,
"Barkley, Matt",2016,955,,,,,,,


In [87]:
# Overwrite each game's actual stat line with the lagged rolling averages.
# Dropping the (Name, Year) index levels leaves the original row labels, so the assignment
# aligns on the index in one vectorised step instead of one .loc write per row via iterrows().
quarterbacks[features] = shifted_qb_stats.reset_index(level = ['Name', 'Year'], drop = True)[features]

In [90]:
# Discard every row that still has a missing value in any column; this includes each
# player's first game of a season, whose lagged averages are NaN.
# NOTE(review): rows with gaps in non-stat columns are dropped too — confirm that is intended.
quarterbacks = quarterbacks.dropna()

In [97]:
# Turn numeric month codes into month names.
# .replace() is used rather than .map(): map() turns every value missing from the dict into NaN,
# so re-running this cell (when 'Month' already holds names) would blank the whole column, and
# an unexpected month number would become NaN silently after the dropna step has already run.
quarterbacks['Month'] = quarterbacks['Month'].replace(
    {9: 'September', 10: 'October', 11: 'November', 12: 'December', 1: 'January'}
)

In [102]:
# Export left disabled; uncomment to write the QB table to ../data/quarterbacks_final.csv.
# quarterbacks.to_csv('../data/quarterbacks_final.csv', index = False)

### Running Backs

In [106]:
# Read the RB salary / matchup table first, then the RB stat lines, from the same relative paths.
runningbacks, rb_stats = map(pd.read_csv, ('../data/runningbacks.csv', '../data/rb_stats.csv'))

In [109]:
# Preview the first five rows of the RB salary / matchup table.
runningbacks.head()

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,FD points,FD salary,Opp_Avg_Carries,Opp_Rank,Opp_Avg_TDs_Allowed,Opp_Avg_Yds_Allowed,Favored,Spread,O/U,Weather,Temperature,Wind
0,2,2011,2603,"McFadden, Darren",RB,OAK,a,BUF,27.8,9100.0,12.0,4,0.0,25.0,BUF,-4.0,41.0,Sunny,66.0,5.0
1,2,2011,2536,"Bush, Michael",RB,OAK,a,BUF,8.3,5900.0,12.0,4,0.0,25.0,BUF,-4.0,41.0,Sunny,66.0,5.0
2,2,2011,2665,"Reece, Marcel",RB,OAK,a,BUF,3.9,4500.0,12.0,4,0.0,25.0,BUF,-4.0,41.0,Sunny,66.0,5.0
3,2,2011,2820,"Jones, Taiwan",RB,OAK,a,BUF,0.4,4500.0,12.0,4,0.0,25.0,BUF,-4.0,41.0,Sunny,66.0,5.0
4,2,2011,2332,"Cartwright, Rock",RB,OAK,a,BUF,0.0,4500.0,12.0,4,0.0,25.0,BUF,-4.0,41.0,Sunny,66.0,5.0


In [112]:
# Row / column count of the RB salary table before merging.
runningbacks.shape

(12339, 20)

In [110]:
# Preview the first five rows of the RB stat lines.
rb_stats.head()

Unnamed: 0,Age,Carries,Date,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Rush_TDs,Rush_Yds,Targets,Team,Week,Year
0,26.238,16.0,2011-11-14,November,"Peterson, Adrian",GB,0.0,6.0,2.0,0.0,98.0,3.0,MIN,10,2011
1,24.343,5.0,2011-11-13,November,"Blount, LeGarrette",HOU,0.0,0.0,0.0,0.0,15.0,0.0,TB,10,2011
2,28.183,22.0,2011-11-13,November,"Gore, Frank",NYG,0.0,19.0,3.0,0.0,59.0,4.0,SF,10,2011
3,21.327,13.0,2011-11-13,November,"Ingram, Mark",ATL,0.0,0.0,0.0,0.0,40.0,0.0,NO,10,2011
4,21.047,2.0,2011-11-13,November,"Lewis, Dion",ARI,0.0,-3.0,1.0,0.0,10.0,1.0,PHI,10,2011


In [113]:
# Row / column count of the RB stat lines before merging.
rb_stats.shape

(3826, 15)

In [116]:
# Keep only the RB player-weeks present in both tables (inner join on the shared keys).
runningbacks = runningbacks.merge(
    rb_stats,
    how='inner',
    on=['Name', 'Week', 'Year', 'Team', 'Oppt'],
)

In [117]:
# Row / column count after the merge, to see how many player-weeks survived the join.
runningbacks.shape

(3703, 30)

In [119]:
# Number of merged RB rows per season.
runningbacks['Year'].value_counts()

2017    1061
2016     813
2015     646
2014     501
2013     304
2012     236
2011     142
Name: Year, dtype: int64

In [126]:
# List the columns currently in the RB table.
# NOTE(review): the saved output shows 27 columns although the merge above reported 30, so this
# cell was last run after column clean-up had already happened — re-run top to bottom to refresh.
runningbacks.columns

Index(['Week', 'Year', 'Name', 'Team', 'h/a', 'Oppt', 'FD points', 'FD salary',
       'Opp_Avg_Carries', 'Opp_Rank', 'Opp_Avg_TDs_Allowed',
       'Opp_Avg_Yds_Allowed', 'Favored', 'Spread', 'O/U', 'Weather',
       'Temperature', 'Wind', 'Age', 'Carries', 'Month', 'Rec_TDs', 'Rec_Yds',
       'Receptions', 'Rush_TDs', 'Rush_Yds', 'Targets'],
      dtype='object')

In [125]:
# Remove the position label. Rebinding the result (instead of inplace = True) together with
# errors = 'ignore' makes the cell safe to re-run: a second execution no longer raises
# KeyError once 'Pos' is already gone.
runningbacks = runningbacks.drop(columns = ['Pos'], errors = 'ignore')

In [128]:
# Final column order for the RB table: identity and game context, the player's own stats,
# opponent averages, then salary and the FD points column last.
rb_cols = [
    'Name', 'Age', 'Week', 'Year', 'Team', 'Oppt', 'h/a',
    'Favored', 'Spread', 'O/U',
    'Weather', 'Temperature', 'Wind', 'Month',
    'Carries', 'Rush_Yds', 'Rush_TDs',
    'Receptions', 'Targets', 'Rec_Yds', 'Rec_TDs',
    'Opp_Avg_Carries', 'Opp_Rank', 'Opp_Avg_TDs_Allowed', 'Opp_Avg_Yds_Allowed',
    'FD salary', 'FD points',
]

# Keep only the listed columns, in that order; any column not in rb_cols is discarded.
runningbacks = runningbacks.loc[:, rb_cols]

In [131]:
# Export left disabled; uncomment to write ../data/runningbacks_final.csv.
# NOTE(review): unlike the other three export cells, this call omits index = False, so the
# row index would be written as an extra unnamed column — confirm that is intended.
# runningbacks.to_csv('../data/runningbacks_final.csv')

### Wide Receivers

In [132]:
# Read the WR salary / matchup table first, then the WR stat lines, from the same relative paths.
receivers, wr_stats = map(pd.read_csv, ('../data/receivers.csv', '../data/wr_stats.csv'))

In [135]:
# Row / column count of the WR salary table before merging.
receivers.shape

(15662, 21)

In [136]:
# Row / column count of the WR stat table before merging.
wr_stats.shape

(6932, 11)

In [133]:
# Preview the first five rows of the WR salary / matchup table.
receivers.head()

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,FD points,FD salary,...,Opp_Avg_Rec_Allowed,Opp_Avg_Targets_Allowed,Opp_Avg_TDs_Allowed,Opp_Avg_Yds_Allowed,Favored,Spread,O/U,Weather,Temperature,Wind
0,2,2011,3610,"Austin, Miles",WR,DAL,a,SF,36.6,7100.0,...,13.0,25.0,1.0,161.0,DAL,-3.0,41.5,Sunny,72.0,3.0
1,2,2011,3934,"Holley, Jesse",WR,DAL,a,SF,11.1,4500.0,...,13.0,25.0,1.0,161.0,DAL,-3.0,41.5,Sunny,72.0,3.0
2,2,2011,3876,"Ogletree, Kevin",WR,DAL,a,SF,5.6,4500.0,...,13.0,25.0,1.0,161.0,DAL,-3.0,41.5,Sunny,72.0,3.0
3,2,2011,5130,"Harris, Dwayne",WR,DAL,a,SF,0.0,4500.0,...,13.0,25.0,1.0,161.0,DAL,-3.0,41.5,Sunny,72.0,3.0
4,2,2011,5076,"Williams, Kyle",WR,SF,h,DAL,7.7,4500.0,...,11.0,22.0,2.0,137.0,DAL,-3.0,41.5,Sunny,72.0,3.0


In [134]:
# Preview the first five rows of the WR stat table.
# NOTE(review): the fractional values (e.g. 3.333333 receptions) suggest these are already
# averaged upstream rather than raw per-game counts — confirm against the scraping notebook.
wr_stats.head()

Unnamed: 0,Age,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Targets,Team,Week,Year
0,22.362,9,"Baldwin, Doug",PIT,0.666667,41.0,3.333333,5.666667,SEA,2,2011
1,22.364,9,"Britt, Kenny",BAL,2.0,136.0,5.0,10.0,TEN,2,2011
2,23.07,9,"Brown, Antonio",SEA,0.333333,81.333333,4.333333,8.0,PIT,2,2011
3,21.027,9,"Cobb, Randall",CAR,0.0,40.0,2.666667,3.0,GB,2,2011
4,24.187,9,"Decker, Eric",CIN,0.0,18.333333,1.333333,4.333333,DEN,2,2011


In [140]:
# Keep only the WR player-weeks present in both tables (inner join on the shared keys).
# NOTE(review): wr_stats stores 'Month' as a number (e.g. 9), whereas the QB table is converted
# to month names and rb_stats / te_stats already hold names — confirm whether receivers
# should be converted too.
receivers = receivers.merge(
    wr_stats,
    how='inner',
    on=['Name', 'Year', 'Week', 'Team', 'Oppt'],
)

In [142]:
# Remove the GID and position columns. Rebinding the result (instead of inplace = True)
# together with errors = 'ignore' makes the cell safe to re-run: a second execution no longer
# raises KeyError once the columns are already gone.
receivers = receivers.drop(columns = ['GID', 'Pos'], errors = 'ignore')

In [143]:
# Export left disabled; uncomment to write the WR table to ../data/receivers_final.csv.
# receivers.to_csv('../data/receivers_final.csv', index = False)

### Tight Ends

In [144]:
# Read the TE salary / matchup table first, then the TE stat lines, from the same relative paths.
tight_ends, te_stats = map(pd.read_csv, ('../data/tightends.csv', '../data/te_stats.csv'))

In [145]:
# Row / column count of the TE salary table before merging.
tight_ends.shape

(8840, 21)

In [146]:
# Row / column count of the TE stat table before merging.
te_stats.shape

(3187, 11)

In [147]:
# Preview the first five rows of the TE salary / matchup table.
tight_ends.head()

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,FD points,FD salary,...,Opp_Avg_Rec_Allowed,Opp_Avg_Targets_Allowed,Opp_Avg_TDs_Allowed,Opp_Avg_Yds_Allowed,Favored,Spread,O/U,Weather,Temperature,Wind
0,2,2011,4051,"Gonzalez, Tony",TE,ATL,h,PHI,23.8,5700.0,...,4.0,7.0,1.0,30.0,PHI,-2.5,49.5,DOME,72.0,0.0
1,2,2011,4138,"Zelenka, Joe",TE,ATL,h,PHI,0.0,4500.0,...,4.0,7.0,1.0,30.0,PHI,-2.5,49.5,DOME,72.0,0.0
2,2,2011,4074,"Kelly, Reggie",TE,ATL,h,PHI,0.0,4500.0,...,4.0,7.0,1.0,30.0,PHI,-2.5,49.5,DOME,72.0,0.0
3,2,2011,4512,"Palmer, Michael",TE,ATL,h,PHI,0.0,4500.0,...,4.0,7.0,1.0,30.0,PHI,-2.5,49.5,DOME,72.0,0.0
4,2,2011,4363,"Celek, Brent",TE,PHI,a,ATL,6.3,4900.0,...,3.0,8.0,0.0,39.0,PHI,-2.5,49.5,DOME,72.0,0.0


In [148]:
# Preview the first five rows of the TE stat table; 'Month' already holds month names here.
te_stats.head()

Unnamed: 0,Age,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Targets,Team,Week,Year
0,23.077,September,"Hoomanawanui, Michael",NYG,0.0,21.0,2.0,3.0,LAR,2,2011
1,23.232,September,"Kendricks, Lance",NYG,0.0,19.333333,2.0,3.0,LAR,2,2011
2,24.164,September,"Cook, Jared",BAL,0.333333,90.666667,5.666667,6.666667,TEN,2,2011
3,27.23,September,"Davis, Vernon",DAL,0.333333,52.666667,3.666667,7.0,SF,2,2011
4,24.055,September,"Dickson, Ed",TEN,0.666667,23.0,2.333333,3.666667,BAL,2,2011


In [153]:
# Keep only the TE player-weeks present in both tables (inner join on the shared keys).
tight_ends = tight_ends.merge(
    te_stats,
    how='inner',
    on=['Name', 'Year', 'Week', 'Team', 'Oppt'],
)

In [154]:
# Remove the GID and position columns. Rebinding the result (instead of inplace = True)
# together with errors = 'ignore' makes the cell safe to re-run: a second execution no longer
# raises KeyError once the columns are already gone.
tight_ends = tight_ends.drop(columns = ['GID', 'Pos'], errors = 'ignore')

In [155]:
# Export left disabled; uncomment to write the TE table to ../data/tightends_final.csv.
# tight_ends.to_csv('../data/tightends_final.csv', index = False)