# General Assembly DSI - Denver 2018
## Capstone Project - DFS Model
This is my capstone project at General Assembly's fifth [Data Science Immersive](https://generalassemb.ly/education/data-science-immersive) cohort in 2018. I am developing a model to assist in optimizing NFL lineups on the daily fantasy sports platforms [Draft Kings](https://www.draftkings.com/) and [Fan Duel](https://www.fanduel.com/).

### Problem Statement

Can we build a model to predict a football player’s fantasy football performance to estimate their value and implement the model in conjunction with a daily fantasy strategy to be profitable?

### Data Joining
Merging the FanDuel, Defensive, and Game data that I scraped and collected from Kaggle.

In [2]:
import pandas as pd

In [3]:
fanduel = pd.read_csv('../data/fanduel.csv')
game_info = pd.read_csv('../data/game_information.csv')
qb_vs_def = pd.read_csv('../data/Defense_vs_QB.csv')
rb_vs_def = pd.read_csv('../data/Defense_vs_RB.csv')
wr_vs_def = pd.read_csv('../data/Defense_vs_WR.csv')
te_vs_def = pd.read_csv('../data/Defense_vs_TE.csv')

### Step 1) Make Uniform Team Names to Join On

In [31]:
# here is what every one SHOULD look like
teams = sorted(['NE', 'GB', 'PIT', 'DEN', 'SEA', 'NO', 'BAL', 'PHI', 'ATL', 'DAL',
'CIN', 'CAR', 'DET', 'KC', 'HOU', 'SF', 'ARI', 'NYG', 'LAC', 'MIN', 'IND', 
'NYJ', 'BUF', 'TEN', 'MIA', 'CHI', 'WAS', 'OAK', 'LAR', 'TB','CLE', 'JAX'])

#### Fanduel Data

In [86]:
fanduel['Oppt'] = fanduel['Oppt'].map(lambda x: x.upper())
fanduel['Team'] = fanduel['Team'].map(lambda x: x.upper())

In [30]:
fanduel['Team'].value_counts().index

Index(['NOR', 'OAK', 'DAL', 'ATL', 'ARI', 'CIN', 'BUF', 'GNB', 'KAN', 'DEN',
       'NYG', 'TEN', 'WAS', 'SFO', 'PHI', 'CLE', 'BAL', 'NYJ', 'NWE', 'TAM',
       'MIN', 'PIT', 'SEA', 'CHI', 'HOU', 'JAC', 'CAR', 'MIA', 'IND', 'DET',
       'SDG', 'STL', 'LAR', 'LAC'],
      dtype='object')

In [32]:
# these ones need to change:
fd_teams = {'NOR': 'NO', 'GNB': 'GB', 'KAN': 'KC', 'SFO': 'SF', 'NWE': 'NE', 
            'TAM': 'TB', 'JAC': 'JAX', 'SDG': 'LAC', 'STL': 'LAR'}

In [37]:
def fd_team_change(team):
    if team in fd_teams.keys():
        team = fd_teams[team]
        return team
    else:
        return team

In [42]:
# check if that worked...
sorted(fanduel['Team'].map(fd_team_change).value_counts().index) == teams

True

In [43]:
fanduel['Team'] = fanduel['Team'].map(fd_team_change)
fanduel['Oppt'] = fanduel['Oppt'].map(fd_team_change)

In [92]:
fanduel[fanduel['Oppt'] == '-'].shape

(56, 10)

> Turns out 56 entries don't have an opponent. Will drop them...

In [95]:
fanduel = fanduel[fanduel['Oppt'] != '-']

In [96]:
sorted(fanduel['Oppt'].value_counts().index) == teams

True

#### Game Information

In [55]:
game_info_teams = {team: team[:3].upper() for team in sorted(game_info['team_home'].value_counts().index)}

In [58]:
game_info_teams['Green Bay Packers'] = 'GB'
game_info_teams['Jacksonville Jaguars'] = 'JAX'
game_info_teams['Kansas City Chiefs'] = 'KC'
game_info_teams['Los Angeles Chargers'] = 'LAC'
game_info_teams['Los Angeles Rams'] = 'LAR'
game_info_teams['New England Patriots'] = 'NE'
game_info_teams['New Orleans Saints'] = 'NO'
game_info_teams['New York Giants'] = 'NYG'
game_info_teams['New York Jets'] = 'NYJ'
game_info_teams['San Diego Chargers'] = 'LAC'
game_info_teams['San Francisco 49ers'] = 'SF'
game_info_teams['St. Louis Rams'] = 'LAR'
game_info_teams['Tampa Bay Buccaneers'] = 'TB'

In [63]:
# check
sorted(game_info['team_home'].map(game_info_teams).value_counts().index) == teams

True

In [64]:
game_info['team_home'] = game_info['team_home'].map(game_info_teams)
game_info['team_away'] = game_info['team_away'].map(game_info_teams)

#### Defense vs. Position

In [77]:
def_vs_pos_teams = {team: team for team in qb_vs_def['Opponent'].value_counts().index}
def_vs_pos_teams['LA'] = 'LAR'

In [78]:
for team in qb_vs_def['Team'].value_counts().index:
    if team not in game_info_teams.keys():
        print(team)

In [82]:
qb_vs_def['Team'] = qb_vs_def['Team'].map(game_info_teams)
rb_vs_def['Team'] = rb_vs_def['Team'].map(game_info_teams)
wr_vs_def['Team'] = wr_vs_def['Team'].map(game_info_teams)
te_vs_def['Team'] = te_vs_def['Team'].map(game_info_teams)

In [83]:
qb_vs_def['Opponent'] = qb_vs_def['Opponent'].map(def_vs_pos_teams)
rb_vs_def['Opponent'] = rb_vs_def['Opponent'].map(def_vs_pos_teams)
wr_vs_def['Opponent'] = wr_vs_def['Opponent'].map(def_vs_pos_teams)
te_vs_def['Opponent'] = te_vs_def['Opponent'].map(def_vs_pos_teams)

In [97]:
fanduel.head()

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,FD points,FD salary
0,1,2011,1131,"Brady, Tom",QB,NE,a,MIA,35.98,9200.0
1,1,2011,1309,"Henne, Chad",QB,MIA,h,NE,35.54,6800.0
2,1,2011,1378,"Newton, Cam",QB,CAR,a,ARI,31.68,6700.0
3,1,2011,1151,"Brees, Drew",QB,NO,a,GB,29.06,8900.0
4,1,2011,1242,"Fitzpatrick, Ryan",QB,BUF,a,KC,24.62,7900.0


In [98]:
game_info.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,team_home,team_away,stadium,team_favorite_id,spread_favorite,over_under_line,weather_detail,weather_temperature,weather_wind_mph,score_home,score_away,stadium_neutral,schedule_playoff
0,09/08/2011,2011,1,GB,NO,Lambeau Field,GB,-5.0,48.0,Sunny,68.0,5.0,42.0,34.0,False,False
1,09/11/2011,2011,1,ARI,CAR,University of Phoenix Stadium,ARI,-7.0,37.5,DOME,72.0,0.0,28.0,21.0,False,False
2,09/11/2011,2011,1,BAL,PIT,M&T Bank Stadium,BAL,-1.5,37.0,Rain,75.0,2.0,35.0,7.0,False,False
3,09/11/2011,2011,1,CHI,ATL,Soldier Field,ATL,-1.5,41.0,Sunny,78.0,7.0,30.0,12.0,False,False
4,09/11/2011,2011,1,CLE,CIN,FirstEnergy Stadium,CLE,-7.0,36.5,Sunny,72.0,9.0,17.0,27.0,False,False


In [84]:
qb_vs_def.head()

Unnamed: 0,Attempts,Completions,Interceptions,Opponent,Rank,Team,Touchdowns,Week,Yards,Year
0,32.0,22.0,1.0,PHI,13,ATL,2.0,2,312.0,2011
1,41.0,22.0,3.0,TEN,16,BAL,1.0,2,280.0,2011
2,36.0,22.0,1.0,OAK,5,BUF,1.0,2,119.0,2011
3,27.0,18.0,0.0,GB,8,CAR,2.0,2,309.0,2011
4,47.0,31.0,1.0,NO,6,CHI,0.0,2,319.0,2011


In [99]:
rb_vs_def.head()

Unnamed: 0,Attempts,Opponent,Rank,Team,Touchdowns,Week,Yards,Year
0,26.0,PHI,9,ATL,0.0,2,92.0,2011
1,15.0,TEN,17,BAL,0.0,2,57.0,2011
2,14.0,OAK,1,BUF,0.0,2,66.0,2011
3,21.0,GB,13,CAR,1.0,2,100.0,2011
4,14.0,NO,15,CHI,0.0,2,110.0,2011


In [119]:
wr_vs_def.tail()

Unnamed: 0,Opponent,Rank,Receptions,Targets,Team,Touchdowns,Week,Yards,Year
3355,OAK,8,8.666667,13.666667,LAC,0.333333,17,101.666667,2017
3356,LAR,19,14.0,22.333333,SF,1.333333,17,195.333333,2017
3357,ARI,21,10.666667,15.666667,SEA,1.0,17,141.0,2017
3358,NO,25,11.666667,17.666667,TB,0.666667,17,143.0,2017
3359,NYG,24,8.666667,16.0,WAS,0.333333,17,154.666667,2017


In [102]:
te_vs_def.head()

Unnamed: 0,Opponent,Rank,Receptions,Targets,Team,Touchdowns,Week,Yards,Year
0,PHI,18,4.0,7.0,ATL,1.0,2,30.0,2011
1,TEN,20,3.0,5.0,BAL,0.0,2,42.0,2011
2,OAK,24,4.0,7.0,BUF,0.0,2,39.0,2011
3,GB,15,4.0,6.0,CAR,1.0,2,101.0,2011
4,NO,13,5.0,7.0,CHI,0.0,2,72.0,2011


### Step 2) Drop all week 1 observations

In [106]:
fanduel = fanduel[fanduel['Week'] > 1]

In [110]:
game_info = game_info[game_info['schedule_week'] > 1]

### Step 3) Drop Unnecessary Columns

In [117]:
game_columns = ['schedule_date', 'stadium', 'score_home', 'score_away', 'stadium_neutral', 'schedule_playoff']
game_info.drop(game_columns, axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [121]:
game_info.head()

Unnamed: 0,schedule_season,schedule_week,team_home,team_away,team_favorite_id,spread_favorite,over_under_line,weather_detail,weather_temperature,weather_wind_mph
16,2011,2,ATL,PHI,PHI,-2.5,49.5,DOME,72.0,0.0
17,2011,2,BUF,OAK,BUF,-4.0,41.0,Sunny,66.0,5.0
18,2011,2,CAR,GB,GB,-11.0,45.0,Sunny,67.0,1.0
19,2011,2,DEN,CIN,DEN,-3.0,41.0,Sunny,73.0,6.0
20,2011,2,DET,KC,DET,-9.0,45.0,DOME,72.0,0.0


### Step 4) Split FanDuel by position

In [124]:
fanduel['Pos'].value_counts()

WR     15671
RB     12346
TE      8847
QB      4139
PK      3363
Def     3360
Name: Pos, dtype: int64

In [125]:
fd_qbs = fanduel[fanduel['Pos'] == 'QB']
fd_rbs = fanduel[fanduel['Pos'] == 'RB']
fd_wrs = fanduel[fanduel['Pos'] == 'WR']
fd_tes = fanduel[fanduel['Pos'] == 'TE']

### Step 5) Merge EVERYTHING!!!

#### Quarterbacks

In [191]:
quarterbacks.head()

Unnamed: 0,Week,Year,GID,Name,Pos,Team,h/a,Oppt,FD points,FD salary,...,Opp_Avg_TDs_Allowed,Opp_Avg_Yds_Allowed,Home,Away,team_favorite_id,spread_favorite,over_under_line,weather_detail,weather_temperature,weather_wind_mph
0,2,2011,1378,"Newton, Cam",QB,CAR,h,GB,29.58,7300.0,...,2.0,309.0,CAR,GB,GB,-11.0,45.0,Sunny,67.0,1.0
1,2,2011,1252,"Rodgers, Aaron",QB,GB,a,CAR,21.62,9900.0,...,3.0,419.0,CAR,GB,GB,-11.0,45.0,Sunny,67.0,1.0
2,2,2011,1131,"Brady, Tom",QB,NE,h,LAC,29.22,9100.0,...,2.0,416.0,NE,LAC,NE,-7.0,53.5,Sunny,63.0,12.0
3,2,2011,1231,"Rivers, Philip",QB,LAC,a,NE,20.32,8900.0,...,1.0,39.0,NE,LAC,NE,-7.0,53.5,Sunny,63.0,12.0
4,2,2011,1340,"Stafford, Matthew",QB,DET,h,KC,26.86,7900.0,...,1.0,266.0,DET,KC,DET,-9.0,45.0,DOME,72.0,0.0


In [137]:
quarterbacks = pd.merge(fd_qbs, qb_vs_def, on = ['Week', 'Year', 'Team'])
quarterbacks.drop('Opponent', axis = 1, inplace = True)

In [148]:
renamed_cols = {'Attempts': 'Opp_Avg_Att_Allowed', 
                'Completions': 'Opp_Avg_Comp_Allowed', 
                'Interceptions': 'Opp_Avg_Ints',
                'Rank': 'Opp_Rank',
                'Touchdowns': 'Opp_Avg_TDs_Allowed',
                'Yards': 'Opp_Avg_Yds_Allowed'}

quarterbacks.rename(columns = renamed_cols, inplace = True)

In [185]:
game_info.rename(columns = {'schedule_season': 'Year', 
                            'schedule_week': 'Week', 
                            'team_home': 'Home', 
                            'team_away': 'Away'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [165]:
# create dummy column to replace with home team
quarterbacks['Home'] = 'Blah'
quarterbacks['Away'] = 'Blah'

In [181]:
# creating home & away columns to match with game info
for index, player in quarterbacks.iterrows():
    if player['h/a'] == 'h':
        quarterbacks.loc[index, 'Home'] = player['Team']
        quarterbacks.loc[index, 'Away'] = player['Oppt']
    else:
        quarterbacks.loc[index, 'Home'] = player['Oppt']
        quarterbacks.loc[index, 'Away'] = player['Team']

In [190]:
quarterbacks = pd.merge(quarterbacks, game_info, on = ['Week', 'Year', 'Home', 'Away'])

In [195]:
# drop unwanted columns, rename relevant ones
quarterbacks.drop(['Home', 'Away'], axis = 1, inplace = True)

In [226]:
quarterbacks.to_csv('../data/quarterbacks.csv', index = False)

In [197]:
rename_cols = {'team_favorite_id': 'Favored',
                'spread_favorite': 'Spread',
                'over_under_line': 'O/U',
                'weather_detail': 'Weather',
                'weather_temperature': 'Temperature',
                'weather_wind_mph': 'Wind'}
    
quarterbacks.rename(columns = rename_cols, inplace = True)

In [200]:
game_info.rename(columns = rename_cols, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


#### Runningbacks

In [142]:
runningbacks = pd.merge(fd_rbs, rb_vs_def, on = ['Week', 'Year', 'Team'])
runningbacks.drop('Opponent', axis = 1, inplace = True)

In [150]:
renamed_cols = {'Attempts': 'Opp_Avg_Carries',
                'Rank': 'Opp_Rank',
                'Touchdowns': 'Opp_Avg_TDs_Allowed',
                'Yards': 'Opp_Avg_Yds_Allowed'}

runningbacks.rename(columns = renamed_cols, inplace = True)

In [218]:
runningbacks['Home'] = 'Blah'
runningbacks['Away'] = 'Blah'

In [219]:
# creating home & away columns to match with game info
for index, player in runningbacks.iterrows():
    if player['h/a'] == 'h':
        runningbacks.loc[index, 'Home'] = player['Team']
        runningbacks.loc[index, 'Away'] = player['Oppt']
    else:
        runningbacks.loc[index, 'Home'] = player['Oppt']
        runningbacks.loc[index, 'Away'] = player['Team']

In [220]:
runningbacks = pd.merge(runningbacks, game_info, on = ['Week', 'Year', 'Home', 'Away'])
runningbacks.drop(['Home', 'Away'], axis = 1, inplace = True)

In [224]:
runningbacks.to_csv('../data/runningbacks.csv', index = False)

#### Wide Receivers

In [145]:
receivers = pd.merge(fd_wrs, wr_vs_def, on = ['Week', 'Year', 'Team'])
receivers.drop('Opponent', axis = 1, inplace = True)

In [152]:
renamed_cols = {'Targets': 'Opp_Avg_Targets_Allowed',
                'Receptions': 'Opp_Avg_Rec_Allowed',
                'Rank': 'Opp_Rank',
                'Touchdowns': 'Opp_Avg_TDs_Allowed',
                'Yards': 'Opp_Avg_Yds_Allowed'}

receivers.rename(columns = renamed_cols, inplace = True)

In [227]:
receivers['Home'] = 'Blah'
receivers['Away'] = 'Blah'

# creating home & away columns to match with game info
for index, player in receivers.iterrows():
    if player['h/a'] == 'h':
        receivers.loc[index, 'Home'] = player['Team']
        receivers.loc[index, 'Away'] = player['Oppt']
    else:
        receivers.loc[index, 'Home'] = player['Oppt']
        receivers.loc[index, 'Away'] = player['Team']
        
receivers = pd.merge(receivers, game_info, on = ['Week', 'Year', 'Home', 'Away'])
receivers.drop(['Home', 'Away'], axis = 1, inplace = True)

In [228]:
receivers.to_csv('../data/receivers.csv', index = False)

#### Tight Ends

In [155]:
tight_ends = pd.merge(fd_tes, te_vs_def, on = ['Week', 'Year', 'Team'])
tight_ends.drop('Opponent', axis = 1, inplace = True)

In [157]:
renamed_cols = {'Targets': 'Opp_Avg_Targets_Allowed',
                'Receptions': 'Opp_Avg_Rec_Allowed',
                'Rank': 'Opp_Rank',
                'Touchdowns': 'Opp_Avg_TDs_Allowed',
                'Yards': 'Opp_Avg_Yds_Allowed'}

tight_ends.rename(columns = renamed_cols, inplace = True)

In [231]:
tight_ends['Home'] = 'Blah'
tight_ends['Away'] = 'Blah'

# creating home & away columns to match with game info
for index, player in tight_ends.iterrows():
    if player['h/a'] == 'h':
        tight_ends.loc[index, 'Home'] = player['Team']
        tight_ends.loc[index, 'Away'] = player['Oppt']
    else:
        tight_ends.loc[index, 'Home'] = player['Oppt']
        tight_ends.loc[index, 'Away'] = player['Team']
        
tight_ends = pd.merge(tight_ends, game_info, on = ['Week', 'Year', 'Home', 'Away'])
tight_ends.drop(['Home', 'Away'], axis = 1, inplace = True)

In [233]:
tight_ends.to_csv('../data/tightends.csv', index = False)