# Team Selection Notebook

* Part 1 imports projections and salaries for a single week and chooses the optimal team.
* Part 2 adds "noise" to the projections to generate N different optimal rosters.
* Part 3 uses a genetic algorithm to evolve rosters until an optimal population has been selected.

# Import Requirements

In [160]:
import pandas as pd
from collections import Counter

from pulp import *

from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

# Read in data for 2017 Week 5

In [105]:
# Load the 2017 week-5 CSV for each position group into its own DataFrame.
qb_df = pd.read_csv('./fantasydata/Season2017/QB_2017_week5.csv')
rb_df = pd.read_csv('./fantasydata/Season2017/RB_2017_week5.csv')
wr_df = pd.read_csv('./fantasydata/Season2017/WR_2017_week5.csv')
te_df = pd.read_csv('./fantasydata/Season2017/TE_2017_week5.csv')
dst_df = pd.read_csv('./fantasydata/Season2017/DST_2017_week5.csv')

# Report (rows, columns) for every table as a quick sanity check.
# Each line is formatted into a single string before printing, so the output
# is identical whether `print` is a statement (Python 2) or a function
# (Python 3) -- matching the print(...) call form used in later cells.
for pos_label, pos_df in [('QB', qb_df), ('RB', rb_df), ('WR', wr_df),
                          ('TE', te_df), ('DST', dst_df)]:
    print('%s %s' % (pos_label, pos_df.shape))

QB (74, 11)
RB (128, 11)
WR (164, 11)
TE (87, 11)
DST (28, 11)


# Vertically merge all data

In [97]:
# Stack the per-position tables into one frame. ignore_index=True gives the
# result a fresh 0..n-1 index straight away, so the frame never carries the
# duplicated per-file row labels that a plain concat would leave behind.
week5_df = pd.concat([qb_df, rb_df, wr_df, te_df, dst_df],
                     axis=0, ignore_index=True)

# Relabel 'FB' as 'RB' so those rows are grouped with the running backs.
week5_df['Pos'] = week5_df['Pos'].str.replace('FB', 'RB')

# Sanity checks: total size and number of players per position.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(week5_df.shape)
print(Counter(week5_df['Pos']))
week5_df.head()

(481, 11)
Counter({'WR': 164, 'RB': 128, 'TE': 87, 'QB': 74, 'DST': 28})


Unnamed: 0,Rk,ID,Player,Pos,Week,Team,Opp,Opp Rank,Opp Pos Rank,Salary,Projection
0,1,4314,Tom Brady,QB,5,NE,TB,29,30,8000,25.0
1,2,2593,Aaron Rodgers,QB,5,GB,DAL,26,23,8100,21.1
2,3,16762,Jameis Winston,QB,5,TB,NE,32,32,6000,20.4
3,4,2428,Carson Palmer,QB,5,ARI,PHI,21,24,5900,19.4
4,5,18055,Dak Prescott,QB,5,DAL,GB,6,6,6800,18.8


# ILP Data Wrangling

In [106]:
# Rank players inside each position by salary, most expensive first (1-based).
# The ranks come back in sorted row order; assigning them to a column
# re-aligns them with week5_df's own index.
salary_order = week5_df.sort_values(['Pos', 'Salary'], ascending=False)
week5_df['PosID'] = salary_order.groupby(['Pos']).cumcount() + 1

# Final ID = position code followed by the rank left-padded with zeros to at
# least three characters, e.g. WR001.
rank_text = week5_df['PosID'].astype(str).str.zfill(3)
week5_df['PosID'] = week5_df['Pos'].map(str) + rank_text

# Show the player count per position and the five highest-paid rows of the
# position that sorts last alphabetically.
display(week5_df['Pos'].value_counts())
display(week5_df.sort_values(['Pos', 'Salary'], ascending=False).head(5))

WR     164
RB     128
TE      87
QB      74
DST     28
Name: Pos, dtype: int64

Unnamed: 0,Rk,ID,Player,Pos,Week,Team,Opp,Opp Rank,Opp Pos Rank,Salary,Projection,PosID
205,4,16389,Odell Beckham Jr,WR,5,NYG,LAC,25,21,8500,18.0,WR001
202,1,11056,Antonio Brown,WR,5,PIT,JAX,4,1,8400,18.6,WR002
206,5,12845,AJ Green,WR,5,CIN,BUF,2,6,8200,16.8,WR003
204,3,4556,Jordy Nelson,WR,5,GB,DAL,26,22,8100,18.2,WR004
203,2,16597,Mike Evans,WR,5,TB,NE,32,29,7900,18.5,WR005


In [108]:
# Collect the PosID labels of every player, one list per position.
def _pos_ids(tag):
    """Return the PosID values containing `tag`, in week5_df row order."""
    # NOTE(review): str.contains is a substring/pattern match, not a prefix
    # match; it is only unambiguous while no position code occurs inside
    # another PosID.
    matches = week5_df['PosID'].str.contains(tag)
    return week5_df.loc[matches, 'PosID'].tolist()


QB_ID = _pos_ids('QB')
TE_ID = _pos_ids('TE')
RB_ID = _pos_ids('RB')
WR_ID = _pos_ids('WR')
DST_ID = _pos_ids('DST')

# Every player ID, concatenated position by position.
POS_ID = QB_ID + TE_ID + RB_ID + WR_ID + DST_ID

In [125]:
# One integer decision variable per player, bounded to [0, 1] and keyed by
# PosID. The "%s" name pattern is presumably there so that each variable's
# name is its PosID (the roster cell later matches on v.name) -- confirm
# against the installed PuLP version.
x = LpVariable.dicts("%s", POS_ID, lowBound=0, upBound=1, cat=LpInteger)

# Lookup tables from PosID to projected points and to salary.
by_pos_id = week5_df.set_index('PosID')
points = by_pos_id['Projection'].to_dict()
salary = by_pos_id['Salary'].to_dict()

# ILP Solver

In [136]:
# Roster rules as named constants rather than magic numbers in the model.
SALARY_CAP = 50000   # maximum total salary of a roster, in dollars
ROSTER_SIZE = 9      # exact number of players on a roster


def _n_selected(ids):
    """Return the linear expression counting how many players in `ids` are picked."""
    # lpSum is PuLP's summation helper; the builtin sum() creates a new
    # intermediate expression for every term it adds.
    return lpSum([x[i] for i in sorted(ids)])


# Sorted once so the objective and the global constraints share one
# deterministic term order.
all_ids = sorted(POS_ID)

dk_solve = LpProblem("ILP", LpMaximize)

# ****************************************************************
# Objective: maximise the roster's total projected points
# ****************************************************************
dk_solve += lpSum([points[i] * x[i] for i in all_ids])

# ****************************************************************
# Constraints
# ****************************************************************

# Salary cap at $50k
dk_solve += lpSum([salary[i] * x[i] for i in all_ids]) <= SALARY_CAP

# Exactly 1 Quarterback
dk_solve += _n_selected(QB_ID) == 1

# Between 1 and 2 Tight Ends
dk_solve += _n_selected(TE_ID) <= 2
dk_solve += _n_selected(TE_ID) >= 1

# Between 3 and 4 Wide Receivers
dk_solve += _n_selected(WR_ID) <= 4
dk_solve += _n_selected(WR_ID) >= 3

# Between 2 and 3 Running Backs
dk_solve += _n_selected(RB_ID) <= 3
dk_solve += _n_selected(RB_ID) >= 2

# Exactly 1 Defense / Special Teams
dk_solve += _n_selected(DST_ID) == 1

# Require 9 players. The per-position minimums add up to 8, so this leaves
# exactly one extra slot to be filled by a TE, WR or RB.
dk_solve += _n_selected(POS_ID) == ROSTER_SIZE

# ****************************************************************
# Solve
# ****************************************************************
# LpSolverDefault configures PuLP's *default* solver only, not the GLPK
# instance used below. The setting is kept for any later default-solver call,
# but guarded: PuLP leaves LpSolverDefault as None when it finds no default
# solver, and the unguarded assignment would then raise AttributeError.
if LpSolverDefault is not None:
    LpSolverDefault.msg = 1

# Pass verbosity straight to the solver that actually runs.
dk_solve.solve(GLPK(msg=1))
print("Solution Status: " + LpStatus[dk_solve.status])

Solution Status: Optimal


In [156]:
# Get Selected Player IDs
# A player is on the roster when its 0/1 variable solved to 1. Solver values
# are floats, so an exact `== 1` test would silently drop a pick reported as
# e.g. 0.9999999; round first, and skip variables the solver left unset (None).
PlayID = [v.name for v in dk_solve.variables()
          if v.varValue is not None and round(v.varValue) == 1]

# Pull the selected players' rows and renumber them from 0.
roster_df = week5_df[week5_df['PosID'].isin(PlayID)].reset_index(drop=True)

display(HTML("<b>\nSummary</b>"))
print("Projected Points = %0.2f" % value(dk_solve.objective))
print("Total Salary = $%d" % roster_df['Salary'].sum())

display(roster_df[['Player', 'Pos', 'Team', 'Salary', 'Projection']])


Projected Points = 134.80
Total Salary = $50000


Unnamed: 0,Player,Pos,Team,Salary,Projection
0,Tom Brady,QB,NE,8000,25.0
1,Le'Veon Bell,RB,PIT,9500,22.3
2,Lamar Miller,RB,HOU,5000,13.7
3,Jonathan Stewart,RB,CAR,3900,11.0
4,Dez Bryant,WR,DAL,6500,15.5
5,Jarvis Landry,WR,MIA,5800,15.3
6,Chris Hogan,WR,NE,6100,14.8
7,Dwayne Allen,TE,NE,2500,8.6
8,Kansas City Chiefs,DST,KC,2700,8.6


# Week 5 true values

In [159]:
# Actual ("true") week 5 points for each player on the selected roster.
# Stored once as data so the per-player values and the total cannot drift
# apart, which could happen when the same numbers lived in both comments and
# a separate sum expression.
true_points = [
    ("Tom Brady", 17.62),
    ("Le'Veon Bell", 19.30),
    ("Lamar Miller", 10.90),
    ("Jonathan Stewart", 7.40),
    ("Dez Bryant", 16.20),
    ("Jarvis Landry", 15.40),
    ("Chris Hogan", 21.40),
    ("Dwayne Allen", 0.00),
    ("Kansas City Chiefs", 10.00),
]

# Sum in list order so the floating-point total matches a left-to-right sum.
true_score = sum(pts for _player, pts in true_points)

# Single-argument print(...) works on both Python 2 and Python 3.
print(true_score)

118.22


# Add uncertainty to projection