In [3]:
### header ###
__author__ = "Jenhan Tao"
__license__ = "MIT"
__email__ = "jenhantao@gmail.com"

### imports ###
import sys # system functions
import os # os functions
import pandas as pd # for reading data
import numpy as np # for numeric operations
import matplotlib.pyplot as plt 
import matplotlib # for visualizing data
import scipy # scientific computing
import seaborn as sns # for pretty plots
import time
import copy

### notebook specific configuration ###
%matplotlib inline
matplotlib.rcParams['savefig.dpi'] = 200
sns.set_context('notebook')
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# it's bad practice to work within a github repo and also bad practice to have data in a github repo
# but it's convenient
# change working directory to github repo
working_directory = '/gpfs/data01/glasslab/home/jtao/side_projects/fantasy_football_genetic_algorithm_draft/'
os.chdir(working_directory)

### Read in rankings

In [17]:
# year_rankFrame_dict = {} # {year:DataFrame}
# for f in os.listdir(working_directory + '/rankings_ffc_standard'):
#     json_path = working_directory + '/rankings_ffc_standard/' + f
#     ranks = pd.read_json(json_path, typ='series')
#     adp_list = [x['adp'] for x in ranks['players']]
#     name_list = [x['name'] for x in ranks['players']]
#     position_list = [x['position'] for x in ranks['players']]
    
#     year = f.split('_')[-1].replace('.json','')
    
#     rankFrame = pd.DataFrame({'ADP':adp_list, 'Position':position_list, 'Name':name_list})
#     year_rankFrame_dict[year] = rankFrame
    


In [168]:
year_rankFrame_dict = {} # {year:DataFrame}
for f in os.listdir(working_directory + '/rankings_mfl_standard'):
    csv_path = working_directory + '/rankings_mfl_standard/' + f
    rankFrame = pd.read_csv(csv_path, sep=',')
    rankFrame['POSITION'] = [x.split(' ')[-1].replace('*','') for x in rankFrame['PLAYER'].values]
    rankFrame['NAME'] = [' '.join(x.split(', ')[1].split(' ')[:-2]) + ' ' + x.split(', ')[0].replace(',','') for x in rankFrame['PLAYER'].values]
    
    rankFrame = rankFrame[rankFrame['POSITION'].isin(['Def', 'PK', 'QB', 'RB', 'TE', 'WR'])]
    rankFrame['ADP SCORE'] = rankFrame['AVG. PICK'].max() - rankFrame['AVG. PICK'].values
    year = f.split('_')[-1].replace('.csv','')
    
    year_rankFrame_dict[year] = rankFrame
    


### Read in season results

In [262]:
year_playerScore_dict = {} # {year:DataFrame}
for f in os.listdir(working_directory + '/scoring_standard'):
    score_path = working_directory + '/scoring_standard/' + f
    scoreFrame = pd.read_csv(score_path, sep=',')
    year = f.split('_')[-1].replace('.csv','')
    current_dict = dict(zip(scoreFrame['Player'].values, scoreFrame['Points'].values))
    year_playerScore_dict[year] = current_dict
    


In [263]:
current_dict

{'A.J. Green': 180,
 'A.J. McCarron': 65,
 'Aaron Rodgers': 352,
 'Adam Vinatieri': 115,
 'Adrian Peterson': 220,
 'Ahmad Bradshaw': 28,
 'Albert Wilson': 51,
 'Alex Smith': 299,
 'Alex Tanney': 9,
 'Alfred Blue': 86,
 'Alfred Morris': 79,
 'Allen Hurns': 156,
 'Allen Robinson': 217,
 'Alshon Jeffery': 101,
 'Amari Cooper': 134,
 'Ameer Abdullah': 80,
 'Andre Ellington': 53,
 'Andre Holmes': 41,
 'Andre Johnson': 68,
 'Andre Williams': 28,
 'Andrew Franks': 74,
 'Andrew Luck': 157,
 'Andrew Quarless': 2,
 'Andy Dalton': 286,
 'Anquan Boldin': 96,
 'Anthony Fasano': 36,
 'Antonio Andrews': 81,
 'Antonio Brown': 239,
 'Antonio Gates': 88,
 'Arian Foster': 52,
 'Arizona Cardinals': 206,
 'Atlanta Falcons': 132,
 'Austin Davis': 21,
 'Austin Seferian-Jenkins': 55,
 'Baltimore Ravens': 128,
 'Ben Roethlisberger': 264,
 'Benjamin Watson': 109,
 'Benny Cunningham': 30,
 'Bilal Powell': 79,
 'Bishop Sankey': 40,
 'Blaine Gabbert': 143,
 'Blair Walsh': 147,
 'Blake Bell': 15,
 'Blake Bortles': 

### Create Players

In [195]:
position_index_dict = dict(zip(sorted(rankFrame['POSITION'].unique()), 
                               range(len(rankFrame['POSITION'].unique()))))
index_position_dict = dict(zip(range(len(rankFrame['POSITION'].unique())),
                                sorted(rankFrame['POSITION'].unique()), 
                               ))

In [196]:
position_index_dict # gives the position and the corresponding index

{'Def': 0, 'PK': 1, 'QB': 2, 'RB': 3, 'TE': 4, 'WR': 5}

In [197]:
index_position_dict # gives the index and the corresponding index

{0: 'Def', 1: 'PK', 2: 'QB', 3: 'RB', 4: 'TE', 5: 'WR'}

In [128]:
def create_new_players(num_players, num_rounds):
    '''
    Randomly create simulated fantasy players
    inputs: number of players and rounds
    outputs: list of fantasy players
    '''
    num_positions = 6 # number of positions
    player_list = []
    for player_number in range(num_players):
        new_player = []
        for round_number in range(num_rounds):
            weights = np.random.random(num_positions) # initialize random weights
            normed_weights = weights/np.sum(weights) # normalize so weights sum to 1
            new_player.append(normed_weights)
    
        player_list.append((new_player))
    return np.array(player_list)

In [151]:
len(rankFrame['POSITION'].unique()) # number of positions

9

In [40]:
fantasy_players = create_new_players(12, 10)

In [41]:
fantasy_players.shape # first index gives the player
                      # second index gives the round
                      # third index gives the position 


(12, 10, 6)

In [None]:
fantasy_players[0,0,0] #gives the importance for player 1 in the first round for a defense 
fantasy_players[0,0,1] #gives the importance for player 1 in the first round for a place kicker (PK)
fantasy_players[8,3,2] #gives the importance for player 9 in the fourth round for a QB

In [53]:
fantasy_players.shape

(12, 10, 6)

### Run Simulation

In [264]:
rankFrame = year_rankFrame_dict['2017']

playerScoreDict = year_playerScore_dict['2017']

In [180]:
rankFrame.shape

(340, 10)

In [265]:
len(playerScoreDict)

461

In [273]:
score_players = set (playerScoreDict.keys())

In [274]:
rank_players = set(rankFrame['NAME'].values)

In [276]:
len(score_players - rank_players)

186

In [277]:
len(rank_players-score_players)

65

In [278]:
rankFrame.head()

Unnamed: 0,#,PICK,PLAYER,AVG. PICK,MIN. PICK,MAX. PICK,# DRAFTS SELECTED IN,POSITION,NAME,ADP SCORE
0,1,1.01,"Johnson, David ARI RB*",2.82,1,154,778,RB,David Johnson,254.55
1,2,1.02,"Bell, Le'Veon PIT RB",2.83,1,110,802,RB,Le'Veon Bell,254.54
2,3,1.03,"Brown, Antonio PIT WR",4.78,1,111,802,WR,Antonio Brown,252.59
3,4,1.04,"Jones, Julio ATL WR",7.82,1,112,801,WR,Julio Jones,249.55
4,5,1.05,"Beckham, Odell NYG WR*",8.72,1,120,778,WR,Odell Beckham,248.65


In [279]:
playerScoreDict

{'A.J. Derby': 32,
 'A.J. Green': 147,
 'A.J. McCarron': 1,
 'Aaron Jones': 66,
 'Aaron Rodgers': 157,
 'Adam Humphries': 62,
 'Adam Shaheen': 29,
 'Adam Thielen': 143,
 'Adam Vinatieri': 119,
 'Adrian Peterson': 62,
 'Alan Cross': 4,
 'Albert Wilson': 67,
 'Aldrick Rosas': 80,
 'Alex Collins': 142,
 'Alex Smith': 334,
 'Alfred Blue': 33,
 'Alfred Morris': 60,
 'Allen Hurns': 58,
 'Alshon Jeffery': 132,
 'Alvin Kamara': 221,
 'Amari Cooper': 102,
 'Ameer Abdullah': 89,
 'Andre Ellington': 42,
 'Andy Dalton': 252,
 'Anthony Fasano': 13,
 'Anthony Sherman': 14,
 'Antonio Brown': 204,
 'Antonio Gates': 45,
 'Antony Auclair': 2,
 'Arizona Cardinals': 133,
 'Atlanta Falcons': 131,
 'Austin Davis': 0,
 'Austin Ekeler': 70,
 'Austin Hooper': 62,
 'Austin Seferian-Jenkins': 47,
 'Austin Traylor': 8,
 'Baltimore Ravens': 223,
 'Ben Koyack': 5,
 'Ben Roethlisberger': 310,
 'Benjamin Watson': 71,
 'Bennie Fowler': 48,
 'Benny Cunningham': 33,
 'Bilal Powell': 113,
 'Blaine Gabbert': 71,
 'Blair W

In [280]:
fantasy_players.shape

(12, 10, 6)

In [478]:
def simulate_draft(rankFrame, fantasy_players, num_rounds):
    '''
    Simulates a draft and returns a list of rosters for each fantasy player
    inputs: rankFrame - DataFrame containing ranks of players
            fantasy_players - list of fantasy players with different strategies
            num_rounds - number of rounds in the draft
    output: team_rosters - roster selected by each fantasy player
    '''
    team_rosters = [[] for x in range(fantasy_players.shape[0])]
        
    drafted_players = set()
    flip = False
    for draft_round in range(num_rounds):
        for player_index in range(fantasy_players.shape[0]):
            if flip:
                player = fantasy_players.shape[0] - player_index
            else:
                player = player_index
            
            print(draft_round, player)
            # retrieve strategy
            round_strategy = fantasy_players[player,draft_round, :]
            player_score_tuples = []
            # calculate player ranks
            for index, row in rankFrame[~rankFrame['NAME'].isin(drafted_players)].iterrows(): 
                position = row['POSITION']
                position_weight = round_strategy[position_index_dict[position]]
                weighted_player_score = position_weight * float(row['ADP SCORE'])
                player_score_tuples.append((row['NAME'], weighted_player_score, position))
            
            # draft player
            player_score_tuples.sort(key = lambda x: x[1], reverse=True)
            drafted_player = (player_score_tuples[0][0], player_score_tuples[0][2])
            
            team_rosters[player].append(drafted_player)
            drafted_players.add(drafted_player[0])
        flip=True
    return team_rosters

In [479]:
start = time.time()
rosters = simulate_draft(rankFrame, fantasy_players, 10)
print('time to draft', time.time() - start)

0 0
0 11
0 10
0 9
0 8
0 7
0 6
0 5
0 4
0 3
0 2
0 1
1 0
1 11
1 10
1 9
1 8
1 7
1 6
1 5
1 4
1 3
1 2
1 1
2 0
2 11
2 10
2 9
2 8
2 7
2 6
2 5
2 4
2 3
2 2
2 1
3 0
3 11
3 10
3 9
3 8
3 7
3 6
3 5
3 4
3 3
3 2
3 1
4 0
4 11
4 10
4 9
4 8
4 7
4 6
4 5
4 4
4 3
4 2
4 1
5 0
5 11
5 10
5 9
5 8
5 7
5 6
5 5
5 4
5 3
5 2
5 1
6 0
6 11
6 10
6 9
6 8
6 7
6 6
6 5
6 4
6 3
6 2
6 1
7 0
7 11
7 10
7 9
7 8
7 7
7 6
7 5
7 4
7 3
7 2
7 1
8 0
8 11
8 10
8 9
8 8
8 7
8 6
8 5
8 4
8 3
8 2
8 1
9 0
9 11
9 10
9 9
9 8
9 7
9 6
9 5
9 4
9 3
9 2
9 1
time to draft 3.3382275104522705


In [338]:
#  check rosters don't overlap
for i in range(len(rosters)-1):
    for j in range(i+1, len(rosters)):
        team1 = rosters[i]
        team2 = rosters[j]
        print(i, j, len(set(team1).intersection(set(team2))))

0 1 0
0 2 0
0 3 0
0 4 0
0 5 0
0 6 0
0 7 0
0 8 0
0 9 0
0 10 0
0 11 0
1 2 0
1 3 0
1 4 0
1 5 0
1 6 0
1 7 0
1 8 0
1 9 0
1 10 0
1 11 0
2 3 0
2 4 0
2 5 0
2 6 0
2 7 0
2 8 0
2 9 0
2 10 0
2 11 0
3 4 0
3 5 0
3 6 0
3 7 0
3 8 0
3 9 0
3 10 0
3 11 0
4 5 0
4 6 0
4 7 0
4 8 0
4 9 0
4 10 0
4 11 0
5 6 0
5 7 0
5 8 0
5 9 0
5 10 0
5 11 0
6 7 0
6 8 0
6 9 0
6 10 0
6 11 0
7 8 0
7 9 0
7 10 0
7 11 0
8 9 0
8 10 0
8 11 0
9 10 0
9 11 0
10 11 0


In [339]:
scoreFrame.head()

Unnamed: 0,Player,Points
0,Stephen Gostkowski,159
1,Graham Gano,150
2,Blair Walsh,147
3,Robbie Gould,141
4,Josh Brown,140


In [283]:
playerScoreDict

{'A.J. Derby': 32,
 'A.J. Green': 147,
 'A.J. McCarron': 1,
 'Aaron Jones': 66,
 'Aaron Rodgers': 157,
 'Adam Humphries': 62,
 'Adam Shaheen': 29,
 'Adam Thielen': 143,
 'Adam Vinatieri': 119,
 'Adrian Peterson': 62,
 'Alan Cross': 4,
 'Albert Wilson': 67,
 'Aldrick Rosas': 80,
 'Alex Collins': 142,
 'Alex Smith': 334,
 'Alfred Blue': 33,
 'Alfred Morris': 60,
 'Allen Hurns': 58,
 'Alshon Jeffery': 132,
 'Alvin Kamara': 221,
 'Amari Cooper': 102,
 'Ameer Abdullah': 89,
 'Andre Ellington': 42,
 'Andy Dalton': 252,
 'Anthony Fasano': 13,
 'Anthony Sherman': 14,
 'Antonio Brown': 204,
 'Antonio Gates': 45,
 'Antony Auclair': 2,
 'Arizona Cardinals': 133,
 'Atlanta Falcons': 131,
 'Austin Davis': 0,
 'Austin Ekeler': 70,
 'Austin Hooper': 62,
 'Austin Seferian-Jenkins': 47,
 'Austin Traylor': 8,
 'Baltimore Ravens': 223,
 'Ben Koyack': 5,
 'Ben Roethlisberger': 310,
 'Benjamin Watson': 71,
 'Bennie Fowler': 48,
 'Benny Cunningham': 33,
 'Bilal Powell': 113,
 'Blaine Gabbert': 71,
 'Blair W

In [371]:
def score_roster(playerScoreDict, fantasy_rosters):
    '''
    returns a list of performances for each fantasy player's team and lineups for each team
    '''
    fantasy_team_performances = []
    fantasy_lineups = []
    for roster in fantasy_rosters:  
        lineup = []
        roster_with_scores = [(x[0], x[1], playerScoreDict[x[0]]) if x[0] in playerScoreDict else (x[0], x[1], 0) for x in roster]
        roster_with_scores.sort(key = lambda x:x[2], reverse=True)
        
        # fill regular roster
        for pos in ['QB', 'WR', 'WR', 'RB', 'RB', 'PK', 'DEF']:
            selected_player = ('No Selection', pos, 0)
            player_selected = False
            for player in roster_with_scores:
                if player[1] == pos:
                    selected_player = player
                    player_selected = True
                    break
            lineup.append(selected_player)
            if player_selected:
                roster_with_scores.remove(selected_player)
                
        # fill flex
        selected_player = ('No Selection', pos, 0)
        for player in roster_with_scores:
            if player[1] in ['WR', 'RB', 'TE']:
                selected_player = player
                player_selected = True
                break
        lineup.append(selected_player)
        
        performance = np.sum(x[2] for x in lineup)
        
        fantasy_lineups.append(lineup)
        fantasy_team_performances.append(performance)
        
    return fantasy_team_performances, fantasy_lineups
   


In [364]:
performances, lineups= score_roster(playerScoreDict, copy.deepcopy(rosters))

In [365]:
performances

[1180, 1027, 885, 1042, 891, 735, 872, 1111, 558, 806, 897, 1050]

In [367]:
lineups

[[('Ben Roethlisberger', 'QB', 310),
  ('Michael Thomas', 'WR', 147),
  ('Alshon Jeffery', 'WR', 132),
  ('Melvin Gordon', 'RB', 215),
  ('Christian McCaffrey', 'RB', 134),
  ('Justin Tucker', 'PK', 151),
  ('No Selection', 'DEF', 0),
  ('LeGarrette Blount', 'RB', 91)],
 [('Cam Newton', 'QB', 331),
  ('Antonio Brown', 'WR', 204),
  ('Tyreek Hill', 'WR', 153),
  ('LeSean McCoy', 'RB', 194),
  ('No Selection', 'RB', 0),
  ('No Selection', 'PK', 0),
  ('No Selection', 'DEF', 0),
  ('Jarvis Landry', 'WR', 145)],
 [('Andy Dalton', 'QB', 252),
  ('Julio Jones', 'WR', 156),
  ('No Selection', 'WR', 0),
  ('Leonard Fournette', 'RB', 183),
  ('Carlos Hyde', 'RB', 161),
  ('No Selection', 'PK', 0),
  ('No Selection', 'DEF', 0),
  ('C.J. Anderson', 'RB', 133)],
 [('Dak Prescott', 'QB', 295),
  ('Keenan Allen', 'WR', 175),
  ('Larry Fitzgerald', 'WR', 146),
  ('Tevin Coleman', 'RB', 130),
  ('Jay Ajayi', 'RB', 102),
  ('Stephen Gostkowski', 'PK', 164),
  ('No Selection', 'DEF', 0),
  ('Jordan Reed

In [468]:
def update_players(fantasy_players, fantasy_team_performances, num_players, mutation_rate=1.0, num_survivors = 4):
    '''
    Given a list of fantasy players with different strategies and the performance of each strategy,
    creates a new generation of fantasy players
    '''
    num_positions = 6 # number of positions
    
    performance_tuple_list = list (zip(fantasy_team_performances, fantasy_players))
    performance_tuple_list.sort(key = lambda x:x[0], reverse=True)
    
    playoff_players = np.array([x[1] for x in performance_tuple_list[:num_survivors]]) # get top players
    
    new_players = []
    # recombination
    for player in range(num_players):
        new_player = []
        parent1 = np.random.randint(playoff_players.shape[0])
        parent2 = np.random.randint(playoff_players.shape[0])
        for draft_round in range(fantasy_players.shape[1]):
            mean_chromosome = (playoff_players[parent1,draft_round,:] + playoff_players[parent2,draft_round,:])/2
            
            weights = mutation_rate * np.random.random(num_positions) # initialize random weights
            mutated_chromosome = mean_chromosome +  weights
            normed_chromosome = mutated_chromosome/np.sum(mutated_chromosome) # normalize so weights sum to 1
            
            new_player.append(normed_chromosome)

        new_players.append(new_player)
    
    return np.array(new_players)
    

# Run Basic Simulation

In [474]:
num_simulations = 20
draft_rounds = 15
num_players = 12

rankFrame = year_rankFrame_dict['2017']
playerScoreDict = year_playerScore_dict['2017']
iteration_players = create_new_players(num_players, draft_rounds)
for iteration in range(num_simulations):
    start = time.time()
    iteration_rosters = simulate_draft(rankFrame, iteration_players, num_rounds=draft_rounds)
    iteration_performances, iteration_lineups = score_roster(playerScoreDict, copy.deepcopy(iteration_rosters))
    
    iteration_players = update_players(iteration_players, 
                                       iteration_performances, 
                                       num_players, 
                                       mutation_rate=0.5, 
                                       num_survivors = 4)
    
    print(iteration, 
          np.round(time.time() - start, 2),
          sorted(list(zip(iteration_performances, range(len(iteration_performances)))), key = lambda x:x[0]), 
          np.mean(iteration_performances), 
          np.max(iteration_performances) - np.min(iteration_performances))

0 4.38 [(799, 8), (923, 2), (962, 9), (996, 6), (1040, 7), (1045, 11), (1049, 4), (1076, 10), (1126, 1), (1232, 5), (1298, 3), (1339, 0)] 1073.75 540
1 4.33 [(691, 3), (878, 1), (939, 5), (950, 8), (986, 0), (1021, 11), (1028, 9), (1052, 4), (1059, 6), (1141, 2), (1175, 10), (1240, 7)] 1013.3333333333334 549
2 4.41 [(901, 4), (906, 10), (918, 0), (1007, 11), (1027, 3), (1033, 2), (1064, 1), (1070, 9), (1074, 8), (1099, 5), (1122, 6), (1175, 7)] 1033.0 274
3 4.28 [(811, 1), (865, 11), (914, 8), (958, 2), (972, 10), (1018, 5), (1030, 9), (1040, 7), (1057, 3), (1069, 4), (1171, 0), (1231, 6)] 1011.3333333333334 420
4 4.3 [(839, 2), (883, 4), (893, 5), (1001, 8), (1020, 10), (1068, 6), (1076, 9), (1092, 1), (1111, 11), (1133, 7), (1141, 3), (1257, 0)] 1042.8333333333333 418
5 4.3 [(671, 1), (879, 2), (942, 11), (967, 3), (993, 5), (1046, 8), (1055, 0), (1112, 7), (1141, 4), (1196, 9), (1257, 6), (1297, 10)] 1046.3333333333333 626
6 4.35 [(569, 5), (633, 10), (871, 7), (954, 9), (954, 11), 

In [470]:
iteration_players.shape

(12, 15, 6)

In [471]:
position_index_dict

{'Def': 0, 'PK': 1, 'QB': 2, 'RB': 3, 'TE': 4, 'WR': 5}

In [472]:
iteration_players.argmax(axis=2)

array([[2, 3, 3, 4, 3, 5, 0, 4, 4, 1, 3, 1, 0, 5, 1],
       [4, 3, 2, 5, 0, 3, 1, 2, 5, 4, 0, 1, 0, 5, 5],
       [4, 4, 4, 2, 4, 4, 2, 0, 5, 1, 2, 5, 2, 0, 1],
       [5, 4, 1, 2, 1, 2, 4, 5, 0, 4, 3, 1, 3, 2, 4],
       [5, 1, 2, 0, 1, 2, 5, 2, 2, 2, 2, 5, 0, 4, 4],
       [3, 3, 1, 0, 5, 2, 1, 5, 3, 2, 5, 0, 2, 0, 1],
       [5, 5, 3, 1, 1, 2, 0, 1, 3, 4, 2, 1, 1, 5, 4],
       [5, 2, 2, 4, 1, 0, 4, 0, 5, 5, 1, 5, 2, 0, 3],
       [5, 0, 5, 2, 1, 2, 4, 4, 5, 4, 3, 5, 4, 2, 5],
       [5, 5, 0, 0, 3, 1, 2, 1, 2, 2, 5, 4, 5, 2, 3],
       [3, 5, 3, 0, 1, 5, 2, 1, 4, 2, 2, 1, 3, 0, 3],
       [5, 5, 3, 4, 1, 5, 0, 5, 4, 4, 0, 5, 0, 3, 5]])

In [None]:
k