In [103]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from math import floor

In [104]:
import pandas as pd
from random import sample

In [123]:
def reduceForProjectionAnalysis(file):
    """Add derived projection columns and drop players marked 'O' or 'Q'.

    The derived columns are written onto ``file`` itself, so the caller's
    DataFrame is modified in place. Any +/-inf values in the frame (for
    example from dividing by a zero 'Proj Min') are replaced with 0.

    Args:
        file: DataFrame holding the source columns read below
            ('ppg_projection', 'Proj FP', 'L2 Min', 'USG', 'injury_status', ...).

    Returns:
        The rows of ``file`` whose 'injury_status' is neither 'O' nor 'Q'.
    """
    # Rename the per-source projections to explicit, source-tagged columns.
    file['FantasyFuelPPGProj'] = file['ppg_projection']
    file['FantasyFuelValueProj'] = file['value_projection']
    file['DFN_PPGProj'] = file['Proj FP']
    file['DFN_MinProj'] = file['Proj Min']
    file['DFN_ValueProj'] = file['Proj Val']

    # Averages across the two projection sources and across recent samples.
    file['Avg Proj'] = (file['DFN_PPGProj'] + file['FantasyFuelPPGProj']) / 2
    file['Avg Value Proj'] = (file['DFN_ValueProj'] + file['FantasyFuelValueProj']) / 2
    file['Avg Skewed Min'] = (file['L2 Min'] + file['L5 Min'] + file['S Min']) / 3
    file['Avg Skewed FGA'] = (file['L2 FGA'] + file['L5 FGA'] + file['S FGA']) / 3
    file['Avg Skewed FP'] = (
        file['L5 FP'] + file['S FP'] + file['Ceil FP'] + file['Floor FP'] + file['Avg Proj']
    ) / 5
    file['Avg L5 PPG'] = (file['L5_ppg_floor'] + file['L5_ppg_avg'] + file['L5_ppg_max']) / 3

    file['Proj Min Enhanced'] = (file['Avg Proj'] / file['Avg Skewed Min']) * file['Proj Min']
    file['Proj FPP Per Min'] = file['Avg Proj'] / file['Proj Min']
    file['Proj Pts Scored'] = file['implied_team_score'] * (file['USG'] / 100)

    # Divisions by zero above produce +/-inf; neutralise them to 0.
    file.replace([np.inf, -np.inf], 0, inplace=True)

    # Single-pass filter; rows with a missing status are kept, as before.
    return file[~file['injury_status'].isin(['O', 'Q'])]

In [132]:
##Helper Functions
def verifyLineup(lineup):
    """Return True when the lineup satisfies all three roster rules.

    Rules checked: at least 2 distinct values in 'team', total 'Salary'
    of at most 50000, and exactly 8 distinct values in 'Player Name'.
    """
    distinct_teams = set(lineup['team'].tolist())
    total_salary = lineup['Salary'].sum()
    distinct_players = set(lineup['Player Name'].tolist())

    checks = (
        len(distinct_teams) >= 2,
        total_salary <= 50000,
        len(distinct_players) == 8,
    )
    return all(checks)
    
def createRandomPopulation(point_guards, shooting_guards, small_forwards, power_forwards, guards, forwards, centers, util, limit):
    """Build ``limit`` random lineups that pass ``verifyLineup``.

    Each candidate lineup is one randomly sampled row from each of the eight
    pools, stacked in the order the pools are passed. Candidates that fail
    ``verifyLineup`` are discarded and redrawn.

    Args:
        point_guards, shooting_guards, small_forwards, power_forwards,
        guards, forwards, centers, util: DataFrames to draw one row from.
        limit: number of valid lineups to produce; a value <= 0 yields [].

    Returns:
        A list of 8-row DataFrames.
    """
    slot_pools = (
        point_guards, shooting_guards, small_forwards, power_forwards,
        guards, forwards, centers, util,
    )
    lineups = []
    while len(lineups) < limit:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent and builds the frame in a single step.
        lineup = pd.concat([pool.sample(n=1) for pool in slot_pools])
        if verifyLineup(lineup):
            lineups.append(lineup)
    return lineups


def mate(_parents):
    """Produce 10 child lineups by resampling players from random parents.

    Up to 10 parent lineups are drawn at random and their rows are pooled.
    Position pools are then formed by substring match on the 'Pos' column,
    and children are drawn one row per pool until 10 of them pass
    ``verifyLineup``.

    Args:
        _parents: list of lineup DataFrames with a 'Pos' column.

    Returns:
        A list of 10 valid child lineups (DataFrames).
    """
    # Clamp the draw so fewer than 10 parents no longer raises ValueError.
    parents = pd.concat(sample(_parents, min(10, len(_parents))))
    position_codes = parents['Pos'].str
    slot_pools = (
        parents.loc[position_codes.contains("PG")],
        parents.loc[position_codes.contains("SG")],
        parents.loc[position_codes.contains("SF")],
        parents.loc[position_codes.contains("PF")],
        parents.loc[position_codes.contains("G")],
        parents.loc[position_codes.contains("F")],
        parents.loc[position_codes.contains("C")],
        parents,  # utility slot: any pooled player
    )

    children = []
    while len(children) < 10:
        # Previously the loop was primed with the first two parents glued
        # together (16 rows), which could be returned as a "child" if it
        # happened to pass verification. Every child is now freshly drawn.
        # DataFrame.append (removed in pandas 2.0) is replaced by concat.
        child_lineup = pd.concat([pool.sample(n=1) for pool in slot_pools])
        if verifyLineup(child_lineup):
            children.append(child_lineup)
    return children


def sortFitestBasedOnProjection(population, function):
    """Rank lineups by ``function`` (highest first) and filter the result.

    ``population`` is sorted in place; the returned list is whatever
    ``eliminateDuplicates`` keeps from the sorted population.
    """
    population.sort(reverse=True, key=function)
    return eliminateDuplicates(population)

def projectedFPPointSum(lineup):
    """Fitness: total of the lineup's 'Avg Proj Stan' column."""
    scores = lineup['Avg Proj Stan']
    return scores.sum()

def projectedMinSum(lineup):
    """Fitness: total of the lineup's 'DFN_MinProj Stan' column."""
    scores = lineup['DFN_MinProj Stan']
    return scores.sum()

def projectedMinEnhancedSum(lineup):
    """Fitness: total of the lineup's 'Proj Min Enhanced Stan' column."""
    scores = lineup['Proj Min Enhanced Stan']
    return scores.sum()

def projectedFPPperMinSum(lineup):
    """Fitness: total of the lineup's 'Proj FPP Per Min Stan' column."""
    scores = lineup['Proj FPP Per Min Stan']
    return scores.sum()

def projectedPointsScoredSum(lineup):
    """Fitness: total of the lineup's 'Proj Pts Scored Stan' column."""
    scores = lineup['Proj Pts Scored Stan']
    return scores.sum()

def projectedValueSum(lineup):
    """Fitness: total of the lineup's 'Proj Value Proj Stan' column."""
    # NOTE(review): no visible code creates a 'Proj Value Proj' column to
    # standardize; this may have been meant to read 'Avg Value Proj Stan' --
    # confirm before using this fitness function.
    scores = lineup['Proj Value Proj Stan']
    return scores.sum()

def projectedSelectedSum(lineup):
    """Fitness: total of the lineup's 'Selected Stan' column."""
    scores = lineup['Selected Stan']
    return scores.sum()

def eliminateDuplicates(population):
    """Keep only lineups that introduce at least one not-yet-seen player.

    Lineups are visited in order. A lineup is kept when its set of
    'Player Name' values is not fully contained in the players of the
    lineups kept so far; otherwise it is dropped. Note this is stricter
    than removing exact duplicates: a lineup made entirely of players that
    appeared across several earlier lineups is also dropped.

    Returns:
        A new list with the kept lineups in their original order.
    """
    seen_players = set()
    kept = []
    for lineup in population:
        roster = set(lineup['Player Name'].tolist())
        if roster <= seen_players:
            continue
        kept.append(lineup)
        seen_players |= roster
    return kept


In [133]:
def performSelection(population):
    """Pick the parents for the next crossover from a ranked population.

    The selection is, in order: the top lineup, 3 random picks from ranks
    1-4, 2 random picks from ranks 5-9, and 10 random picks from the whole
    population. Each draw is clamped to the size of its pool, so a
    population that shrank (e.g. after de-duplication) yields fewer parents
    instead of raising ``ValueError`` from ``random.sample``.

    Args:
        population: list of lineups, best first. Not modified.

    Returns:
        A new list of selected lineups (16 when the population has >= 10).
    """
    def draw(pool, count):
        # random.sample raises when count exceeds the pool size.
        return sample(pool, min(count, len(pool)))

    selected_population = population[:1]
    selected_population.extend(draw(population[1:5], 3))
    selected_population.extend(draw(population[5:10], 2))
    selected_population.extend(draw(population, 10))
    return selected_population

def performCrossover(population):
    """Return the children produced by ``mate`` for the given parents."""
    return mate(population)
        

def createNextGeneration(old_population):
    """Replace the 10 lowest-ranked lineups with freshly bred children.

    Args:
        old_population: list of lineups, best first. Not modified.

    Returns:
        A new list: the survivors of ``old_population`` followed by the
        children returned by ``performCrossover``.
    """
    selected_population = performSelection(old_population)
    children = performCrossover(selected_population)

    # With fewer than 10 lineups, `len - 10` is negative and a negative
    # slice bound would keep an arbitrary prefix; clamp so nobody survives.
    survivor_count = max(len(old_population) - 10, 0)
    new_generation = old_population[:survivor_count]
    new_generation.extend(children)
    return new_generation
    
        

In [134]:
def standardize(df, categories):
    """Return a copy of ``df`` with a z-score column per listed feature.

    For every name in ``categories`` a new column ``'<name> Stan'`` is added
    holding ``(value - mean) / std`` (pandas' default sample std). When the
    standard deviation is zero or undefined (constant column, single row),
    the values are only centred, so the result is 0 instead of NaN/inf.

    Args:
        df: source DataFrame; left unmodified.
        categories: iterable of column names to standardize.

    Returns:
        A new DataFrame with the extra ' Stan' columns.
    """
    result = df.copy()
    for feature_name in categories:
        column = df[feature_name]
        centered = column - column.mean()
        spread = column.std()
        # A zero/NaN spread would turn every score into NaN or inf and make
        # any sum over the standardized column unusable.
        if pd.isna(spread) or spread == 0:
            spread = 1.0
        result[feature_name + ' Stan'] = centered / spread
    return result

In [135]:
def geneticAlgorithmForBestProjectedLineups(file):
    """Evolve lineup populations once per projection-based fitness function.

    The player table is standardized, split into position pools by substring
    match on 'Pos', and then, for each of five fitness functions, a random
    population of 30 lineups is evolved for 100 generations. After each
    generation the population is topped back up to 30 with random lineups.
    Progress (the generation index, then a separator line) is printed.

    Args:
        file: player DataFrame containing 'Pos' and the columns standardized
            below.

    Returns:
        A list with one entry per fitness function: the final population,
        ranked and filtered by ``sortFitestBasedOnProjection``.
    """
    file = standardize(file, ['Avg Proj', 'DFN_MinProj', 'Proj Min Enhanced', 'Proj FPP Per Min', 'Proj Pts Scored'])
    population_size = 30
    generations = 100

    point_guards = file.loc[file['Pos'].str.contains("PG")]
    shooting_guards = file.loc[file['Pos'].str.contains("SG")]
    small_forwards = file.loc[file['Pos'].str.contains("SF")]
    power_forwards = file.loc[file['Pos'].str.contains("PF")]
    guards = file.loc[file['Pos'].str.contains("G")]
    forwards = file.loc[file['Pos'].str.contains("F")]
    centers = file.loc[file['Pos'].str.contains("C")]
    util = file

    runs = [projectedFPPointSum, projectedMinSum, projectedMinEnhancedSum, projectedFPPperMinSum, projectedPointsScoredSum]
    results = []
    for _type in runs:
        population = createRandomPopulation(point_guards, shooting_guards,
                                            small_forwards, power_forwards,
                                            guards, forwards, centers, util, population_size)
        for generation in range(generations):
            ranked = sortFitestBasedOnProjection(population, _type)
            population = createNextGeneration(ranked)
            # Refill to the configured size (previously a duplicated literal 30).
            population.extend(createRandomPopulation(point_guards, shooting_guards,
                                                     small_forwards, power_forwards,
                                                     guards, forwards, centers, util,
                                                     population_size - len(population)))
            print(generation)
        print("*****************")
        results.append(sortFitestBasedOnProjection(population, _type))
    return results



In [136]:
def transformProjectedLineups(files):
    """Flatten a list of lineup lists into one DataFrame.

    Each inner list of DataFrames is concatenated first, then the resulting
    frames are concatenated in order. Original row indices are kept.
    """
    merged_per_group = [pd.concat(lineup_group) for lineup_group in files]
    return pd.concat(merged_per_group)

def addPlayerOccurences(file):
    """Annotate each player with how often they appear, then de-duplicate.

    A float 'Selected' column is written with the number of rows sharing the
    row's 'Player Name' (0 for rows whose name is missing). Afterwards only
    the first row per 'Player Name' is kept. ``file`` is modified in place.

    Args:
        file: DataFrame with a 'Player Name' column.

    Returns:
        The same ``file`` object, now with 'Selected' and unique names.
    """
    occurrences = file['Player Name'].value_counts()
    # One vectorised lookup instead of a .loc assignment per player, and no
    # chained `fillna(inplace=True)`, which is unreliable under pandas
    # Copy-on-Write. to_numpy() sidesteps index alignment, since the frame
    # may carry a non-unique index.
    file['Selected'] = (
        file['Player Name'].map(occurrences).fillna(0).astype(float).to_numpy()
    )
    file.drop_duplicates(subset="Player Name", keep='first', inplace=True)
    return file

In [137]:
def sortFitestBasedOnOccurance(population):
    """Rank lineups by ``numOfOccurance`` (highest first) and filter them.

    ``population`` is sorted in place; the returned list is whatever
    ``eliminateDuplicates`` keeps from the sorted population.
    """
    population.sort(reverse=True, key=numOfOccurance)
    return eliminateDuplicates(population)

def numOfOccurance(lineup):
    """Fitness: total of the lineup's 'Selected' column."""
    selection_counts = lineup['Selected']
    return selection_counts.sum()

def geneticAlgorithmForBestDailyLineups(file):
    """Evolve lineups that favour the most frequently selected players.

    ``file`` (a list of lineup lists) is flattened, annotated with per-player
    occurrence counts and standardized on 'Selected'. A population of 30
    random lineups is then evolved for 100 generations, ranked by
    ``sortFitestBasedOnOccurance`` and refilled to 30 after every
    generation. The generation index and a closing marker are printed.

    Returns:
        The final population ranked by ``projectedSelectedSum`` and filtered
        by ``sortFitestBasedOnProjection``.
    """
    flattened = transformProjectedLineups(file)
    file = standardize(addPlayerOccurences(flattened), ['Selected'])
    population_size = 30

    def eligible(position_tag):
        # Rows whose 'Pos' text contains the given tag.
        return file.loc[file['Pos'].str.contains(position_tag)]

    # Pool order matches createRandomPopulation's parameters; the last
    # (utility) pool is the whole table.
    pools = (
        eligible("PG"), eligible("SG"), eligible("SF"), eligible("PF"),
        eligible("G"), eligible("F"), eligible("C"), file,
    )

    population = createRandomPopulation(*pools, population_size)
    for generation in range(100):
        ranked = sortFitestBasedOnOccurance(population)
        population = createNextGeneration(ranked)
        shortfall = population_size - len(population)
        population.extend(createRandomPopulation(*pools, shortfall))
        print(generation)
    print("XXXXXXXXXXXXXXXX")
    return sortFitestBasedOnProjection(population, projectedSelectedSum)

In [138]:
# Game days to process. Each entry is [short label, ISO date]; only the
# ISO date (index 1) is read by createBestLineups, for both the input and
# the output file name.
dates = [
    ['2_4', '2020-02-04'],
    ['2_3', '2020-02-03'],
    ['2_2', '2020-02-02'],
    ['2_1', '2020-02-01'],
]

def resetIndicies(file):
    """Renumber every lineup's rows from 0, in place, and return ``file``.

    The previous index of each DataFrame is discarded rather than kept as a
    column.
    """
    for frame in file:
        frame.reset_index(inplace=True, drop=True)
    return file

def saveBestLineups_toCSV(file, date):
    """Write all lineups to one CSV under 'BestCreatedLineupsFocused/'.

    The lineups are stacked with an outer index level named 'Lineup Num'
    numbering them 0..len(file)-1; the output file is named after ``date``.
    """
    lineup_numbers = np.arange(len(file))
    stacked = pd.concat(file, keys=lineup_numbers, names=['Lineup Num'])
    stacked.to_csv('BestCreatedLineupsFocused/{}'.format(date))

def createBestLineups():
    """Run the full pipeline for every entry in ``dates``.

    For each game day: read 'HistoricalData_Merged/<date>', derive the
    projection columns, evolve the projection-based lineups, evolve the
    daily lineups from those, and save the result under the same date.
    """
    for game_day in dates:
        iso_date = game_day[1]
        raw_players = pd.read_csv('HistoricalData_Merged/{}'.format(iso_date))
        cleaned_file = reduceForProjectionAnalysis(raw_players)
        projected = geneticAlgorithmForBestProjectedLineups(cleaned_file)
        daily = geneticAlgorithmForBestDailyLineups(projected)
        saveBestLineups_toCSV(resetIndicies(daily), iso_date)
        

In [139]:
# Generate and save lineups for every game day listed in `dates`.
createBestLineups()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
*****************
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
*****************
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
*****************
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

23