In [139]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from math import floor

In [140]:
import pandas as pd
from random import sample

In [141]:
# Columns that reduceForProjectionAnalysis removes from a merged slate after
# the derived feature columns have been computed from them.
drop_attribs = [
    "game_date",
    # raw projection columns, copied/averaged into derived columns before the drop
    "ppg_projection",
    "value_projection",
    "Proj FP",
    "Proj Min",
    "Proj Val",
    "position",
    "salary",
    # per-source aliases that only feed 'Avg Proj' / 'Avg Value Proj'
    "DFN_PPGProj",
    "FantasyFuelPPGProj",
    "DFN_ValueProj",
    "FantasyFuelValueProj",
    # minute columns that only feed 'Avg Skewed Min'
    "L2 Min",
    "L5 Min",
    "S Min",
]


## other drop attribs
"opp",
"spread",
"over_under",
"implied_team_score",
"L5_dvp_rank",
"L5_ppg_floor",
"L5_ppg_avg",
"L5_ppg_max",
"Rest",
"Opp Pace",
"Opp DEff",
"Opp DvP",
"L2 FGA",
"L5 FGA",
"S FGA",
"L2 Min",
"L5 Min",
"S Min",
"L5 FP",
"S FP",
"Floor FP",
"Ceil FP",


In [142]:
def reduceForProjectionAnalysis(file):
    """Add derived projection columns to a merged slate, drop raw columns, filter injuries.

    The frame passed in is modified in place: the derived columns are added
    and every column named in the module-level ``drop_attribs`` is removed.

    Parameters
    ----------
    file : pandas.DataFrame
        Merged slate containing the raw projection, minutes, FGA, fantasy-point
        and ``injury_status`` columns read below.

    Returns
    -------
    pandas.DataFrame
        The rows of ``file`` whose ``injury_status`` is neither ``'O'`` nor ``'Q'``.
    """
    def _row_mean(*columns):
        # Left-to-right sum of the named columns divided by how many there are.
        total = file[columns[0]]
        for name in columns[1:]:
            total = total + file[name]
        return total / len(columns)

    # Copy the raw projection columns under prefixed names.
    prefixed_aliases = (
        ('FantasyFuelPPGProj', 'ppg_projection'),
        ('FantasyFuelValueProj', 'value_projection'),
        ('DFN_PPGProj', 'Proj FP'),
        ('DFN_MinProj', 'Proj Min'),
        ('DFN_ValueProj', 'Proj Val'),
    )
    for alias, raw_column in prefixed_aliases:
        file[alias] = file[raw_column]

    # Row-wise averages; 'Avg Skewed FP' depends on 'Avg Proj', so order matters.
    file['Avg Proj'] = _row_mean('DFN_PPGProj', 'FantasyFuelPPGProj')
    file['Avg Value Proj'] = _row_mean('DFN_ValueProj', 'FantasyFuelValueProj')
    file['Avg Skewed Min'] = _row_mean('L2 Min', 'L5 Min', 'S Min')
    file['Avg Skewed FGA'] = _row_mean('L2 FGA', 'L5 FGA', 'S FGA')
    file['Avg Skewed FP'] = _row_mean('L5 FP', 'S FP', 'Ceil FP', 'Floor FP', 'Avg Proj')
    file['Avg L5 PPG'] = _row_mean('L5_ppg_floor', 'L5_ppg_avg', 'L5_ppg_max')

    # Projected points per averaged minute, scaled by the projected minutes.
    # Must be computed before the drop below, which removes 'Proj Min'.
    points_per_minute = file['Avg Proj'] / file['Avg Skewed Min']
    file['Proj Min Enhanced'] = points_per_minute * file['Proj Min']

    file.drop(columns=drop_attribs, inplace=True)

    status = file['injury_status']
    return file[(status != 'O') & (status != 'Q')]

point_guards = point_guards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    shooting_guards = shooting_guards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    small_forwards = small_forwards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    power_forwards = power_forwards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    guards = guards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    forwards = forwards.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')
    centers = centers.sample(frac=0.8, replace=True, random_state=1, weights='Avg Value Proj')

In [143]:
##Helper Functions
def verifyLineup(lineup):
    """Return True when ``lineup`` satisfies all three roster rules.

    The rules checked are: at least two distinct values in ``'team'``, a
    ``'Salary'`` total of at most 50000, and exactly eight distinct values in
    ``'Player Name'``.

    Parameters
    ----------
    lineup : pandas.DataFrame
        Frame with ``'team'``, ``'Salary'`` and ``'Player Name'`` columns.

    Returns
    -------
    bool
    """
    distinct_teams = set(lineup['team'].tolist())
    total_salary = lineup['Salary'].sum()
    distinct_players = set(lineup['Player Name'].tolist())

    rules = (
        len(distinct_teams) >= 2,
        total_salary <= 50000,
        len(distinct_players) == 8,
    )
    return all(rules)
    
def createRandomPopulation(point_guards, shooting_guards, small_forwards, power_forwards, guards, forwards, centers, util, limit):
    """Build ``limit`` random lineups that pass ``verifyLineup``.

    Each candidate lineup takes one random row from every pool, in slot order
    PG, SG, SF, PF, G, F, C, UTIL. Candidates that fail ``verifyLineup`` are
    discarded and redrawn, so the loop does not end if no valid lineup can be
    formed from the pools.

    Parameters
    ----------
    point_guards, shooting_guards, small_forwards, power_forwards, guards,
    forwards, centers, util : pandas.DataFrame
        Candidate rows for each roster slot; each needs at least one row to
        sample from.
    limit : int
        Number of valid lineups to return. A value of 0 or less returns ``[]``.

    Returns
    -------
    list of pandas.DataFrame
        One eight-row frame per lineup, keeping the sampled rows' index labels.
    """
    slot_pools = (
        point_guards, shooting_guards, small_forwards, power_forwards,
        guards, forwards, centers, util,
    )
    lineups = []
    while len(lineups) < limit:
        # DataFrame.append was removed in pandas 2.0; one concat over the
        # eight single-row samples builds the same frame on every version.
        lineup = pd.concat([pool.sample(n=1) for pool in slot_pools])
        if verifyLineup(lineup):
            lineups.append(lineup)
    return lineups


def mate(_parents):
    """Produce ten child lineups by resampling players from a pool of parents.

    Up to ten parent lineups are drawn at random and stacked into one player
    pool. Each child takes one random row per roster slot (PG, SG, SF, PF, G,
    F, C, UTIL) from that pool and is kept only if it passes ``verifyLineup``.
    A player who appears in several parents occupies several pool rows and is
    therefore drawn more often.

    Parameters
    ----------
    _parents : list of pandas.DataFrame
        Parent lineups with ``'Pos'`` plus the columns ``verifyLineup`` reads.
        Must not be empty (``pd.concat`` raises on an empty list).

    Returns
    -------
    list of pandas.DataFrame
        Ten eight-row child lineups.

    Notes
    -----
    The loop does not end if the pool cannot yield a valid lineup.
    """
    # random.sample raises when asked for more items than exist, so cap the draw.
    parents = pd.concat(sample(_parents, min(10, len(_parents))))

    positions = parents['Pos']
    slot_pools = (
        parents.loc[positions.str.contains("PG")],
        parents.loc[positions.str.contains("SG")],
        parents.loc[positions.str.contains("SF")],
        parents.loc[positions.str.contains("PF")],
        parents.loc[positions.str.contains("G")],
        parents.loc[positions.str.contains("F")],
        parents.loc[positions.str.contains("C")],
        parents,  # utility slot: any player
    )

    children = []
    while len(children) < 10:
        # Every child is drawn from the pools. The earlier seed (the 16-row
        # concatenation of the first two parents) could itself be returned as
        # a "child" if it passed verification. DataFrame.append, removed in
        # pandas 2.0, is replaced by a single concat.
        child_lineup = pd.concat([pool.sample(n=1) for pool in slot_pools])
        if verifyLineup(child_lineup):
            children.append(child_lineup)
    return children

def sortFitestBasedOnProjection(population):
    """Rank lineups by ``projectedPointSum`` (highest first) and filter them.

    ``population`` itself is sorted in place; the returned list is the result
    of passing the sorted list through ``eliminateDuplicates``.
    """
    population.sort(key=projectedPointSum, reverse=True)
    return eliminateDuplicates(population)

def projectedPointSum(lineup):
    """Fitness score of a lineup: the sum of four column totals.

    NOTE(review): the first three columns carry the ' Stan' suffix written by
    ``standardize``; the fourth, 'Avg Skewed FP', is read under its plain name
    even though ``standardize`` also writes 'Avg Skewed FP Stan'. Confirm that
    this mix is intended.
    """
    fitness_columns = (
        'Avg Proj Stan',
        'DFN_MinProj Stan',
        'Proj Min Enhanced Stan',
        'Avg Skewed FP',
    )
    score = lineup[fitness_columns[0]].sum()
    for column in fitness_columns[1:]:
        score = score + lineup[column].sum()
    return score

def eliminateDuplicates(population):
    """Keep only lineups that introduce at least one player not seen before.

    Lineups are visited in order. A lineup is kept when its set of
    'Player Name' values is not a subset of the players collected from the
    lineups kept so far; its players are then added to that collection.
    A lineup made up entirely of already-seen players is dropped even if that
    exact combination has not occurred before.

    Parameters
    ----------
    population : iterable of pandas.DataFrame

    Returns
    -------
    list of pandas.DataFrame
        The kept lineups, in their original order.
    """
    seen_players = set()
    kept = []
    for lineup in population:
        roster = set(lineup['Player Name'].tolist())
        if roster <= seen_players:
            continue
        kept.append(lineup)
        seen_players |= roster
    return kept


In [144]:
def performSelection(population):
    """Pick 16 parents from a ranked population.

    The selection is, in order: the first entry, three random entries from
    positions 1-4, two random entries from positions 5-9, and ten random
    entries from the whole population. ``population`` is not modified.
    ``random.sample`` raises ``ValueError`` if a tier holds fewer entries than
    requested, so at least ten entries are needed.
    """
    chosen = population[:1]
    tiers = (
        (population[1:5], 3),
        (population[5:10], 2),
        (population, 10),
    )
    for candidates, how_many in tiers:
        chosen.extend(sample(candidates, how_many))
    return chosen


def performCrossover(population):
    """Return the child lineups that ``mate`` produces from ``population``."""
    return mate(population)
        

def createNextGeneration(old_population):
    """Replace the tail of a population with freshly bred children.

    Parents are chosen with ``performSelection`` and bred with
    ``performCrossover``. The result is
    ``old_population[:len(old_population) - 10]`` followed by the children.
    ``old_population`` itself is left unchanged.
    """
    parents = performSelection(old_population)
    offspring = performCrossover(parents)
    survivor_count = len(old_population) - 10
    return [*old_population[:survivor_count], *offspring]
    
        

def findConvergence(population, total_players):
    """Report population statistics and test them against convergence thresholds.

    The lineups are stacked and reduced to one row per 'Player Name' (first
    occurrence kept); all statistics are computed over those rows.

    NOTE: a later cell defines ``findConvergence`` again. When the notebook is
    run top to bottom that later definition replaces this one.

    Parameters
    ----------
    population : list of pandas.DataFrame
        Lineups with 'Player Name', 'Avg Proj', 'Avg Value Proj' and
        'DFN_MinProj' columns.
    total_players : int
        Size of the player pool; divisor for the unique-player share.

    Returns
    -------
    bool-like
        Truthy when fewer than 55% of the pool is represented and the mean
        projection, value and minutes fall inside their threshold windows.
    """
    unique_players = pd.concat(population).drop_duplicates(subset="Player Name", keep='first')
    percentage_of_unique_players = len(unique_players['Player Name'].value_counts()) / total_players
    projection_value = unique_players['Avg Proj'].mean()
    val_value = unique_players['Avg Value Proj'].mean()
    minutes_value = unique_players['DFN_MinProj'].mean()
    # Removed: a 'drop_level' computation and an 'Actual FP' row filter. Both
    # referenced undefined names (`unique`, `file`) and raised NameError, the
    # first read 'Proj FP' (a column listed in drop_attribs), and the filter
    # result was discarded anyway.

    print("Unique Player Percentage: ", percentage_of_unique_players )
    print("Proj Val: ", projection_value )
    print("Val: ", val_value )
    print("Min Val: ", minutes_value )
    print("XXX")

    unique_player_threshold = percentage_of_unique_players < .55
    projection_threshold = 24 < projection_value < 32
    value_threshold = 4.4 < val_value < 5.4
    minutes_threshold = 25 < minutes_value < 31
    return  unique_player_threshold and projection_threshold and value_threshold and minutes_threshold
    

In [149]:
def findConvergence(population, total_players):
    """Print population statistics and test them against convergence thresholds.

    All lineups are stacked without de-duplication, so a player who appears in
    several lineups contributes one row per appearance to the three means.

    Parameters
    ----------
    population : list of pandas.DataFrame
        Lineups with 'Player Name', 'Avg Proj', 'Avg Value Proj' and
        'DFN_MinProj' columns.
    total_players : int
        Size of the player pool; divisor for the unique-player share.

    Returns
    -------
    bool-like
        Truthy when the share of distinct players is below 0.53 and the mean
        projection, value and minutes each fall inside their window.
    """
    stacked = pd.concat(population)
    distinct_share = len(stacked['Player Name'].value_counts()) / total_players
    mean_projection = stacked['Avg Proj'].mean()
    mean_value = stacked['Avg Value Proj'].mean()
    mean_minutes = stacked['DFN_MinProj'].mean()

    report = (
        ("Unique Player Percentage: ", distinct_share),
        ("Proj Val: ", mean_projection),
        ("Val: ", mean_value),
        ("Min Val: ", mean_minutes),
    )
    for label, value in report:
        print(label, value)
    print("XXX")

    return (
        distinct_share < .53
        and 28.6 < mean_projection < 34
        and 4.6 < mean_value < 5.6
        and 27 < mean_minutes < 32
    )

In [150]:
def standardize(df):
    """Return a copy of ``df`` with z-scored versions of four feature columns.

    For each of 'Avg Proj', 'DFN_MinProj', 'Proj Min Enhanced' and
    'Avg Skewed FP', a '<name> Stan' column holding
    ``(value - column mean) / column std`` is added. ``df`` is not modified.
    """
    categories = ['Avg Proj', 'DFN_MinProj', 'Proj Min Enhanced', 'Avg Skewed FP']
    z_scores = {
        name + ' Stan': (df[name] - df[name].mean()) / df[name].std()
        for name in categories
    }
    return df.assign(**z_scores)

def geneticAlgorithmForHistorialBestLineups(file, max_generations=None):
    """Evolve a population of lineups until ``findConvergence`` accepts it.

    The slate is standardised and split into per-slot candidate pools by
    substring match on 'Pos'. A random population is created, and each
    generation it is ranked, tested for convergence, bred with
    ``createNextGeneration`` and topped back up with random lineups.

    Parameters
    ----------
    file : pandas.DataFrame
        Cleaned slate with 'Player Name', 'Pos' and the columns needed by
        ``standardize``, ``verifyLineup`` and ``findConvergence``.
    max_generations : int or None, optional
        Upper bound on the number of breeding rounds. ``None`` (the default)
        keeps the unbounded behaviour, where the loop ends only once
        ``findConvergence`` returns a truthy value.

    Returns
    -------
    list of pandas.DataFrame
        The final population as returned by ``sortFitestBasedOnProjection``.
    """
    file = standardize(file)
    total_players = len(file['Player Name'].tolist())
    population_size = 30

    positions = file['Pos']
    # "G" matches both PG and SG, "F" both SF and PF; the last pool is the
    # utility slot, open to every player.
    slot_pools = (
        file.loc[positions.str.contains("PG")],
        file.loc[positions.str.contains("SG")],
        file.loc[positions.str.contains("SF")],
        file.loc[positions.str.contains("PF")],
        file.loc[positions.str.contains("G")],
        file.loc[positions.str.contains("F")],
        file.loc[positions.str.contains("C")],
        file,
    )
    population = createRandomPopulation(*slot_pools, population_size)

    generation = 0
    while True:
        # Sorts `population` in place and returns the filtered ranking.
        currentPopulation = sortFitestBasedOnProjection(population)
        if findConvergence(population, total_players):
            break
        if max_generations is not None and generation >= max_generations:
            # Stop with the best population found so far instead of looping forever.
            break
        population = createNextGeneration(currentPopulation)
        # Refill to the configured size (previously a hard-coded 30).
        population.extend(createRandomPopulation(*slot_pools, population_size - len(population)))
        generation += 1

    print("*****************")
    return sortFitestBasedOnProjection(population)

In [151]:
# Full list of game days, newest first (2020-02-04 back to 2019-10-22).
# Each entry is [short 'M_D' label, ISO 'YYYY-MM-DD' date].
# Some calendar days are absent (e.g. 2020-01-21, 2019-12-24, 2019-11-28).
# createBestLineups iterates `dates`, not this list.
dates_all = [
    ['2_4', '2020-02-04'],
    ['2_3', '2020-02-03'],
    ['2_2', '2020-02-02'],
    ['2_1', '2020-02-01'],
    ['1_31', '2020-01-31'],
    ['1_30', '2020-01-30'],
    ['1_29', '2020-01-29'],
    ['1_28', '2020-01-28'],
    ['1_27', '2020-01-27'],
    ['1_26', '2020-01-26'],
    ['1_25', '2020-01-25'],
    ['1_24', '2020-01-24'],
    ['1_23', '2020-01-23'],
    ['1_22', '2020-01-22'],
    ['1_20', '2020-01-20'],
    ['1_19', '2020-01-19'],
    ['1_18', '2020-01-18'],
    ['1_17', '2020-01-17'],
    ['1_16', '2020-01-16'],
    ['1_15', '2020-01-15'],
    ['1_14', '2020-01-14'],
    ['1_13', '2020-01-13'],
    ['1_12', '2020-01-12'],
    ['1_11', '2020-01-11'],
    ['1_10', '2020-01-10'],
    ['1_9', '2020-01-09'],
    ['1_8', '2020-01-08'],
    ['1_7', '2020-01-07'],
    ['1_6', '2020-01-06'],
    ['1_5', '2020-01-05'],
    ['1_4', '2020-01-04'],
    ['1_3', '2020-01-03'],
    ['1_2', '2020-01-02'],
    ['1_1', '2020-01-01'],
    ['12_31', '2019-12-31'],
    ['12_30', '2019-12-30'],
    ['12_29', '2019-12-29'],
    ['12_28', '2019-12-28'],
    ['12_27', '2019-12-27'],
    ['12_26', '2019-12-26'],
    ['12_25', '2019-12-25'],
    ['12_23', '2019-12-23'],
    ['12_22', '2019-12-22'],
    ['12_21', '2019-12-21'],
    ['12_20', '2019-12-20'],
    ['12_19', '2019-12-19'],
    ['12_18', '2019-12-18'],
    ['12_17', '2019-12-17'],
    ['12_16', '2019-12-16'],
    ['12_15', '2019-12-15'],
    ['12_14', '2019-12-14'],
    ['12_13', '2019-12-13'],
    ['12_12', '2019-12-12'],
    ['12_11', '2019-12-11'],
    ['12_10', '2019-12-10'],
    ['12_9', '2019-12-09'],
    ['12_8', '2019-12-08'],
    ['12_7', '2019-12-07'],
    ['12_6', '2019-12-06'],
    ['12_5', '2019-12-05'],
    ['12_4', '2019-12-04'],
    ['12_3', '2019-12-03'],
    ['12_2', '2019-12-02'],
    ['12_1', '2019-12-01'],
    ['11_30', '2019-11-30'],
    ['11_29', '2019-11-29'],
    ['11_27', '2019-11-27'],
    ['11_26', '2019-11-26'],
    ['11_25', '2019-11-25'],
    ['11_24', '2019-11-24'],
    ['11_23', '2019-11-23'],
    ['11_22', '2019-11-22'],
    ['11_21', '2019-11-21'],
    ['11_20', '2019-11-20'],
    ['11_19', '2019-11-19'],
    ['11_18', '2019-11-18'],
    ['11_17', '2019-11-17'],
    ['11_16', '2019-11-16'],
    ['11_15', '2019-11-15'],
    ['11_14', '2019-11-14'],
    ['11_13', '2019-11-13'],
    ['11_12', '2019-11-12'],
    ['11_11', '2019-11-11'],
    ['11_10', '2019-11-10'],
    ['11_9', '2019-11-09'],
    ['11_8', '2019-11-08'],
    ['11_7', '2019-11-07'],
    ['11_6', '2019-11-06'],
    ['11_5', '2019-11-05'],
    ['11_4', '2019-11-04'],
    ['11_3', '2019-11-03'],
    ['11_2', '2019-11-02'],
    ['11_1', '2019-11-01'],
    ['10_31', '2019-10-31'],
    ['10_30', '2019-10-30'],
    ['10_29', '2019-10-29'],
    ['10_28', '2019-10-28'],
    ['10_27', '2019-10-27'],
    ['10_26', '2019-10-26'],
    ['10_25', '2019-10-25'],
    ['10_24', '2019-10-24'],
    ['10_23', '2019-10-23'],
    ['10_22', '2019-10-22'],   
]

# Game days that createBestLineups actually processes, in the same
# [short label, ISO date] form as dates_all. Only element [1] (the ISO date)
# is read there. Uncomment entries to process more days.
dates = [
    ['2_4', '2020-02-04'],
    #['2_3', '2020-02-03'],
    #['2_2', '2020-02-02'],
    #['2_1', '2020-02-01'],
    #['1_31', '2020-01-31'],
    #['1_30', '2020-01-30'],
    #['1_29', '2020-01-29'],
    #['1_28', '2020-01-28'],
    #['1_27', '2020-01-27'],
    #['1_26', '2020-01-26'],
    
   
]

def resetIndicies(file):
    """Renumber every lineup's rows from 0, in place, and return the same list.

    Parameters
    ----------
    file : list of pandas.DataFrame
        Lineups whose existing index is discarded (not kept as a column).

    Returns
    -------
    list of pandas.DataFrame
        The ``file`` object that was passed in.
    """
    for lineup_frame in file:
        lineup_frame.reset_index(inplace=True, drop=True)
    return file

def saveBestLineups_toCSV(file, date):
    """Write a list of lineups to ``BestCreatedLineups/<date>`` as one CSV.

    The lineups are stacked into a single frame whose outer index level,
    named 'Lineup Num', numbers them 0, 1, 2, ... in list order. The path is
    used exactly as formatted (no extension is added).

    Parameters
    ----------
    file : list of pandas.DataFrame
        Lineups to save.
    date : str
        Value substituted into the output path.
    """
    lineup_numbers = np.arange(len(file))
    stacked = pd.concat(file, keys=lineup_numbers, names=['Lineup Num'])
    stacked.to_csv('BestCreatedLineups/{}'.format(date))

def createBestLineups():
    """Run the full pipeline for every game day listed in ``dates``.

    For each entry the merged slate ``HistoricalData_Merged/<ISO date>`` is
    read, cleaned with ``reduceForProjectionAnalysis``, evolved with
    ``geneticAlgorithmForHistorialBestLineups``, re-indexed and saved under
    the same ISO date by ``saveBestLineups_toCSV``.
    """
    for game_day in dates:
        iso_date = game_day[1]
        merged_slate = pd.read_csv('HistoricalData_Merged/{}'.format(iso_date))
        slate = reduceForProjectionAnalysis(merged_slate)
        lineups = geneticAlgorithmForHistorialBestLineups(slate)
        saveBestLineups_toCSV(resetIndicies(lineups), iso_date)
        

In [152]:
# Build and save the best lineups for every game day listed in `dates`.
createBestLineups()

Unique Player Percentage:  0.9324324324324325
Proj Val:  23.9725
Val:  4.510020833333334
Min Val:  23.695833333333333
XXX
Unique Player Percentage:  0.9459459459459459
Proj Val:  24.074166666666663
Val:  4.4915
Min Val:  23.677083333333332
XXX
Unique Player Percentage:  0.8783783783783784
Proj Val:  25.846041666666665
Val:  4.6494375
Min Val:  25.104166666666668
XXX
Unique Player Percentage:  0.8378378378378378
Proj Val:  25.84375
Val:  4.652958333333333
Min Val:  25.370833333333334
XXX
Unique Player Percentage:  0.8243243243243243
Proj Val:  25.484375
Val:  4.663395833333333
Min Val:  25.064583333333335
XXX
Unique Player Percentage:  0.8513513513513513
Proj Val:  27.883750000000003
Val:  4.860125
Min Val:  27.404166666666665
XXX
Unique Player Percentage:  0.7702702702702703
Proj Val:  27.827083333333334
Val:  4.813416666666667
Min Val:  27.027083333333334
XXX
Unique Player Percentage:  0.7702702702702703
Proj Val:  28.368749999999995
Val:  4.897687500000001
Min Val:  27.85208333333333

Unique Player Percentage:  0.8243243243243243
Proj Val:  29.368541666666662
Val:  5.123645833333334
Min Val:  28.502083333333335
XXX
Unique Player Percentage:  0.6486486486486487
Proj Val:  30.63583333333333
Val:  5.2865416666666665
Min Val:  29.764583333333334
XXX
Unique Player Percentage:  0.7972972972972973
Proj Val:  28.69416666666666
Val:  5.071208333333334
Min Val:  27.875
XXX
Unique Player Percentage:  0.7432432432432432
Proj Val:  30.014791666666664
Val:  5.127458333333334
Min Val:  28.560416666666665
XXX
Unique Player Percentage:  0.7297297297297297
Proj Val:  29.572708333333328
Val:  5.054854166666667
Min Val:  28.383333333333333
XXX
Unique Player Percentage:  0.7702702702702703
Proj Val:  29.28625
Val:  5.026041666666667
Min Val:  28.416666666666668
XXX
Unique Player Percentage:  0.7027027027027027
Proj Val:  29.157499999999995
Val:  5.0925416666666665
Min Val:  28.472916666666666
XXX
Unique Player Percentage:  0.8648648648648649
Proj Val:  28.398541666666667
Val:  4.9582708

Unique Player Percentage:  0.7297297297297297
Proj Val:  28.928749999999997
Val:  5.005729166666667
Min Val:  27.789583333333333
XXX
Unique Player Percentage:  0.7162162162162162
Proj Val:  29.561666666666664
Val:  5.074812499999999
Min Val:  28.720833333333335
XXX
Unique Player Percentage:  0.6486486486486487
Proj Val:  30.345208333333332
Val:  5.127291666666666
Min Val:  29.264583333333334
XXX
Unique Player Percentage:  0.7702702702702703
Proj Val:  29.15083333333333
Val:  4.9747916666666665
Min Val:  27.70625
XXX
Unique Player Percentage:  0.7837837837837838
Proj Val:  28.95958333333333
Val:  4.938833333333334
Min Val:  27.25
XXX
Unique Player Percentage:  0.7027027027027027
Proj Val:  30.967916666666664
Val:  5.159520833333334
Min Val:  29.266666666666666
XXX
Unique Player Percentage:  0.7297297297297297
Proj Val:  30.698749999999997
Val:  5.198541666666666
Min Val:  29.197916666666668
XXX
Unique Player Percentage:  0.7837837837837838
Proj Val:  29.57333333333333
Val:  5.1461250000

Unique Player Percentage:  0.6081081081081081
Proj Val:  31.034375
Val:  5.263916666666666
Min Val:  29.889583333333334
XXX
Unique Player Percentage:  0.6756756756756757
Proj Val:  30.077916666666667
Val:  5.225791666666667
Min Val:  28.947916666666668
XXX
Unique Player Percentage:  0.6891891891891891
Proj Val:  29.465833333333332
Val:  5.1537083333333324
Min Val:  28.579166666666666
XXX
Unique Player Percentage:  0.7297297297297297
Proj Val:  29.975624999999997
Val:  5.140812499999999
Min Val:  28.229166666666668
XXX
Unique Player Percentage:  0.5945945945945946
Proj Val:  31.180625
Val:  5.298541666666667
Min Val:  29.804166666666667
XXX
Unique Player Percentage:  0.6891891891891891
Proj Val:  31.26645833333333
Val:  5.349645833333333
Min Val:  29.975
XXX
Unique Player Percentage:  0.7567567567567568
Proj Val:  29.157083333333336
Val:  5.013270833333333
Min Val:  28.097916666666666
XXX
Unique Player Percentage:  0.7027027027027027
Proj Val:  30.035624999999996
Val:  5.231458333333333

Unique Player Percentage:  0.6891891891891891
Proj Val:  30.117291666666667
Val:  5.257479166666667
Min Val:  28.989583333333332
XXX
Unique Player Percentage:  0.6756756756756757
Proj Val:  30.621041666666663
Val:  5.232375
Min Val:  29.204166666666666
XXX
Unique Player Percentage:  0.6486486486486487
Proj Val:  31.372916666666665
Val:  5.319354166666668
Min Val:  30.079166666666666
XXX
Unique Player Percentage:  0.7567567567567568
Proj Val:  29.856666666666666
Val:  5.164416666666667
Min Val:  28.254166666666666
XXX
Unique Player Percentage:  0.6891891891891891
Proj Val:  30.092708333333334
Val:  5.1719791666666675
Min Val:  28.716666666666665
XXX
Unique Player Percentage:  0.7702702702702703
Proj Val:  30.023125
Val:  5.103041666666667
Min Val:  28.589583333333334
XXX
Unique Player Percentage:  0.7297297297297297
Proj Val:  30.107708333333335
Val:  5.0946875
Min Val:  28.3875
XXX
Unique Player Percentage:  0.7567567567567568
Proj Val:  30.908124999999995
Val:  5.182166666666668
Min V

KeyboardInterrupt: 

In [None]:
def normalize(df):
    """Return a copy of ``df`` with min-max scaled versions of three columns.

    For each of 'Avg Proj', 'DFN_MinProj' and 'Proj Min Enhanced', a
    '<name> Norm' column holding ``(value - min) / (max - min)`` is added.
    ``df`` is not modified.
    """
    categories = ['Avg Proj', 'DFN_MinProj', 'Proj Min Enhanced', ]
    scaled = {}
    for name in categories:
        column = df[name]
        highest = column.max()
        lowest = column.min()
        scaled[name + ' Norm'] = (column - lowest) / (highest - lowest)
    return df.assign(**scaled)