In [81]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from math import floor

In [82]:
import pandas as pd
from random import sample

In [83]:
def reduceForProjectionAnalysis(file):
    """Add combined projection columns to ``file`` and filter by injury status.

    The new columns are written onto the DataFrame that was passed in, so the
    caller's frame gains them as well. The returned frame is a filtered
    selection that excludes every row whose ``injury_status`` equals ``'O'``
    or ``'Q'`` (presumably "out" / "questionable" -- confirm against the data
    source).

    Parameters
    ----------
    file : pandas.DataFrame
        Player table containing the projection, recent-form and
        ``injury_status`` columns read below.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, with the original index labels kept.
    """
    # Copy the raw projection columns under source-prefixed names.
    aliases = (
        ('FantasyFuelPPGProj', 'ppg_projection'),
        ('FantasyFuelValueProj', 'value_projection'),
        ('DFN_PPGProj', 'Proj FP'),
        ('DFN_MinProj', 'Proj Min'),
        ('DFN_ValueProj', 'Proj Val'),
    )
    for alias, source in aliases:
        file[alias] = file[source]

    # Average the two projection sources.
    file['Avg Proj'] = (file['DFN_PPGProj'] + file['FantasyFuelPPGProj']) / 2
    file['Avg Value Proj'] = (file['DFN_ValueProj'] + file['FantasyFuelValueProj']) / 2

    # Plain arithmetic means over groups of columns; columns are added left to
    # right in the listed order.
    averaged = (
        ('Avg Skewed Min', ('L2 Min', 'L5 Min', 'S Min')),
        ('Avg Skewed FGA', ('L2 FGA', 'L5 FGA', 'S FGA')),
        ('Avg Skewed FP', ('L5 FP', 'S FP', 'Ceil FP', 'Floor FP', 'Avg Proj')),
        ('Avg L5 PPG', ('L5_ppg_floor', 'L5_ppg_avg', 'L5_ppg_max')),
    )
    for target, parts in averaged:
        running_total = file[parts[0]]
        for part in parts[1:]:
            running_total = running_total + file[part]
        file[target] = running_total / len(parts)

    # Projected points per averaged minute, scaled by the projected minutes.
    points_per_minute = file['Avg Proj'] / file['Avg Skewed Min']
    file['Proj Min Enhanced'] = points_per_minute * file['Proj Min']

    status = file['injury_status']
    return file[(status != 'O') & (status != 'Q')]

In [90]:
##Helper Functions
def verifyLineup(lineup):
    """Return True when ``lineup`` passes all three roster checks.

    The checks are: at least 2 distinct values in ``'team'``, a ``'Salary'``
    total of at most 50000, and exactly 8 distinct values in
    ``'Player Name'``.

    Parameters
    ----------
    lineup : pandas.DataFrame
        One row per selected player.

    Returns
    -------
    bool
    """
    team_count = len(set(lineup['team'].tolist()))
    total_salary = lineup['Salary'].sum()
    player_count = len(set(lineup['Player Name'].tolist()))
    checks = (team_count >= 2, total_salary <= 50000, player_count == 8)
    return all(checks)
    
def createRandomPopulation(point_guards, shooting_guards, small_forwards, power_forwards, guards, forwards, centers, util, limit):
    """Build ``limit`` random lineups that pass ``verifyLineup``.

    Each candidate takes one randomly sampled row from each of the eight
    position pools, in the order the pools appear in the signature.
    Candidates that fail ``verifyLineup`` are discarded and redrawn.

    Parameters
    ----------
    point_guards, shooting_guards, small_forwards, power_forwards, guards,
    forwards, centers, util : pandas.DataFrame
        Candidate players for each roster slot.
    limit : int
        Number of valid lineups to return; a value <= 0 yields an empty list.

    Returns
    -------
    list of pandas.DataFrame
        ``limit`` lineups of eight rows each.

    Notes
    -----
    The loop only ends once ``limit`` valid lineups exist, so it does not
    terminate if the pools cannot produce a lineup that ``verifyLineup``
    accepts.
    """
    slots = (point_guards, shooting_guards, small_forwards, power_forwards,
             guards, forwards, centers, util)
    lineups = []
    while len(lineups) < limit:
        # DataFrame.append was removed in pandas 2.0; a single concat builds
        # the same eight-row frame from the same draws in the same order.
        lineup = pd.concat([pool.sample(n=1) for pool in slots])
        if verifyLineup(lineup):
            lineups.append(lineup)
    return lineups


def mate(_parents):
    """Create 10 child lineups from the players of 10 randomly chosen parents.

    Ten parents are drawn without replacement and stacked into one gene pool
    (a player appearing in several parents appears several times in the pool).
    The pool is split by substring match on ``'Pos'`` into the same eight
    roster slots used for the initial population, and each child takes one
    sampled row per slot. A child is kept only if ``verifyLineup`` accepts it.

    Parameters
    ----------
    _parents : list of pandas.DataFrame
        Candidate parent lineups; at least 10 are required by ``sample``.

    Returns
    -------
    list of pandas.DataFrame
        Ten valid child lineups.

    Notes
    -----
    Does not terminate if the gene pool cannot produce a lineup that
    ``verifyLineup`` accepts.
    """
    gene_pool = pd.concat(sample(_parents, 10))
    positions = gene_pool['Pos']
    # "G" also matches PG/SG and "F" also matches SF/PF (substring match).
    slots = [gene_pool.loc[positions.str.contains(tag)]
             for tag in ("PG", "SG", "SF", "PF", "G", "F", "C")]
    slots.append(gene_pool)  # utility slot: any player in the pool

    children = []
    while len(children) < 10:
        # Every child is an eight-slot draw. A plain concatenation of two
        # parents has 16 rows and must never be accepted as a lineup.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        child_lineup = pd.concat([pool.sample(n=1) for pool in slots])
        if verifyLineup(child_lineup):
            children.append(child_lineup)
    return children

def sortFitestBasedOnProjection(population):
    """Rank ``population`` by fitness and return its filtered form.

    ``population`` itself is sorted in place, highest ``projectedPointSum``
    first, so the caller's list is reordered too. The returned list is a new
    list produced by ``eliminateDuplicates`` from that ordering.
    """
    population.sort(reverse=True, key=projectedPointSum)
    return eliminateDuplicates(population)

def projectedPointSum(lineup):
    """Return the weighted fitness score of one lineup.

    The score is the sum, over the columns below, of the column total times
    its weight (0.6 for the averaged projection, 0.1 for each other term).

    NOTE(review): the last term reads the raw 'Avg Value Proj' column while
    the other four read ' Norm' columns; `standardize` also produces
    'Avg Value Proj Norm'. Confirm whether the raw column is intended.
    """
    weighted_columns = (
        ('Avg Proj Norm', .6),
        ('DFN_MinProj Norm', .1),
        ('Proj Min Enhanced Norm', .1),
        ('Avg Skewed FP Norm', .1),
        ('Avg Value Proj', .1),
    )
    score = None
    for column, weight in weighted_columns:
        term = lineup[column].sum() * weight
        score = term if score is None else score + term
    return score

def eliminateDuplicates(population):
    """Keep only lineups that contribute at least one not-yet-seen player.

    Lineups are visited in order. A lineup is kept when its set of
    'Player Name' values is not fully contained in the union of player names
    from the lineups kept so far; otherwise it is dropped.

    NOTE(review): this removes more than exact duplicates -- a distinct
    lineup made entirely of players already seen in earlier lineups is
    dropped as well. Confirm that is intended.

    Returns a new list; ``population`` is not modified.
    """
    seen_players = set()
    kept = []
    for lineup in population:
        names = set(lineup['Player Name'].tolist())
        if names <= seen_players:
            continue
        kept.append(lineup)
        seen_players |= names
    return kept


In [91]:
def performSelection(population):
    """Pick 16 parents from a fitness-ordered population.

    The result is, in order: the first lineup, 3 sampled from positions 1-4,
    2 sampled from positions 5-9, and 10 sampled from the whole population.
    Each draw is without replacement within itself, but the same lineup can
    be picked by more than one draw.

    Raises ValueError (from ``random.sample``) when a slice holds fewer
    lineups than requested, e.g. a population shorter than 10.
    """
    return [
        *population[:1],
        *sample(population[1:5], 3),
        *sample(population[5:10], 2),
        *sample(population, 10),
    ]


def performCrossover(population):
    """Return the children that ``mate`` produces from ``population``."""
    return mate(population)
        

def createNextGeneration(old_population):
    """Replace the tail of ``old_population`` with freshly bred children.

    Parents are chosen with ``performSelection`` and bred with
    ``performCrossover``. The new generation is the old population with its
    last 10 entries cut off (slice end ``len(old_population) - 10``),
    followed by the children. ``old_population`` is not modified.
    """
    offspring = performCrossover(performSelection(old_population))
    survivor_count = len(old_population) - 10
    return [*old_population[:survivor_count], *offspring]
    
        

def findConvergence(population, total_players):
    """Report population statistics and decide whether the search converged.

    All lineups are stacked and reduced to one row per 'Player Name' (first
    occurrence kept). The share of distinct players relative to
    ``total_players`` and the means of 'Avg Proj', 'Avg Value Proj' and
    'DFN_MinProj' over those rows are printed and compared with fixed
    thresholds.

    NOTE: a later cell in this file defines ``findConvergence`` again with
    different thresholds; when both cells are run, the later definition is
    the one in effect.

    Parameters
    ----------
    population : list of pandas.DataFrame
        Current lineups.
    total_players : int
        Size of the full player pool; must be non-zero.

    Returns
    -------
    bool
        True when all four thresholds are met.
    """
    unique_players = pd.concat(population).drop_duplicates(
        subset="Player Name", keep='first')
    percentage_of_unique_players = (
        unique_players['Player Name'].nunique() / total_players)
    projection_value = unique_players['Avg Proj'].mean()
    val_value = unique_players['Avg Value Proj'].mean()
    minutes_value = unique_players['DFN_MinProj'].mean()

    print("Unique Player Percentage: ", percentage_of_unique_players )
    print("Proj Val: ", projection_value )
    print("Val: ", val_value )
    print("Min Val: ", minutes_value )
    print("XXX")

    unique_player_threshold = percentage_of_unique_players < .55
    projection_threshold = 24 < projection_value < 32
    value_threshold = 4.4 < val_value < 5.4
    minutes_threshold = 25 < minutes_value < 31
    return bool(unique_player_threshold and projection_threshold
                and value_threshold and minutes_threshold)
    

In [108]:
def findConvergence(population, total_players):
    """Print population statistics and test them against fixed thresholds.

    All lineups are stacked (repeated players stay repeated). The number of
    distinct 'Player Name' values divided by ``total_players``, and the means
    of 'Avg Proj', 'Avg Value Proj' and 'DFN_MinProj' over the stacked rows,
    are printed and then checked.

    Returns a truthy value only when the distinct-player share is below 0.32
    and each mean lies strictly inside its window.
    """
    represented_players = pd.concat(population)
    distinct_names = represented_players['Player Name'].nunique()
    percentage_of_unique_players = distinct_names / total_players
    projection_value = represented_players['Avg Proj'].mean()
    val_value = represented_players['Avg Value Proj'].mean()
    minutes_value = represented_players['DFN_MinProj'].mean()

    print("Unique Player Percentage: ", percentage_of_unique_players )
    print("Proj Val: ", projection_value )
    print("Val: ", val_value )
    print("Min Val: ", minutes_value )
    print("XXX")

    return (
        percentage_of_unique_players < .32
        and 28.6 < projection_value < 34
        and 4.6 < val_value < 5.6
        and 27 < minutes_value < 32
    )

In [109]:
def standardize(df):
    """Return a copy of ``df`` with z-score columns for the fitness features.

    For each of the five feature columns a new column named
    ``'<feature> Norm'`` is added, holding
    ``(value - column mean) / column std``. ``df`` itself is left unchanged.
    """
    scored = df.copy()
    for column in ('Avg Proj', 'DFN_MinProj', 'Proj Min Enhanced',
                   'Avg Skewed FP', 'Avg Value Proj'):
        values = df[column]
        center = values.mean()
        spread = values.std()
        scored[column + ' Norm'] = (values - center) / spread
    return scored

def geneticAlgorithmForHistorialBestLineups(file, total_players):
    """Evolve lineups for one slate until ``findConvergence`` accepts them.

    Steps:
      1. Add the standardized ' Norm' feature columns read by the fitness
         function.
      2. Split the players into the eight roster-slot pools by substring
         match on 'Pos' (the utility pool is every player).
      3. Start from ``population_size`` random valid lineups.
      4. Repeat: rank the population, test convergence, and if not converged
         breed a new generation and top it back up to ``population_size``
         with fresh random lineups.

    Parameters
    ----------
    file : pandas.DataFrame
        Player table containing 'Pos' and the feature columns.
    total_players : int
        Player count passed through to ``findConvergence``.

    Returns
    -------
    list of pandas.DataFrame
        The final population, ranked and filtered by
        ``sortFitestBasedOnProjection``.

    Notes
    -----
    There is no generation cap: the loop runs until ``findConvergence``
    returns a truthy value.
    """
    # `standardize` is the function in this file that creates the ' Norm'
    # columns; no `normalize` is defined here.
    file = standardize(file)
    population_size = 30

    positions = file['Pos']
    # "G" also matches PG/SG and "F" also matches SF/PF (substring match).
    pools = [file.loc[positions.str.contains(tag)]
             for tag in ("PG", "SG", "SF", "PF", "G", "F", "C")]
    pools.append(file)  # utility slot: any player

    population = createRandomPopulation(*pools, population_size)

    converged = False
    while not converged:
        # Sorts `population` in place and returns the filtered ranking.
        currentPopulation = sortFitestBasedOnProjection(population)
        converged = findConvergence(population, total_players)
        if not converged:
            population = createNextGeneration(currentPopulation)
            # Refill the slots lost to filtering with new random lineups.
            population.extend(
                createRandomPopulation(*pools, population_size - len(population)))

    print("*****************")
    return sortFitestBasedOnProjection(population)

In [112]:
# Every game day in the data set as [short 'M_D' key, ISO 'YYYY-MM-DD' date]
# pairs, newest first (2020-02-04 back to 2019-10-22). Some calendar days are
# absent (e.g. 2020-01-21, 2019-12-24, 2019-11-28). Nothing in the visible
# part of this file reads this list; `createBestLineups` iterates `dates`.
dates_all = [
    ['2_4', '2020-02-04'],
    ['2_3', '2020-02-03'],
    ['2_2', '2020-02-02'],
    ['2_1', '2020-02-01'],
    ['1_31', '2020-01-31'],
    ['1_30', '2020-01-30'],
    ['1_29', '2020-01-29'],
    ['1_28', '2020-01-28'],
    ['1_27', '2020-01-27'],
    ['1_26', '2020-01-26'],
    ['1_25', '2020-01-25'],
    ['1_24', '2020-01-24'],
    ['1_23', '2020-01-23'],
    ['1_22', '2020-01-22'],
    ['1_20', '2020-01-20'],
    ['1_19', '2020-01-19'],
    ['1_18', '2020-01-18'],
    ['1_17', '2020-01-17'],
    ['1_16', '2020-01-16'],
    ['1_15', '2020-01-15'],
    ['1_14', '2020-01-14'],
    ['1_13', '2020-01-13'],
    ['1_12', '2020-01-12'],
    ['1_11', '2020-01-11'],
    ['1_10', '2020-01-10'],
    ['1_9', '2020-01-09'],
    ['1_8', '2020-01-08'],
    ['1_7', '2020-01-07'],
    ['1_6', '2020-01-06'],
    ['1_5', '2020-01-05'],
    ['1_4', '2020-01-04'],
    ['1_3', '2020-01-03'],
    ['1_2', '2020-01-02'],
    ['1_1', '2020-01-01'],
    ['12_31', '2019-12-31'],
    ['12_30', '2019-12-30'],
    ['12_29', '2019-12-29'],
    ['12_28', '2019-12-28'],
    ['12_27', '2019-12-27'],
    ['12_26', '2019-12-26'],
    ['12_25', '2019-12-25'],
    ['12_23', '2019-12-23'],
    ['12_22', '2019-12-22'],
    ['12_21', '2019-12-21'],
    ['12_20', '2019-12-20'],
    ['12_19', '2019-12-19'],
    ['12_18', '2019-12-18'],
    ['12_17', '2019-12-17'],
    ['12_16', '2019-12-16'],
    ['12_15', '2019-12-15'],
    ['12_14', '2019-12-14'],
    ['12_13', '2019-12-13'],
    ['12_12', '2019-12-12'],
    ['12_11', '2019-12-11'],
    ['12_10', '2019-12-10'],
    ['12_9', '2019-12-09'],
    ['12_8', '2019-12-08'],
    ['12_7', '2019-12-07'],
    ['12_6', '2019-12-06'],
    ['12_5', '2019-12-05'],
    ['12_4', '2019-12-04'],
    ['12_3', '2019-12-03'],
    ['12_2', '2019-12-02'],
    ['12_1', '2019-12-01'],
    ['11_30', '2019-11-30'],
    ['11_29', '2019-11-29'],
    ['11_27', '2019-11-27'],
    ['11_26', '2019-11-26'],
    ['11_25', '2019-11-25'],
    ['11_24', '2019-11-24'],
    ['11_23', '2019-11-23'],
    ['11_22', '2019-11-22'],
    ['11_21', '2019-11-21'],
    ['11_20', '2019-11-20'],
    ['11_19', '2019-11-19'],
    ['11_18', '2019-11-18'],
    ['11_17', '2019-11-17'],
    ['11_16', '2019-11-16'],
    ['11_15', '2019-11-15'],
    ['11_14', '2019-11-14'],
    ['11_13', '2019-11-13'],
    ['11_12', '2019-11-12'],
    ['11_11', '2019-11-11'],
    ['11_10', '2019-11-10'],
    ['11_9', '2019-11-09'],
    ['11_8', '2019-11-08'],
    ['11_7', '2019-11-07'],
    ['11_6', '2019-11-06'],
    ['11_5', '2019-11-05'],
    ['11_4', '2019-11-04'],
    ['11_3', '2019-11-03'],
    ['11_2', '2019-11-02'],
    ['11_1', '2019-11-01'],
    ['10_31', '2019-10-31'],
    ['10_30', '2019-10-30'],
    ['10_29', '2019-10-29'],
    ['10_28', '2019-10-28'],
    ['10_27', '2019-10-27'],
    ['10_26', '2019-10-26'],
    ['10_25', '2019-10-25'],
    ['10_24', '2019-10-24'],
    ['10_23', '2019-10-23'],
    ['10_22', '2019-10-22'],   
]

# Game days that `createBestLineups` actually processes, as
# [short 'M_D' key, ISO date] pairs. Only the ISO date (index 1) is used, for
# both the input and the output file name. Uncomment entries to process more
# days in one run.
dates = [
    #['2_4', '2020-02-04'],
    #['2_3', '2020-02-03'],
    #['2_2', '2020-02-02'],
    #['2_1', '2020-02-01'],
    #['1_31', '2020-01-31'],
    ['1_30', '2020-01-30'],
    #['1_29', '2020-01-29'],
    #['1_28', '2020-01-28'],
    #['1_27', '2020-01-27'],
    #['1_26', '2020-01-26'],
    
   
]

def resetIndicies(file):
    """Renumber every lineup's index from 0 and return the same list.

    Each DataFrame in ``file`` is modified in place; its previous index is
    discarded rather than kept as a column.
    """
    for frame in file:
        frame.reset_index(inplace=True, drop=True)
    return file

def saveBestLineups_toCSV(file, date):
    """Write all lineups for one date to ``BestCreatedLineups/<date>``.

    The lineups are stacked into one frame with an outer index level named
    'Lineup Num' that numbers them 0..len(file)-1 in list order. The output
    path has no file extension, and the target directory is not created here.
    """
    lineup_numbers = np.arange(len(file))
    destination = f'BestCreatedLineups/{date}'
    stacked = pd.concat(file, keys=lineup_numbers, names=['Lineup Num'])
    stacked.to_csv(destination)

def createBestLineups():
    """Run the full pipeline for every entry of the module-level ``dates``.

    For each game day the merged historical file is read from
    ``HistoricalData_Merged/<ISO date>``, cleaned, passed through the genetic
    algorithm, and the resulting lineups are saved under the same ISO date.
    ``total_players`` is the row count before any filtering is applied.
    """
    for game_day in dates:
        iso_date = game_day[1]
        raw = pd.read_csv('HistoricalData_Merged/{}'.format(iso_date))
        total_players = len(raw['Player Name'].tolist())
        cleaned = reduceForProjectionAnalysis(raw)
        best = geneticAlgorithmForHistorialBestLineups(cleaned, total_players)
        saveBestLineups_toCSV(resetIndicies(best), iso_date)
        

In [113]:
# Run the pipeline for every entry in `dates`; the statistics printed by
# findConvergence for each generation follow as this cell's output.
createBestLineups()

Unique Player Percentage:  0.6342857142857142
Proj Val:  23.9875
Val:  4.732645833333334
Min Val:  24.789583333333333
XXX
Unique Player Percentage:  0.5257142857142857
Proj Val:  25.839375
Val:  4.8390625
Min Val:  26.0125
XXX
Unique Player Percentage:  0.48
Proj Val:  26.701666666666668
Val:  4.829708333333333
Min Val:  26.408333333333335
XXX
Unique Player Percentage:  0.45714285714285713
Proj Val:  25.851458333333333
Val:  4.923625
Min Val:  26.608333333333334
XXX
Unique Player Percentage:  0.44
Proj Val:  27.350416666666668
Val:  5.058166666666667
Min Val:  27.572916666666668
XXX
Unique Player Percentage:  0.42857142857142855
Proj Val:  26.932916666666664
Val:  5.010208333333333
Min Val:  26.816666666666666
XXX
Unique Player Percentage:  0.44571428571428573
Proj Val:  27.210208333333334
Val:  4.978875
Min Val:  26.958333333333332
XXX
Unique Player Percentage:  0.4342857142857143
Proj Val:  28.777708333333333
Val:  5.1012708333333325
Min Val:  27.991666666666667
XXX
Unique Player Per