In [1]:
import numpy as np
import pandas as pd
from collections import Counter


In [2]:
def load_data(path, *args):
    data = pd.read_csv(path)
    data = data.dropna()
    lst = []
    for header in args:
        if header == "SALARY":
            data[header] = pd.to_numeric(data[header].str.replace(r"[$,]", "", regex=True).str.strip())
        lst.append(np.array(data[header].tolist()))
    return lst

rng = np.random.default_rng(42)
players, points, rebounds, assists, weights = load_data("./nbasalariespoints.csv", "Player", "PS/G▼", "TRB", "AST", "SALARY")

In [None]:
# Create a population of num_teams x team_size, with random values from 0-last index
# Each individual represents a team, and each team has 15 players
# **has uniform distribution**
def initializePopulation(pop_size, team_size):
    return rng.integers(0, len(weights) - 1, (pop_size, team_size), dtype=int)

def calculateValue(individual, obj):
    return np.sum(obj[individual])

def calculateWeight(individual, weights):
    return np.sum(weights[individual])


# Change so that instead of objectives being constrained, it calculates the distance between the team's stats
# and an idea point that the user inputs. Then use 1/dist as the fitness.
# Individuals that are far away from the point will have low fitness, and ones that are close
# will have high fitness. Then, use the best as parents for the next population.
# Include a step of calculating how many times a certian player appears in the top (10? 15?) teams
# and add them into the distrubution that many times
# This leads to a non-uniform distribution, which allows for better players to be picked more often

# Sets the fitness to -inf if the weight exceeds the knapsack capacity, or if there are duplicate players (can't draft the same player twice)
# Otherwise, sets the fitness to the value of the team
# Since there is a limit on how many players can be in each team, don't use value to weight ratio as the fitness
def calculateFitness(individual, weights, capacity, ideal_point, objectives):
    val_list = []
    total = 0

    for objective in objectives:
        val_list.append(calculateValue(individual, objective))
    
    for i in range(len(ideal_point)):
        total += (ideal_point[i] - val_list[i]) ** 2
    dist = np.sqrt(total)

    # Use 1/dist as fitness
    # Maybe add the ability to return to the main if a solution with inf fitness is found
    if dist != 0: fitness = 1/dist
    else: fitness = np.inf
    

    # Can't choose the same player twice
    if (len(set(individual)) < 15) or (calculateWeight(individual, weights) > capacity):
        fitness = -np.inf
    return fitness

def addCopies(weights, objectives, counter, objective_names):
    df = pd.DataFrame()
    df["Weights"] = weights
    for i in range(len(objectives)):
        if not objective_names:
            df[f"Objective_{i + 1}"] = objectives[i]
        else:
            df[objective_names[i]] = objectives[i]
    
    # can use index for keys and use 15 * index + inner index to make length faster (maybe)
    for key in counter.keys():
        for _ in range(counter[key]):
            df.loc[len(df)] = df.iloc[key].values

    return df

def adjustDistribution(capacity, weights, population, ideal_point, objectives, objective_names, counter):
    fitness = []
    distribution = np.ones(len(weights)) / len(weights)
    for individual in population:
        fitness.append(calculateFitness(individual, weights, capacity, ideal_point, objectives))
        
    indices = np.argsort(fitness)
    population = population[indices][::-1]
    fitness = np.sort(fitness)[::-1]

    # Calculate how many times each player from the fit population apears
    # then add that many copies into the distribution before sampling again
    # This (effectively) leads to weighting each player, allowing for the better players to be chosen more often 
    # Use counter from collections 
    for i in range(15):
        counter += Counter(population[i])

    for index, count in counter.items():
        distribution[index] = count / len(distribution)
    distribution = distribution / distribution.sum()
    
    return distribution

def Sample(distribution, pop_size, team_size):
    length = len(distribution)
    population = np.zeros((pop_size, team_size), dtype=int)

    for i in range(pop_size):
        population[i] = rng.choice(length, team_size, p = distribution)
    return population

def knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, objectives, objective_names = None):
    population = initializePopulation(pop_size, team_size)
    counter = Counter()

    for i in range(training_length):
        distribution = adjustDistribution(capacity, weights, population, ideal_point, objectives, objective_names, counter)
        population = Sample(distribution, pop_size, team_size)

    valid = []
    for individual in population:
        if calculateFitness(individual, weights, capacity, ideal_point, objectives) == -np.inf: continue
        valid.append(individual)

    valid = np.array(valid)
    indices = np.argsort([val for val in [calculateFitness(individual, weights, capacity, ideal_point, objectives) for ind in valid]])
    print(f"Fitness of best choice: {calculateFitness(valid[indices[-1]], weights, capacity, ideal_point, objectives)}")
    print(f"Value of {objective_names[0]}: {calculateValue(valid[indices[-1]], objectives[0])}")
    print(f"Value of {objective_names[1]}: {calculateValue(valid[indices[-1]], objectives[1])}")
    print(f"Value of {objective_names[2]}: {calculateValue(valid[indices[-1]], objectives[2])}")
    return valid[indices[-1]]


In [4]:
ideal_point = [300, 60]
capacity = 150_000_000
training_length = 1000
pop_size = 100
team_size = 15
individual = knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, [points, rebounds, assists], ["Points", "Rebounds", "Assists"])
individual = np.sort(individual)
print(individual)
print(players[individual])


Fitness of best choice: 0.014288601458109845
Value of Points: 230.89999999999998
Value of Rebounds: 71.1
Value of Assists: 37.5
[  0   2  10  11  14  30  34  37  58  91 105 221 283 287 316]
['Stephen Curry' 'Kevin Durant' 'Isaiah Thomas' 'Klay Thompson'
 'Kawhi Leonard' 'Karl-Anthony Towns' 'Kobe Bryant' 'Rudy Gay'
 'Kentavious Caldwell-Pope' 'Markieff Morris' 'Bojan Bogdanovic'
 'Timofey Mozgov' 'Justin Anderson' 'Orlando Johnson' 'Pablo Prigioni']
