In [9]:
import numpy as np
import pandas as pd
from collections import Counter


In [10]:
def load_data(path, *args):
    data = pd.read_csv(path)
    data = data.dropna()
    lst = []
    for header in args:
        if header == "SALARY":
            data[header] = pd.to_numeric(data[header].str.replace(r"[$,]", "", regex=True).str.strip())
        lst.append(np.array(data[header].tolist()))
    return lst

rng = np.random.default_rng(42)
players, points, rebounds, assists, weights = load_data("./nbasalariespoints.csv", "Player", "PS/G▼", "TRB", "AST", "SALARY")

In [11]:
def initializePopulation(pop_size, team_size):
    return rng.integers(0, len(weights) - 1, (pop_size, team_size), dtype=int)

def calculateValue(individual, obj):
    return np.sum(obj[individual])

def calculateWeight(individual, weights):
    return np.sum(weights[individual])

def calculateFitness(individual, weights, capacity, ideal_point, objectives):
    val_list = []
    total = 0

    for objective in objectives:
        val_list.append(calculateValue(individual, objective))
    
    for i in range(len(ideal_point)):
        diff = ideal_point[i] - val_list[i]
        # Allows for overshooting the point
        if diff > 0:
            total += diff ** 2
    dist = np.sqrt(total)

    # Use 1/dist as fitness
    if dist != 0: fitness = 1/dist
    else: fitness = np.inf

    # Can't choose the same player twice
    if (len(set(individual)) < 15) or (calculateWeight(individual, weights) > capacity):
        fitness = -np.inf
    return fitness

def adjustDistribution(capacity, weights, population, ideal_point, objectives, counter):
    fitness = []
    distribution = np.ones(len(weights)) / len(weights)
    for individual in population:
        fitness.append(calculateFitness(individual, weights, capacity, ideal_point, objectives))
        
    indices = np.argsort(fitness)
    population = population[indices][::-1]
    fitness = np.sort(fitness)[::-1]

    for i in range(len(fitness)):
        if fitness[i] > .75 * fitness[0]:
            counter += Counter(population[i])
        else: break

    for index, count in counter.items():
        distribution[index] = count / len(distribution)
    distribution = distribution / distribution.sum()
    
    return distribution

def Sample(distribution, pop_size, team_size):
    length = len(distribution)
    population = np.zeros((pop_size, team_size), dtype=int)

    for i in range(pop_size):
        population[i] = rng.choice(length, team_size, p = distribution)
    return population

def knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, objectives):
    population = initializePopulation(pop_size, team_size)
    counter = Counter()

    for i in range(training_length):
        distribution = adjustDistribution(capacity, weights, population, ideal_point, objectives, counter)
        population = Sample(distribution, pop_size, team_size)

    indices = np.argsort([val for val in [calculateFitness(individual, weights, capacity, ideal_point, objectives) for individual in population]])
    best_solution = population[indices][-1]
    return best_solution


In [None]:
ideal_point = [300, 90]
capacity = 150_000_000
training_length = 1000
pop_size = 100
team_size = 15
objectives = [points, rebounds, assists]

individual = knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, objectives)
individual = np.sort(individual)
print(individual)
print(players[individual])

print(f"Value of objective 1: {calculateValue(individual, objectives[0])}")
print(f"Value of objective 2: {calculateValue(individual, objectives[1])}")
print(f"Value of objective 3: {calculateValue(individual, objectives[2])}")


[ 5  6  7  8 14 19 21 24 32 35 36 48 65 79 87]
['Damian Lillard' 'Anthony Davis' 'DeMar DeRozan' 'Russell Westbrook'
 'Kawhi Leonard' 'Eric Bledsoe' 'Gordon Hayward' 'Danilo Gallinari'
 'Nikola Vucevic' 'Jahlil Okafor' 'Bradley Beal' 'Jeff Teague'
 'Jabari Parker' 'Robert Covington' 'Luol Deng']
Value of objective 1: 285.2
Value of objective 2: 87.2
Value of objective 3: 55.8
367.7
