In [1]:
import pandas as pd
import numpy as np
from collections import Counter
rng = np.random.default_rng(117)

# Read data and get only player names, points per game, and salaries
data = pd.read_csv("./nbasalariespoints.csv")
data = data[["Player", "PS/G▼", "SALARY"]]
data = data.dropna()

data["PS/G▼"] = data["PS/G▼"].astype(int)
data["SALARY"] = data["SALARY"].str.replace(r"[$,]", "", regex=True).str.strip()
data["SALARY"] = pd.to_numeric(data["SALARY"])

#data["SALARY"] /= 1_000_000
#data["SALARY"] = data["SALARY"].round().astype(int)
data = data[data["SALARY"] != 0]
data = data[data["PS/G▼"] != 0]

In [2]:
def initializePopulation(size_of_pop, team_size, length):
    return rng.integers(0, length, (size_of_pop, team_size), dtype=int)

def calculateValue(individual, values):
    return np.sum(values[individual])

def calculateWeight(individual, weights):
    return np.sum(weights[individual])

def calculateFitness(individual, weights, values, capacity):
    indevidual_weight = calculateWeight(individual, weights)
    count = Counter(individual)
    fitness = calculateValue(individual, values) if indevidual_weight <= capacity else -np.inf
    for i in range(len(individual)):
        if count[individual[i]] > 1: 
            fitness = -np.inf
            break
    return fitness

def selectParents(capacity, weights, values, population):
    fitness = []
    for individual in population:
        fitness.append(calculateFitness(individual, weights, values, capacity))

    fitness = np.array(fitness)
    top_indices = np.argsort(fitness)[-2:] 
    return [population[top_indices[0]], population[top_indices[1]]]

def generatePopulation(parents, size_of_pop, mutation_rate, length):
    num_genes = len(parents[0])
    population = np.zeros((size_of_pop, num_genes), dtype=int)
    population[0], population[1] = parents

    for i in range(2, size_of_pop):
        for j in range(num_genes):
            parent_choice = rng.integers(0, 2)
            population[i][j] = parents[parent_choice][j]
            
            # Mutation
            if rng.random() < mutation_rate:
                population[i][j] = rng.integers(0, length)
    return population


# Saving an invalid solution in the valid list
def knapSackGenetic(capacity, weights, values, training_length, pop_size, team_size):
    length = len(weights) - 1
    population = initializePopulation(pop_size, team_size, length)

    for i in range(training_length):
        parents = selectParents(capacity, weights, values, population)
        population = generatePopulation(parents, pop_size, .01, length)
    
    valid = []
    for individual in population:
        if calculateFitness(individual, weights, values, capacity) == -np.inf: continue
        valid.append(individual)

    valid = np.array(valid)
    indices = np.argsort([val for val in [calculateValue(ind, values) for ind in valid]])
    
    return individual


In [4]:
weights = np.array(data["SALARY"].tolist())
values = np.array(data["PS/G▼"].tolist())

capacity = 150_000_000
training_length = 1000
pop_size = 100
team_size = 15

individual = knapSackGenetic(capacity, weights, values, training_length, pop_size, team_size)

print(f"Best Solution: {individual}")
print(f"Value: {calculateValue(individual, values)} | Weight: {calculateWeight(individual, weights):,}\n")

Best Solution: [ 8  6 16 36 10  2 17 29  3  1  5 18 30  0  7]
Value: 343 | Weight: 148,167,348

