In [68]:
import numpy as np
import pandas as pd
from collections import Counter
rng = np.random.default_rng(42)

In [69]:

nbadata = pd.read_csv("./nbasalariespoints.csv")
nbadata = nbadata.dropna()
nbadata.keys()
nbadata['SALARY'] = pd.to_numeric(nbadata['SALARY'].str.replace(r"[$,]", "", regex=True).str.strip())

min_minutes = 12
nbadata = nbadata[nbadata['MP'] >= min_minutes] 

salary = nbadata['SALARY']
points = nbadata['PS/G▼']
rebounds = nbadata['TRB']
assists = nbadata['AST']
steals = nbadata['STL']
blocks = nbadata['BLK']
turnovers = nbadata['TOV']
three_pointers = nbadata['3P']
fieldgoals = nbadata['eFG%']
freethrows = nbadata['FT']
fouls = nbadata['PF']
minutes = nbadata['MP']
players = nbadata['Player']
taskdata = pd.DataFrame({
    'Player': players,
    'Salary': salary,
    'Points': 36*points/minutes,
    'Rebounds': 36*rebounds/minutes,
    'Assists': 36*assists/minutes,
    'Steals': 36*steals/minutes,
    'Blocks': 36*blocks/minutes,
}).reset_index(drop=True)

weights = taskdata["Salary"]
objectives = [taskdata["Points"], taskdata["Rebounds"], taskdata["Assists"]]
capacity = 150_000_000


In [70]:
def initializePopulation(pop_size, team_size):
    return rng.integers(0, len(weights) - 1, (pop_size, team_size), dtype=int)

def calculateValue(individual, obj):
    return np.sum(obj[individual])

def calculateWeight(individual):
    return np.sum(weights[individual])

def calculateFitness(individual, ideal_point, objectives):
    length = len(ideal_point)
    val_arr = np.zeros(length)

    for i in range(length):
        val_arr[i] = calculateValue(individual, objectives[i])

    cos_theta = np.dot(val_arr, ideal_point) / (np.linalg.norm(val_arr) * np.linalg.norm(ideal_point))
    cos_theta = np.clip(cos_theta, 0.0, 1.0)
    fitness = cos_theta

    return fitness

def get_non_dominated(population, objectives):
    non_dominated = []
    for i, ind_a in enumerate(population):
        dominated = False
        val_a = [calculateValue(ind_a, obj) for obj in objectives]
        for j, ind_b in enumerate(population):
            if i == j:
                continue
            val_b = [calculateValue(ind_b, obj) for obj in objectives]
            if all(b >= a for a, b in zip(val_a, val_b)) and any(b > a for a, b in zip(val_a, val_b)):
                dominated = True
                break
        if not dominated:
            non_dominated.append(ind_a)
    return np.array(non_dominated)


def adjustDistribution(population, ideal_point, objectives, counter):
    fitness = []
    distribution = np.ones(len(weights)) / len(weights)
    population = get_non_dominated(population, objectives)
    for individual in population:
        fitness.append(calculateFitness(individual, ideal_point, objectives))
        
    indices = np.argsort(fitness)
    population = population[indices][::-1]
    fitness = np.sort(fitness)[::-1]

    # Make it select only non-dominated solutions

    for i in range(len(fitness)):
        if fitness[i] > .75 * fitness[0]:
            counter += Counter(population[i])
        else: break

    for index, count in counter.items():
        distribution[index] = count / len(distribution)
    distribution = distribution / distribution.sum()
    
    return distribution

def Sample(distribution, pop_size, team_size):
    length = len(distribution)
    pop_count = 0
    population = np.zeros((pop_size, team_size), dtype=int)

    while pop_count < pop_size:
        sample = rng.choice(length, team_size, p = distribution, replace = False)
        if calculateWeight(sample) <= capacity:
            population[pop_count] = sample
            pop_count += 1
    return population

def knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, objectives):
    population = initializePopulation(pop_size, team_size)
    counter = Counter()

    for i in range(training_length):
        distribution = adjustDistribution(capacity, weights, population, ideal_point, objectives, counter)
        population = Sample(distribution, pop_size, team_size)

    indices = np.argsort([val for val in [calculateFitness(individual, weights, capacity, ideal_point, objectives) for individual in population]])
    best_solution = population[indices][-1]
    return best_solution


In [71]:
ideal_point = np.array([300, 140])
training_length = 20
pop_size = 200
team_size = 15

population = initializePopulation(pop_size, team_size)
counter = Counter()

for i in range(training_length):
    distribution = adjustDistribution(population, ideal_point, objectives, counter)
    population = Sample(distribution, pop_size, team_size)

indices = np.argsort([val for val in [calculateFitness(individual, ideal_point, objectives) for individual in population]])
best_solution = population[indices][-1]

individual = np.sort(best_solution)
print(individual)
print(f"Value of objective 1: {calculateValue(individual, objectives[0])}")
print(f"Value of objective 2: {calculateValue(individual, objectives[1])}")


[  0  25  28  38  40  42  45  65  68  75  91 159 162 185 255]
Value of objective 1: 264.1813001121595
Value of objective 2: 123.29974171280152
