In [9]:
import numpy as np
import pandas as pd
from collections import Counter
# Seeded NumPy generator used by Sample() for its random draws; with a fixed
# seed the draws repeat as long as the cells run in the same order.
rng = np.random.default_rng(42)

In [14]:
# --- Load the raw table and drop every row that has any missing value -------
nbadata = pd.read_csv("./nbasalariespoints.csv")
nbadata = nbadata.dropna()
# SALARY is read as text: remove "$" and "," characters and surrounding
# whitespace, then convert the column to a numeric dtype.
nbadata['SALARY'] = pd.to_numeric(nbadata['SALARY'].str.replace(r"[$,]", "", regex=True).str.strip())

# Keep only rows with MP >= min_minutes. Besides filtering, this keeps the MP
# divisor used for the scaled rates below away from zero.
min_minutes = 12
nbadata = nbadata[nbadata['MP'] >= min_minutes]

# Column shortcuts (several are currently unused but kept for experimentation).
salary = nbadata['SALARY']
points = nbadata['PS/G▼']
rebounds = nbadata['TRB']
assists = nbadata['AST']
steals = nbadata['STL']
blocks = nbadata['BLK']
turnovers = nbadata['TOV']
three_pointers = nbadata['3P']
fieldgoals = nbadata['eFG%']
freethrows = nbadata['FT']
fouls = nbadata['PF']
minutes = nbadata['MP']
players = nbadata['Player']

# Working table: statistics rescaled as 36 * stat / MP, plus the salary.
# reset_index(drop=True) gives labels 0..n-1 so that the integer player indices
# drawn by Sample() can be used directly to index these Series.
taskdata = pd.DataFrame({
    'Rebounds': 36*rebounds/minutes,
    'Assists': 36*assists/minutes,
    'Salary': salary,
}).reset_index(drop=True)

# Knapsack-style constraint: a team's summed Salary must not exceed `capacity`.
weights = taskdata["Salary"]
capacity = 150_000_000

# `objectives` is required by the later cells (they read objectives[0..2]) but
# its only definition had been commented out, so a fresh kernel raised
# NameError. It is rebuilt here from the previously commented-out formulas
# (Points = 36*points/minutes, then Rebounds and Assists) without altering
# `taskdata`, so the correlation table printed below is unchanged.
# NOTE(review): confirm this is the intended objective set.
per36_points = (36*points/minutes).reset_index(drop=True).rename("Points")
objectives = [per36_points, taskdata["Rebounds"], taskdata["Assists"]]

corr = taskdata.corr("pearson")
print(corr)

          Rebounds   Assists    Salary
Rebounds  1.000000 -0.323585  0.151784
Assists  -0.323585  1.000000  0.265169
Salary    0.151784  0.265169  1.000000


In [3]:
def calculateValue(individual, obj):
    """Sum one objective over the members of a team.

    Parameters
    ----------
    individual : index or array of indices
        Used as-is to index ``obj``.
    obj : indexable collection of per-player values
        The objective being totalled.

    Returns
    -------
    The result of ``np.sum`` applied to ``obj[individual]``.
    """
    selected_values = obj[individual]
    return np.sum(selected_values)

def calculateWeight(individual):
    """Sum the module-level ``weights`` over the members of a team.

    Parameters
    ----------
    individual : index or array of indices
        Used as-is to index the global ``weights``.

    Returns
    -------
    The result of ``np.sum`` applied to ``weights[individual]``.
    """
    team_weights = weights[individual]
    return np.sum(team_weights)

def calculateFitness(individual, ideal_point, objectives):
    """Cosine similarity between a team's objective totals and the ideal point.

    For each component ``i`` of ``ideal_point`` the team total of
    ``objectives[i]`` is computed with ``calculateValue``; the fitness is the
    cosine of the angle between that vector of totals and ``ideal_point``.

    Parameters
    ----------
    individual : array of player indices
        The team to evaluate.
    ideal_point : array-like of float
        Target objective vector; its length decides how many objectives are read.
    objectives : sequence
        Per-player objective values; must have at least ``len(ideal_point)``
        entries.

    Returns
    -------
    float
        The cosine similarity, or ``0.0`` when either vector has zero norm.
        (The cosine is undefined there; previously this produced NaN through a
        0/0 division, and a NaN fitness disturbs the sorting and threshold
        comparison performed by callers.)
    """
    ideal = np.asarray(ideal_point, dtype=float)
    val_arr = np.array(
        [calculateValue(individual, objectives[i]) for i in range(len(ideal))],
        dtype=float,
    )

    denominator = np.linalg.norm(val_arr) * np.linalg.norm(ideal)
    if denominator == 0:
        # Zero-length vector: no direction to compare, report "no alignment".
        return 0.0

    return np.dot(val_arr, ideal) / denominator

def non_dominated(population, objectives):
    """Filter a population down to the individuals no other one strictly beats.

    An individual is discarded when some individual in the population has a
    strictly larger total on *every* objective; ties on any objective are
    enough to keep it.

    Parameters
    ----------
    population : numpy array, one row per individual
        Each row is passed to ``calculateValue`` as the index set of a team.
    objectives : sequence
        Per-player objective values, one entry per objective.

    Returns
    -------
    numpy array
        The rows of ``population`` that survive, in their original order.
    """
    n_individuals = len(population)
    n_objectives = len(objectives)

    # Table of objective totals: one row per individual, one column per objective.
    scores = np.array(
        [[calculateValue(member, objective) for objective in objectives]
         for member in population],
        dtype=float,
    ).reshape(n_individuals, n_objectives)

    # beaten_by[a, b] is True when b is strictly better than a on all objectives.
    beaten_by = np.all(scores[:, np.newaxis, :] < scores[np.newaxis, :, :], axis=2)
    survivors = ~beaten_by.any(axis=1)

    return population[survivors]

def adjustDistribution(population, ideal_point, objectives, counter, threshold=0.99):
    """Build a new player-sampling distribution from the fittest teams.

    Steps: keep only the non-dominated teams, score them with
    ``calculateFitness``, and collect every team whose fitness exceeds
    ``threshold``. The players of those teams are tallied into ``counter`` and
    the tallies are turned into sampling probabilities.

    Parameters
    ----------
    population : numpy array, one row of player indices per team
    ideal_point : array-like of float
        Forwarded to ``calculateFitness``.
    objectives : sequence
        Forwarded to ``non_dominated`` and ``calculateFitness``.
    counter : collections.Counter
        Running tally of how often each player appeared in a fit team.
        It is updated IN PLACE, so tallies accumulate across calls.
    threshold : float, optional
        Minimum fitness (exclusive) for a team to count as fit. Defaults to
        0.99, the value that used to be hard-coded.

    Returns
    -------
    distribution : numpy array of length ``len(weights)``
        Probabilities summing to 1. Players absent from ``counter`` keep the
        baseline weight ``1/len(weights)``; tallied players get
        ``count/len(weights)`` before normalisation.
    fit_pop : list of numpy arrays
        The teams that passed the threshold, best first.
    """
    n_players = len(weights)
    distribution = np.ones(n_players) / n_players

    population = non_dominated(population, objectives)
    fitness = np.array(
        [calculateFitness(individual, ideal_point, objectives) for individual in population],
        dtype=float,
    )

    # Best first. NaN scores are ranked as -inf so that an undefined fitness
    # ends up last instead of first, where it would stop the scan below
    # before any genuinely fit team is seen.
    order = np.argsort(np.nan_to_num(fitness, nan=-np.inf))[::-1]
    population = population[order]
    fitness = fitness[order]

    fit_pop = []
    for individual, score in zip(population, fitness):
        if not score > threshold:
            # Scores are descending, so nothing further can pass.
            break
        # In-place Counter addition: the caller's counter object is mutated.
        counter += Counter(individual)
        fit_pop.append(individual)

    for index, count in counter.items():
        distribution[index] = count / len(distribution)
    distribution = distribution / distribution.sum()

    return distribution, fit_pop

def Sample(distribution, pop_size, team_size, max_attempts=None):
    """Draw ``pop_size`` random teams that respect the salary cap.

    Teams are drawn by rejection sampling: ``team_size`` distinct player
    indices are chosen with probabilities ``distribution`` and the draw is
    kept only if ``calculateWeight`` of the team is at most the global
    ``capacity``.

    Parameters
    ----------
    distribution : numpy array of probabilities, one per player
    pop_size : int
        Number of teams to return.
    team_size : int
        Number of distinct players per team.
    max_attempts : int or None, optional
        Upper bound on the total number of draws. ``None`` (default) means
        unbounded, which matches the previous behaviour.

    Returns
    -------
    numpy int array of shape ``(pop_size, team_size)``

    Raises
    ------
    ValueError
        If even the ``team_size`` cheapest selectable players exceed
        ``capacity``; the rejection loop could never terminate in that case.
    RuntimeError
        If ``max_attempts`` draws were made without filling the population.
    """
    length = len(distribution)
    population = np.zeros((pop_size, team_size), dtype=int)

    if pop_size > 0:
        weight_values = np.asarray(weights, dtype=float)
        if len(weight_values) == length:
            # Lower bound on any team's weight: the cheapest players that have
            # a non-zero chance of being drawn (missing weights counted as 0).
            selectable = np.nan_to_num(
                weight_values[np.asarray(distribution) > 0], nan=0.0
            )
            if len(selectable) >= team_size:
                cheapest_team = np.sort(selectable)[:team_size].sum()
                if cheapest_team > capacity:
                    raise ValueError(
                        f"No team of {team_size} players can satisfy capacity "
                        f"{capacity}: the cheapest possible team weighs {cheapest_team}."
                    )

    attempts = 0
    pop_count = 0
    while pop_count < pop_size:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"Sample gave up after {attempts} draws with only "
                f"{pop_count} of {pop_size} feasible teams found."
            )
        attempts += 1
        sample = rng.choice(length, team_size, p = distribution, replace = False)
        if calculateWeight(sample) <= capacity:
            population[pop_count] = sample
            pop_count += 1
    return population


In [4]:
# --- Search settings ---------------------------------------------------------
team_size = 12
# Target direction in objective space; the third component is the median of
# the third objective multiplied by the team size.
ideal_point = np.array([300, 140, np.median(objectives[2]) * team_size])
training_length = 40
pop_size = 250

# Initial population: every player equally likely.
population = Sample(np.ones(len(weights)) / len(weights), pop_size, team_size)
# Tally of players seen in fit teams; adjustDistribution updates it in place,
# so it accumulates over all iterations.
counter = Counter()

for _ in range(training_length):
    distribution, fit_pop = adjustDistribution(population, ideal_point, objectives, counter)
    population = Sample(distribution, pop_size, team_size)
    # Carry the fit teams of this round over into the next population.
    if len(fit_pop) != 0:
        population = np.vstack((population, fit_pop))

# Pick the team with the highest fitness in the final population.
fitness_values = [calculateFitness(individual, ideal_point, objectives) for individual in population]
indices = np.argsort(fitness_values)
best_solution = population[indices[-1]]

# Reuse calculateFitness instead of repeating the cosine formula by hand.
cos_theta = calculateFitness(best_solution, ideal_point, objectives)
print(f"Cos(theta) = {cos_theta}")

individual = np.sort(best_solution)
print(f"Team: {individual}")
# Report every objective rather than a hard-coded subset.
for objective_number, objective in enumerate(objectives, start=1):
    print(f"Value of objective {objective_number}: {calculateValue(individual, objective)}")


KeyboardInterrupt: 

Cos(theta) = 0.9999859421360815
Team: [  0   1   2   3   6  42  45  46  78  83 132 264]
Value of objective 1: 276.1846472082825
Value of objective 2: 129.21818549915162