In [156]:
import numpy as np
import pandas as pd
from collections import Counter
from Data import load_data

rng = np.random.default_rng(42)

players, points, rebounds, assists, weights = load_data("./nbasalariespoints.csv", "Player", "PS/G▼", "TRB", "AST", "SALARY")

In [None]:
# Create a population of num_teams x team_size, with random values from 0-last index
# Each individual represents a team, and each team has 15 players
# **has uniform distribution**
def initializePopulation(pop_size, team_size, max_index):
    return rng.integers(0, max_index, (pop_size, team_size), dtype=int)

def calculateValue(individual, obj):
    return np.sum(obj[individual])

def calculateWeight(individual, weights):
    return np.sum(weights[individual])


# Change so that instead of objectives being constrained, it calculates the distance between the team's stats
# and an idea point that the user inputs. Then use 1/dist as the fitness.
# Individuals that are far away from the point will have low fitness, and ones that are close
# will have high fitness. Then, use the best as parents for the next population.
# Include a step of calculating how many times a certian player appears in the top (10? 15?) teams
# and add them into the distrubution that many times
# This leads to a non-uniform distribution, which allows for better players to be picked more often

# Sets the fitness to -inf if the weight exceeds the knapsack capacity, or if there are duplicate players (can't draft the same player twice)
# Otherwise, sets the fitness to the value of the team
# Since there is a limit on how many players can be in each team, don't use value to weight ratio as the fitness
def calculateFitness(individual, weights, capacity, ideal_point, objectives):
    val_list = []
    total = 0

    for objective in objectives:
        val_list.append(calculateValue(individual, objective))
    
    for i in range(len(ideal_point)):
        total += (ideal_point[i] - val_list[i]) ** 2
    dist = np.sqrt(total)

    # Use 1/dist as fitness
    fitness = 1/dist

    # Can't choose the same player twice
    if (len(set(individual)) < 15) or (calculateWeight(individual, weights) > capacity):
        fitness = 0
    return fitness

def addCopies(weights, objectives, counter, objective_names):
    df = pd.DataFrame()
    df["Weights"] = weights
    for i in range(len(objectives)):
        if not objective_names:
            df[f"Objective_{i + 1}"] = objectives[i]
        else:
            df[objective_names[i]] = objectives[i]
    
    # can use index for keys and use 15 * index + inner index to make length faster (maybe)
    for key in counter.keys():
        for _ in range(counter[key]):
            df.loc[len(df)] = df.iloc[key].values

    return df

# Selects the two best as the parents
def adjustDistribution(capacity, weights, population, ideal_point, objectives, objective_names):
    fitness = []
    for individual in population:
        fitness.append(calculateFitness(individual, weights, capacity, ideal_point, objectives))
        
    indices = np.argsort(fitness)
    population = population[indices][::-1]
    fitness = np.sort(fitness)[::-1]

    # Calculate how many times each player from the fit population apears
    # then add that many copies into the distribution before sampling again
    # This (effectively) leads to weighting each player, allowing for the better players to be chosen more often 
    # Use counter from collections 
    counter = Counter()
    for i in range(15):
        counter += Counter(population[i])

    data = addCopies(weights, objectives, counter, objective_names)

# Generates a population of the same size as the initial population
# Selects each gene randomly from one of the parents
def generatePopulation(parents, pop_size, team_size, length, mutation_rate=.1):
    population = np.zeros((pop_size, team_size), dtype=int)
    population[0], population[1], population[2] = parents

    for i in range(3, pop_size):
        for j in range(team_size):
            parent_choice = rng.integers(0, 3)
            population[i][j] = parents[parent_choice][j]
            
            # Mutation
            if rng.random() < mutation_rate:
                population[i][j] = rng.integers(0, length)
    return population

# Creates an initial population
# Saves the mean value for the population in each training iteration to plot
# Finds to two best as parents
# Creates a new population using the parents
# Saves only the valid solutions in the final population and returns the one with the highest value
def knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, objectives, objective_names = None):
    max_index = len(weights) - 1
    population = initializePopulation(pop_size, team_size, max_index)

    for i in range(training_length):
        parents = adjustDistribution(capacity, weights, population, ideal_point, objectives, objective_names)
        population = generatePopulation(parents, pop_size, team_size, max_index)
    
    valid = []
    for individual in population:
        if calculateFitness(individual, weights, capacity, epsilon, primary, objectives) == -np.inf: continue
        valid.append(individual)

    valid = np.array(valid)
    print(f"Length of valid: {len(valid)}")
    if len(valid) == 0:

        return -1   
    indices = np.argsort([val for val in [calculateValue(ind, primary) for ind in valid]])
    return valid[indices[-1]]


In [158]:
ideal_point = [250, 20]
capacity = 150_000_000
training_length = 1000
pop_size = 500
team_size = 15


individual = knapSackGenetic(capacity, weights, training_length, pop_size, team_size, ideal_point, [points, rebounds, assists], ["Points", "Rebounds", "Assists"])


345
570
        Weights  Points  Rebounds  Assists
0    11370786.0    30.1       5.4      6.7
1    15756438.0    29.0       6.1      7.5
2    20158622.0    28.2       8.2      5.0
3    15851950.0    26.9      11.5      3.3
4    22970500.0    25.3       7.4      6.8
5     4236287.0    25.1       4.0      6.8
6     7070730.0    24.3      10.3      1.9
7    10050000.0    23.5       4.5      4.0
8    16744218.0    23.5       7.8     10.4
9    17120106.0    23.1       7.0      4.1
10    6912869.0    22.2       3.0      6.2
11   15501000.0    22.1       3.8      2.1
12   22875000.0    21.8       7.7      4.2
13   18907726.0    21.4       8.4      4.9
14   16407500.0    21.2       6.8      2.6
15   16407500.0    20.9       5.3      4.8
16   12000000.0    20.9       4.4      5.2
17    2525160.0    20.8       3.2      4.3
18    5758680.0    20.7       3.6      2.0
19   13500000.0    20.4       4.0      6.1
20   15851950.0    19.9       4.9     10.2
21   15409570.0    19.7       5.0      3.7
22 

TypeError: cannot unpack non-iterable NoneType object