In [84]:
import sys
sys.path.append('..')
import random
import numpy as np
import pandas as pd
from copy import deepcopy
from collections import Counter

# Load the Data

In [3]:
# Read the raw player sheet into a DataFrame
df = pd.read_excel('players.xlsx')

# Flatten every spreadsheet row into a (name, position, skill, salary) tuple.
# NOTE(review): the salary header appears to contain a mis-encoded euro sign;
# it has to match the column name stored in the workbook exactly, so it is left untouched.
players = [
    (record['Name'], record['Position'], record['Skill'], record['Salary (â‚¬M)'])
    for _, record in df.iterrows()
]

In [4]:
# Rebuild a DataFrame from the extracted tuples for a quick visual check.
# NOTE(review): this rebinds `df`, replacing the frame read from the Excel file.
# NOTE(review): the fourth tuple element was read from the salary column, yet it is
# labelled 'Performance' here (and 'Skill' is relabelled 'Rating') — confirm the intended names.
df = pd.DataFrame(players, columns=['Player Name', 'Position', 'Rating', 'Performance'])
print(df)

         Player Name Position  Rating  Performance
0        Alex Carter       GK      85           90
1       Jordan Smith       GK      88          100
2      Ryan Mitchell       GK      83           85
3     Chris Thompson       GK      80           80
4    Blake Henderson       GK      87           95
5      Daniel Foster      DEF      90          110
6      Lucas Bennett      DEF      85           90
7        Owen Parker      DEF      88          100
8       Ethan Howard      DEF      80           70
9         Mason Reed      DEF      82           75
10      Logan Brooks      DEF      86           95
11      Caleb Fisher      DEF      84           85
12     Nathan Wright      MID      92          120
13      Connor Hayes      MID      89          105
14      Dylan Morgan      MID      91          115
15     Hunter Cooper      MID      83           85
16     Austin Torres      MID      82           80
17  Gavin Richardson      MID      87           95
18      Spencer Ward      MID  

In [5]:
# Display the full list of (name, position, skill, salary) tuples built above
players

[('Alex Carter', 'GK', 85, 90),
 ('Jordan Smith', 'GK', 88, 100),
 ('Ryan Mitchell', 'GK', 83, 85),
 ('Chris Thompson', 'GK', 80, 80),
 ('Blake Henderson', 'GK', 87, 95),
 ('Daniel Foster', 'DEF', 90, 110),
 ('Lucas Bennett', 'DEF', 85, 90),
 ('Owen Parker', 'DEF', 88, 100),
 ('Ethan Howard', 'DEF', 80, 70),
 ('Mason Reed', 'DEF', 82, 75),
 ('Logan Brooks', 'DEF', 86, 95),
 ('Caleb Fisher', 'DEF', 84, 85),
 ('Nathan Wright', 'MID', 92, 120),
 ('Connor Hayes', 'MID', 89, 105),
 ('Dylan Morgan', 'MID', 91, 115),
 ('Hunter Cooper', 'MID', 83, 85),
 ('Austin Torres', 'MID', 82, 80),
 ('Gavin Richardson', 'MID', 87, 95),
 ('Spencer Ward', 'MID', 84, 85),
 ('Sebastian Perry', 'FWD', 95, 150),
 ('Xavier Bryant', 'FWD', 90, 120),
 ('Elijah Sanders', 'FWD', 93, 140),
 ('Adrian Collins', 'FWD', 85, 90),
 ('Tyler Jenkins', 'FWD', 80, 70),
 ('Chase Murphy', 'FWD', 86, 95),
 ('Landon Powell', 'FWD', 89, 110),
 ('Julian Scott', 'FWD', 92, 130),
 ('Bentley Rivera', 'MID', 88, 100),
 ('Maxwell Flores'

In [6]:
# Constants
# Number of teams created by TeamAssignmentSolution.random_initial_representation
NUM_TEAMS = 5
# Squad size; equals the sum of POSITION_REQUIREMENTS (1 + 2 + 2 + 2).
# NOTE(review): not referenced by the code in this section — confirm it is used elsewhere.
PLAYERS_PER_TEAM = 7
# Upper bound on a single team's accumulated player cost
MAX_BUDGET = 750  # Million €
# Maximum number of players a team may hold per position
POSITION_REQUIREMENTS = {"GK": 1, "DEF": 2, "MID": 2, "FWD": 2}

In [None]:
class TeamAssignmentSolution:
    """One candidate assignment of players to teams.

    Attributes:
        players: sequence of ``(name, position, skill, cost)`` tuples available
            for assignment.
        population: mapping ``team_id -> {"GK": [...], "DEF": [...], "MID": [...],
            "FWD": [...], "total_cost": number}`` describing the assignment
            (empty until one is generated or supplied).
    """

    def __init__(self, players, population=None):
        self.players = players
        # A missing or empty assignment is normalised to an empty dict
        self.population = population or {}

    def random_initial_representation(self):
        """Build a random first-fit assignment and store it in ``self.population``.

        Players are visited in random order and placed into the first team
        that still has a free slot for their position and enough remaining
        budget. A player that fits nowhere is left out, so teams can end up
        with fewer players than ``POSITION_REQUIREMENTS`` asks for.

        Returns:
            The generated team dictionary (the same object as ``self.population``).
        """
        roster = {
            team_id: {"GK": [], "DEF": [], "MID": [], "FWD": [], "total_cost": 0}
            for team_id in range(NUM_TEAMS)
        }

        # random.sample over the full length yields a shuffled copy of the list
        for candidate in random.sample(self.players, len(self.players)):
            _name, role, _skill, price = candidate

            for squad in roster.values():
                has_free_slot = len(squad[role]) < POSITION_REQUIREMENTS[role]
                if has_free_slot and squad["total_cost"] + price <= MAX_BUDGET:
                    squad[role].append(candidate)
                    squad["total_cost"] += price
                    break  # placed; move on to the next player
            # no team accepted the player -> the player is simply skipped

        self.population = roster
        return self.population

    def fitness(self):
        """Return the standard deviation of the teams' average skill ratings.

        A team without any player contributes an average of 0.
        """
        per_team_average = []

        for squad in self.population.values():
            # Skill is the third field of every player tuple
            ratings = [
                member[2]
                for role in ("GK", "DEF", "MID", "FWD")
                for member in squad[role]
            ]
            per_team_average.append(sum(ratings) / len(ratings) if ratings else 0)

        return np.std(per_team_average)


In [70]:
# Create an instance of the TeamAssignmentSolution class and fill it with a
# random first-fit assignment (the result is stored on the instance)
solution = TeamAssignmentSolution(players)
solution.random_initial_representation()

# Evaluate the fitness of the initial assignment: the standard deviation of the
# teams' average skill, so a smaller value means more evenly matched teams
fitness_score = solution.fitness()
print(f"Fitness Score: {fitness_score}")

Fitness Score: 0.9187391537607729


# Selection Mechanism

In Ranking Selection, solutions with better fitness (lower fitness in a minimization problem) should be selected more frequently, as their ranks will be higher.

In [94]:
def ranking_selection(population: list, maximization: bool = False):
    """Select one individual with linear ranking selection.

    Individuals are ordered from worst to best and receive ranks ``1..n`` in
    that order, so the best individual holds the largest rank. An individual
    is then drawn with probability ``rank / sum(ranks)``.

    Args:
        population: non-empty list of individuals exposing a ``fitness()`` method.
        maximization: if True, higher fitness is better; otherwise lower
            fitness is better (the default).

    Returns:
        A deep copy of the selected individual.

    Raises:
        ValueError: if ``population`` is empty.
    """
    if not population:
        raise ValueError("ranking_selection requires a non-empty population")

    # Ascending fitness puts the worst individual first when maximizing ...
    sorted_population = sorted(population, key=lambda ind: ind.fitness())

    # ... and when minimizing, reversing puts the highest (worst) fitness first,
    # so in both cases the list runs from worst to best.
    if not maximization:
        sorted_population.reverse()

    # Rank 1 goes to the worst individual, rank n to the best
    ranks = range(1, len(sorted_population) + 1)
    total_rank = sum(ranks)

    # Roulette-wheel draw over the cumulative rank probabilities
    random_nr = random.uniform(0, 1)
    cumulative_prob = 0

    for individual, rank in zip(sorted_population, ranks):
        cumulative_prob += rank / total_rank
        if random_nr <= cumulative_prob:
            return deepcopy(individual)

    # Floating-point rounding can leave the cumulative sum marginally below the
    # drawn number; the draw then belongs to the last (best) individual instead
    # of silently returning None.
    return deepcopy(sorted_population[-1])

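In Tournament Selection, each call draws `tournament_size` individuals at random and keeps the fittest of them. The selection frequencies therefore depend on the tournament size (larger tournaments favour the best individuals more strongly) and on the randomness of the draws.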

In [95]:
def tournament_selection(population: list, tournament_size: int = 3, maximization: bool = False):
    """Select one individual with tournament selection.

    Draws ``tournament_size`` distinct individuals at random and returns the
    fittest of them.

    Args:
        population: list of individuals exposing a ``fitness()`` method.
        tournament_size: number of individuals competing in the tournament.
        maximization: if True, the highest fitness wins; otherwise the lowest
            fitness wins (the default).

    Returns:
        A deep copy of the tournament winner.

    Raises:
        ValueError: raised by ``random.sample`` when ``tournament_size`` is
            larger than the population (or negative).
    """
    # Sample without replacement so an individual cannot compete against itself
    tournament_individuals = random.sample(population, tournament_size)

    # The winner is the highest fitness when maximizing, the lowest when minimizing
    pick_best = max if maximization else min
    best_individual = pick_best(tournament_individuals, key=lambda ind: ind.fitness())

    return deepcopy(best_individual)

In [97]:
def test_selection(population_size: int = 10, num_selections: int = 100):
    """Compare how often ranking and tournament selection pick each fitness value.

    Builds a population of random team assignments, prints every individual's
    fitness, runs both selection operators ``num_selections`` times each and
    prints how many times each fitness value was selected.

    Args:
        population_size: number of random solutions to generate. It must be at
            least the default tournament size of ``tournament_selection``.
        num_selections: number of selections performed with each operator.
    """
    # Create a small population of TeamAssignmentSolution instances
    population = [TeamAssignmentSolution(players) for _ in range(population_size)]

    # Give every individual a random assignment and report its fitness
    for i, sol in enumerate(population):
        sol.random_initial_representation()
        print(f"Solution {i+1} - Fitness: {sol.fitness()}")

    # Run the selection process repeatedly with both operators
    ranking_selected_solutions = [ranking_selection(population) for _ in range(num_selections)]
    tournament_selected_solutions = [tournament_selection(population) for _ in range(num_selections)]

    # Count how many times each fitness value was selected; individuals that
    # share the same fitness are counted together
    ranking_fitness_counter = Counter(sol.fitness() for sol in ranking_selected_solutions)
    tournament_fitness_counter = Counter(sol.fitness() for sol in tournament_selected_solutions)

    # Print the selection frequency for both methods
    reports = (
        ("Ranking", ranking_fitness_counter),
        ("Tournament", tournament_fitness_counter),
    )
    for label, fitness_counter in reports:
        print(f"\n{label} Selection Frequency ({num_selections} Selections):")
        for fitness, count in fitness_counter.items():
            print(f"Fitness {fitness}: Selected {count} times")

# Run the selection comparison. The printed frequencies differ between runs unless
# the `random` module is seeded beforehand (no seeding is visible in this section).
test_selection()

Solution 1 - Fitness: 0.6663945022680342
Solution 2 - Fitness: 0.8870956969291464
Solution 3 - Fitness: 1.702445886207506
Solution 4 - Fitness: 0.7306692487168277
Solution 5 - Fitness: 1.3167678424917624
Solution 6 - Fitness: 1.206783547539591
Solution 7 - Fitness: 0.8446711713768246
Solution 8 - Fitness: 1.1863165424015245
Solution 9 - Fitness: 1.874860539031192
Solution 10 - Fitness: 0.5142857142857161

Ranking Selection Frequency (100 Selections):
Fitness 0.8446711713768246: Selected 11 times
Fitness 1.206783547539591: Selected 7 times
Fitness 1.3167678424917624: Selected 9 times
Fitness 1.1863165424015245: Selected 7 times
Fitness 0.5142857142857161: Selected 24 times
Fitness 0.8870956969291464: Selected 10 times
Fitness 0.7306692487168277: Selected 15 times
Fitness 0.6663945022680342: Selected 13 times
Fitness 1.702445886207506: Selected 1 times
Fitness 1.874860539031192: Selected 3 times

Tournament Selection Frequency (100 Selections):
Fitness 1.1863165424015245: Selected 5 time