# Selection Methods Exploration and Tests
This notebook contains our approach to exploring selection methods which were later included in the library. 

In [1]:
import sys
sys.path.append('..')

In [None]:
import pandas as pd
import random
from copy import deepcopy

from library.custom.custom_solutions import WSOSolution


In [3]:
df = pdf = pd.read_csv("../library/data/seating_data.csv", index_col=0)
df

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,55,56,57,58,59,60,61,62,63,64
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,5000,0,0,700,700,0,0,0,0,...,100,100,0,0,100,100,100,0,0,0
2,5000,0,700,700,0,0,300,300,500,500,...,100,100,0,100,0,0,0,0,0,0
3,0,700,0,2000,0,0,0,0,300,300,...,0,0,0,0,0,0,0,0,0,0
4,0,700,2000,0,0,0,900,400,300,300,...,0,0,0,0,0,0,0,0,0,0
5,700,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,100,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
61,100,0,0,0,0,0,0,0,0,0,...,0,0,0,0,100,0,0,2000,700,700
62,0,0,0,0,0,0,0,0,0,0,...,0,0,-1000,0,100,0,2000,0,700,700
63,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,700,700,0,900


# SELECTION ALGORITHMS

Do we need to keep the minimization option? Can delete if we think it's not necessary. 

## Tournament Selection
I didn't realize there was a get_best_ind function already in the library, maybe we incorporate that instead of using what I've done

In [4]:
def tournament_selection(population: list, maximization: bool = True, k: int = 4):

    '''
    Definition
        Tournament Selection chooses k random inidivuals from the original
        population to participate in a tournament. Fitnesses of each individual
        in the tournament are evaluated, and the individual with best fitness
        (highest for maximization, lowest for minimization) is selected as the
        winner. This individual is added to parent population P'.

    
    Parameters
        population: a list of solutions/individuals, must have a fitness() method
        maximization: boolean to indicate if the O.P. is maximization or minimization
        k: integer that determines size of tournament

    Returns
        Deepcopy of the winner of the tournament.
    '''

    # function to get fitness of individual in population
    def get_fitness(ind):
        return ind.fitness()

    # sample k random inidividuals from population
    tournament = random.sample(population, k)

    # choose winner of tournament
    if maximization:
        winner = max(tournament, key=get_fitness)
    
    else:
        winner = min(tournament, key=get_fitness)

    return deepcopy(winner)

## Ranking Selection

In [5]:
def ranking_selection(population: list, maximization: bool = True):

    '''
    Definition
        Ranking Selection sorts all individuals in the original population based
        on fitness (highest to lowest for maximization, lowest to highest for
        minimization). Selection probabilities are then calculated based on rankings;
        higher ranked individuals have higher probability of being selected and vice-versa.
        This method disregards magnitude of difference in fitnesses of individuals.

    Parameters
        population: a list of solutions/individuals, must have a fitness() method
        maximization: boolean to indicate if the O.P. is maximization or minimization

    Returns
        Deepcopy of the selected individual.   
    
    '''

    # function to get fitness of individual in population
    def get_fitness(ind):
        return ind.fitness()

    # sort the individuals in the population based on fitness values
    if maximization:
        sorted_population = sorted(population, key=get_fitness, reverse=True)
    else:
        sorted_population = sorted(population, key=get_fitness)

    # create ranking list in descending order for maximization
    rankings = list(range(len(population), 0, -1))

    if not maximization:
        rankings = list(range(1, len(population) + 1))

    total_rank = sum(rankings)

    probabilities = []
    for r in rankings:
        ind_prob = r / total_rank
        probabilities.append(ind_prob)

    print('Sorted population:', sorted_population)
    #print('Sorted population fitness values:', sorted_population.fitness)
    print('Selection Probabilities:', probabilities)


    # from Lab Week 6
    # Generate random number between 0 and 1
    random_nr = random.uniform(0, 1)
    box_boundary = 0

    # For each individual, check if random number is inside the individual's "box"
    for ind_idx, ind in enumerate(sorted_population):
        box_boundary += probabilities[ind_idx]
        if random_nr <= box_boundary:
            return deepcopy(ind)    

## Testing Selection Methods

From Chat mostly, just creating a sample population to test selection algorithms to make sure they are functioning

In [6]:
population_size = 20
population = [
    WSOSolution(relationship_matrix=df, table_size=8, num_tables=8)
    for _ in range(population_size)
]

In [7]:
def test_selection(selection_func, population, trials=100, **kwargs):
    from collections import Counter
    selected_ids = []
    
    for _ in range(trials):
        selected = selection_func(population, **kwargs)
        selected_ids.append(tuple(selected.repr))  # Use repr as identifier

    return Counter(selected_ids)

In [8]:
tournament_result = test_selection(tournament_selection, population, trials=100, k=3, maximization=True)

for solution_repr, count in tournament_result.most_common(10):
    print(f"{solution_repr[:10]}... selected {count} times")

(4, 4, 4, 0, 5, 7, 1, 1, 3, 4)... selected 15 times
(6, 0, 2, 7, 3, 3, 6, 7, 0, 7)... selected 15 times
(7, 0, 3, 1, 6, 3, 1, 4, 5, 3)... selected 13 times
(4, 1, 0, 7, 7, 2, 0, 6, 6, 5)... selected 12 times
(7, 5, 3, 2, 7, 6, 0, 0, 7, 6)... selected 7 times
(3, 1, 0, 2, 5, 2, 2, 6, 3, 2)... selected 6 times
(5, 1, 7, 1, 6, 6, 7, 6, 2, 2)... selected 6 times
(3, 7, 4, 5, 0, 5, 3, 3, 0, 2)... selected 6 times
(7, 0, 1, 3, 3, 4, 2, 5, 5, 3)... selected 6 times
(7, 3, 6, 0, 6, 3, 6, 4, 3, 1)... selected 5 times


In [9]:
ranking_result = test_selection(ranking_selection, population, trials=100, maximization=True)

for solution_repr, count in ranking_result.most_common(10):
    print(f"{solution_repr[:10]}... selected {count} times")

Sorted population: [[6, 0, 2, 7, 3, 3, 6, 7, 0, 7, 3, 3, 7, 7, 4, 1, 7, 2, 6, 6, 6, 5, 1, 2, 4, 6, 6, 2, 7, 0, 0, 5, 2, 3, 2, 2, 5, 0, 5, 1, 4, 4, 5, 4, 7, 3, 0, 1, 4, 3, 0, 4, 0, 6, 5, 4, 2, 1, 5, 5, 1, 3, 1, 1], [4, 4, 4, 0, 5, 7, 1, 1, 3, 4, 5, 5, 1, 3, 5, 7, 5, 6, 2, 6, 0, 1, 7, 6, 6, 0, 0, 3, 7, 6, 0, 3, 1, 3, 0, 5, 6, 1, 2, 6, 7, 2, 0, 4, 3, 2, 5, 2, 4, 6, 7, 4, 1, 2, 2, 7, 0, 3, 4, 5, 1, 2, 3, 7], [7, 0, 3, 1, 6, 3, 1, 4, 5, 3, 6, 6, 1, 7, 3, 1, 5, 0, 2, 5, 0, 2, 3, 6, 6, 6, 5, 2, 7, 5, 0, 1, 2, 1, 6, 3, 1, 6, 2, 0, 2, 0, 4, 5, 7, 1, 7, 4, 7, 0, 4, 4, 2, 7, 7, 2, 4, 4, 0, 4, 5, 5, 3, 3], [7, 5, 3, 2, 7, 6, 0, 0, 7, 6, 2, 1, 4, 0, 6, 4, 7, 7, 0, 5, 6, 4, 1, 2, 4, 2, 0, 6, 5, 2, 3, 6, 1, 3, 3, 1, 4, 7, 7, 5, 4, 2, 3, 1, 0, 3, 1, 1, 2, 5, 0, 0, 3, 7, 5, 2, 4, 5, 5, 3, 6, 6, 1, 4], [4, 1, 0, 7, 7, 2, 0, 6, 6, 5, 1, 0, 1, 1, 0, 5, 4, 5, 0, 1, 6, 5, 6, 3, 7, 3, 6, 5, 6, 4, 3, 4, 2, 2, 2, 2, 6, 7, 7, 0, 5, 3, 4, 2, 7, 5, 4, 4, 3, 1, 3, 7, 4, 2, 1, 2, 6, 5, 3, 3, 0, 1, 7, 0], [3, 7, 4, 