## Standard Genetic Algorithm Implementation

Deprecated because it does not enforce the global restrictions. Superseded by `enhanced_genetic_algorithm.ipynb`.

#### Libraries

In [17]:
import pandas as pd # For datasets
import utils.fitness_functions as ff
import utils.monoobjective_exhaustive_solver as es
import utils.restriction_checker as rc
import models.team_assignment as ta
import random # For randomness
import os # For output
from contextlib import redirect_stdout # For output
import datetime # For output
from copy import deepcopy # For crossover function
import numpy # For doing repetitive runs and getting mean values

# Some imports that may be used later (for plotting or mathematical computations)
# import numpy as np
# import matplotlib.pyplot as plt
# import statsmodels.api as sm
# import seaborn as sns

#### Constants

In [18]:
PROJECTS = ['Shotmaniacs', 'actFact', 'Honours Programme', 'Voice', 'Topicus', 'Earnit', 'Inter-actief'] # Projects a team can be assigned to.
DATA = 'data/reduced_dataset_1.csv' # Path to the dataset CSV.
DATASET = pd.read_csv(DATA) # Dataset loaded as a dataframe (read once, when this cell runs).

NUMBER_OF_GENERATIONS = 50 # Upper bound on the generation index in the GA loop (generation 0 is the initial population).
NUMBER_OF_REPEATS = 10 # Number of repeats before introducing systematic randomness. NOTE(review): not referenced in the code visible here - confirm it is still needed.
PARENT_RESET_THRESHOLD = 10 # Number of consecutive non-improving generations before both parents are replaced by random arrangements.
MAX_TEAM_CREATION_ATTEMPTS = 10 # Maximum number of shuffles create_random_teams tries before falling back to an unrestricted split (avoiding infinite loop).
MAX_CROSSOVER_ATTEMPTS = 10 # Maximum number of attempts in crossover algorithm (avoiding infinite loop).
MAX_MUTATION_ATTEMPTS = 10 # Maximum number of attempts in mutation algorithm (avoiding infinite loop).

COMPUTE_OPTIMAL_SOLUTION = False # (Only for small synthetic datasets), computes optimal solution through exhaustive solver.
EFFICIENCY_GOAL = None # (Only for small synthetic datasets), allows early finishing of the algorithm once a certain efficiency
                     # (best score / score of best possible solution, expressed as a percentage) is met. None disables it.

OFFSPRING_PER_GENERATION = 8 # Number of offspring 'birthed' in each iteration of the GA.

RUN_TIME = datetime.datetime.now().strftime("run_%Y%m%d_%H%M%S") # Timestamp taken when this cell runs; default run-folder name in save_arrangement_batch.
BASE_DIR = "output/sga" # Base directory for outputs.

#### Output Configuration

In [19]:
def save_arrangement_batch(arrangements, iteration, run_time=RUN_TIME):
    """Score every arrangement and write one report file per arrangement.

    Reports are written to
    ``BASE_DIR/<run_time>/generation_<iteration>/assignment_<i>.txt`` with
    ``i`` counting from 1. Anything ``ff.evaluate_all_teams`` prints while
    scoring is captured into that file, followed by the final score line.

    Args:
        arrangements: Iterable of arrangements to evaluate.
        iteration: Generation number, used in the output folder name.
        run_time: Name of the run folder under ``BASE_DIR``.

    Returns:
        List of scores, in the same order as ``arrangements``.
    """
    # Make sure both the base folder and this generation's folder exist.
    os.makedirs(BASE_DIR, exist_ok=True)
    generation_dir = os.path.join(BASE_DIR, f"{run_time}", f"generation_{iteration}")
    os.makedirs(generation_dir, exist_ok=True)

    def _evaluate_to_file(index, arrangement):
        # stdout is redirected so the evaluator's printed output lands in the report.
        report_path = os.path.join(generation_dir, f"assignment_{index}.txt")
        with open(report_path, "w") as report, redirect_stdout(report):
            result = ff.evaluate_all_teams(arrangement)
            print(f"\nFinal score for arrangement {index}: {result:.4f}")
        return result

    return [
        _evaluate_to_file(index, arrangement)
        for index, arrangement in enumerate(arrangements, start=1)
    ]

#### Initial Population Generation

In [20]:
def _pick_team_size(remaining, min_size, max_size):
    """Pick a team size that leaves the rest of the students still splittable."""

    def splittable(count):
        # `count` students fit into k teams iff k*min_size <= count <= k*max_size.
        # The smallest k that satisfies the upper bound is ceil(count / max_size);
        # any larger k only makes the lower bound harder to meet.
        teams_needed = -(-count // max_size)
        return teams_needed * min_size <= count

    candidates = [
        size
        for size in range(min_size, min(max_size, remaining) + 1)
        if splittable(remaining - size)
    ]
    if candidates:
        return random.choice(candidates)
    # No size avoids leftovers (e.g. 7-9 students with teams of 5-6); take the
    # minimum so the caller's coverage check rejects this attempt.
    return min_size


def _new_team(members, project_pool, project_counters):
    """Build a TeamAssignment on a random project, numbering teams per project."""
    project = random.choice(project_pool)
    count = project_counters.get(project, 0) + 1
    project_counters[project] = count
    return ta.TeamAssignment(f"{project} {count}", members, project, fitness=0.0)


def create_random_teams(df, min_size=5, max_size=6, project_pool=PROJECTS):
    """Randomly split the students in ``df`` into teams with random projects.

    Up to ``MAX_TEAM_CREATION_ATTEMPTS`` shuffles are tried. An attempt is
    accepted only if every team passes ``rc.is_valid_team`` and every ID in
    ``df['ID']`` ends up in a team. If no attempt succeeds, a fallback split
    is returned that ignores the restrictions: consecutive chunks of
    ``max_size`` students, so the last team may be smaller than ``min_size``.

    Args:
        df: DataFrame of students; must contain an ``ID`` column.
        min_size: Smallest allowed team size.
        max_size: Largest allowed team size.
        project_pool: Projects to draw each team's project from.

    Returns:
        List of ``ta.TeamAssignment`` objects (empty if ``df`` has no rows).

    Raises:
        ValueError: If ``min_size`` < 1 or ``max_size`` < ``min_size``.
    """
    if min_size < 1 or max_size < min_size:
        raise ValueError(
            f"Invalid team size bounds: min_size={min_size}, max_size={max_size}"
        )

    for _ in range(MAX_TEAM_CREATION_ATTEMPTS):
        students = df.to_dict(orient='records')
        random.shuffle(students)

        total_students = len(students)
        teams = []
        project_counters = {}  # Track number of teams per project
        i = 0
        while total_students - i >= min_size:
            # Choose a size that does not strand fewer than min_size students.
            team_size = _pick_team_size(total_students - i, min_size, max_size)
            team = students[i:i + team_size]

            if not rc.is_valid_team(team):
                # Skip one student and retry from the next position. The
                # skipped student stays unassigned, so the coverage check
                # below is expected to reject this attempt.
                i += 1
                continue

            teams.append(_new_team(team, project_pool, project_counters))
            i += team_size

        # Accept the attempt only if every student was placed in a team.
        assigned_ids = {member['ID'] for team in teams for member in team.members}
        if assigned_ids == set(df['ID']):
            return teams

    # Fallback: random arrangement that ignores team restrictions.
    students = df.to_dict(orient='records')
    random.shuffle(students)
    fallback_teams = []
    project_counters = {}
    for start in range(0, len(students), max_size):
        chunk = students[start:start + max_size]
        fallback_teams.append(_new_team(chunk, project_pool, project_counters))
    return fallback_teams
    

#### Selection

In [21]:
def select_parents(arrangements, scores):
    """Return the two highest-scoring arrangements, best first.

    ``arrangements`` and ``scores`` are paired positionally. Ties keep their
    original relative order. Raises ``IndexError`` if fewer than two pairs
    are available.
    """
    ranked = sorted(
        zip(arrangements, scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    best, runner_up = ranked[0], ranked[1]
    return [best[0], runner_up[0]]

#### Crossover

In [22]:
# TODO Improve crossover function

def crossover(parent1, parent2, attempt_counter):
    """Build a child arrangement from teams of two parent arrangements.

    Each attempt copies a random subset of ``parent1``'s teams, adds every
    ``parent2`` team that shares no student with the teams already taken,
    and groups any students still unassigned with ``create_random_teams``.
    The first child accepted by ``rc.is_valid_arrangement`` is returned.

    Args:
        parent1: Arrangement (list of teams) that seeds the child.
        parent2: Arrangement that contributes non-overlapping teams.
        attempt_counter: Single-element list; element 0 is incremented once
            per attempt so the caller can track total work.

    Returns:
        A valid child arrangement, or a fresh random arrangement of the whole
        dataset if ``MAX_CROSSOVER_ATTEMPTS`` attempts all fail.
    """
    # Dataset lookups do not change between attempts, so build them once.
    all_ids = DATASET['ID'].tolist()
    df_by_id = DATASET.set_index('ID').to_dict(orient='index')

    # Normally between 1 and len(parent1) - 1 teams are inherited. Clamp the
    # bounds so a parent with fewer than two teams does not make randint fail.
    max_inherited = max(len(parent1) - 1, 1)

    for _ in range(MAX_CROSSOVER_ATTEMPTS):
        attempt_counter[0] += 1
        inherited_count = min(random.randint(1, max_inherited), len(parent1))
        selected_teams = random.sample(parent1, k=inherited_count)

        # IDs of students already placed through the inherited teams
        used_ids = {member['ID'] for team in selected_teams for member in team.members}

        # Start building child from copies, so the parents are never mutated
        child_teams = deepcopy(selected_teams)

        # Add non-overlapping teams from parent2
        for team in parent2:
            team_ids = [member['ID'] for member in team.members]
            if any(student_id in used_ids for student_id in team_ids):
                continue
            child_teams.append(deepcopy(team))
            used_ids.update(team_ids)

        # Group the students no inherited team covers into new random teams
        remaining_students = [
            dict(df_by_id[student_id], ID=student_id)
            for student_id in all_ids
            if student_id not in used_ids
        ]
        if remaining_students:
            remaining_df = pd.DataFrame(remaining_students)
            child_teams += create_random_teams(remaining_df)

        if rc.is_valid_arrangement(child_teams, total_students=len(DATASET), projects=PROJECTS):
            return child_teams

    return create_random_teams(DATASET) # Fallback. Random arrangement.

#### Mutation

In [23]:
def mutate(arrangement, attempt_counter):
    """Return a copy of ``arrangement`` with one student swapped between two teams.

    Each attempt works on a fresh deep copy, so ``arrangement`` itself is
    never modified. Element 0 of ``attempt_counter`` is incremented once per
    attempt. If no swap passes ``rc.is_valid_arrangement`` within
    ``MAX_MUTATION_ATTEMPTS`` attempts, a fresh random arrangement of the
    whole dataset is returned instead.
    """
    for _ in range(MAX_MUTATION_ATTEMPTS):
        attempt_counter[0] += 1
        candidate = deepcopy(arrangement)

        # Two different teams, and one random member of each
        first_team, second_team = random.sample(candidate, 2)
        leaving_first = random.choice(first_team.members)
        leaving_second = random.choice(second_team.members)

        # Exchange the two students (each joins the end of the other team)
        first_team.members.remove(leaving_first)
        second_team.members.remove(leaving_second)
        first_team.members.append(leaving_second)
        second_team.members.append(leaving_first)

        # Invalid swaps are discarded and another one is tried
        if not rc.is_valid_arrangement(candidate, total_students=len(DATASET), projects=PROJECTS):
            continue
        return candidate

    return create_random_teams(DATASET) # Fallback. Random arrangement.

#### GA Execution

In [24]:
def execute():
    """Run the genetic algorithm once and return per-generation statistics.

    Builds an initial population of 10 random arrangements, then repeatedly
    evaluates the population, keeps the two best arrangements as parents and
    creates ``OFFSPRING_PER_GENERATION`` children through crossover and
    mutation. After ``PARENT_RESET_THRESHOLD`` consecutive generations
    without improvement the parents are replaced by random arrangements.

    Side effects: writes one report per arrangement and generation (via
    ``save_arrangement_batch``) and a ``performance.txt`` log under
    ``BASE_DIR/<run timestamp>/``.

    Returns:
        DataFrame with columns ``score`` (best score so far) and
        ``number of computations`` (cumulative crossover/mutation attempts),
        one row per completed generation, indexed from 1.
    """
    df = pd.DataFrame({
        "score": pd.Series(dtype="float"),
        "number of computations": pd.Series(dtype="float")
    })

    # 0. Initial Population Generation
    arrangements = []
    while len(arrangements) < 10:
        arrangement = create_random_teams(DATASET)
        if arrangement is not None:
            arrangements.append(arrangement)

    # 0.5 Computation of Best Possible Arrangement for Performance Measurement. Only feasible in small synthetic datasets.
    if COMPUTE_OPTIMAL_SOLUTION:
        _, optimal_score, arrangements_computed = es.find_best_arrangement(DATASET, DATA)

    # 0.75 Performance Log (For debugging, and ensuring that score improves with each generation).
    run_time = datetime.datetime.now().strftime("run_%Y%m%d_%H%M%S")
    performance_log = os.path.join(BASE_DIR, f"{run_time}", "performance.txt")
    os.makedirs(os.path.dirname(performance_log), exist_ok=True)

    with open(performance_log, "w") as f:
        f.write(f"Performance log for run: {run_time}\n")
        if COMPUTE_OPTIMAL_SOLUTION:
            f.write(f"Optimal score: {optimal_score:.4f}\n\n")

    iters = 0
    best_score = -1
    previous_score = -1
    efficiency = -1
    stagnation_count = 0 # Counter to introduce randomness upon stagnation.
    # Single-element list so crossover() and mutate() can increment it in place.
    attempted__arrangements = [0]
    # Generation 0 is the initial population, so `<=` evaluates
    # NUMBER_OF_GENERATIONS + 1 populations in total.
    while iters <= NUMBER_OF_GENERATIONS and (EFFICIENCY_GOAL is None or efficiency < EFFICIENCY_GOAL):

        # 1. Fitness Evaluation
        scores = save_arrangement_batch(arrangements, iters, run_time) # Evaluates all teams, and outputs to output folder. Returns score of each arrangement.
        current_max = max(scores)

        if current_max > previous_score:
            stagnation_count = 0
        else:
            stagnation_count += 1  # No improvement, stagnation

        # Update the running best BEFORE logging, so the log line, the
        # efficiency and the early-stop check all reflect this generation.
        if current_max > best_score:
            best_score = current_max

        if COMPUTE_OPTIMAL_SOLUTION:
            efficiency = round((best_score / optimal_score) * 100, 2)

        # Log the best score in the generation
        with open(performance_log, "a") as f:
            f.write(
                f"Generation {iters}: Best score = {best_score:.4f}"
                + (f" | Efficiency = {efficiency:.2f}%" if COMPUTE_OPTIMAL_SOLUTION else "")
                + f" | Total Computations: {attempted__arrangements[0]}"
                + (f" of {arrangements_computed:.0f} Possible Combinations" if COMPUTE_OPTIMAL_SOLUTION else "")
                + "\n"
            )

        # Optional: stop early if efficiency goal is met
        if EFFICIENCY_GOAL is not None and efficiency >= EFFICIENCY_GOAL:
            break

        # 2. Selection
        parents = select_parents(arrangements, scores)

        # 3. Crossover & Mutation with fallback on stagnation
        if stagnation_count >= PARENT_RESET_THRESHOLD:
            # Full reset of parents after too much stagnation
            parents = []
            while len(parents) < 2:
                new_parent = create_random_teams(DATASET)
                if new_parent is not None:
                    parents.append(new_parent)
            stagnation_count = 0  # Reset counter after fallback

        # Offspring creation
        offspring = []
        while len(offspring) < OFFSPRING_PER_GENERATION:
            child = crossover(parents[0], parents[1], attempted__arrangements)
            if child is None:
                continue
            mutated_child = mutate(child, attempted__arrangements)
            offspring.append(mutated_child)

        arrangements = parents + offspring  # Proceed to next generation

        previous_score = current_max
        iters += 1

        df.loc[iters] = [best_score, attempted__arrangements[0]]

    return df