## Mono-objective Algorithms versus Self-Selected Arrangement

Tool to benchmark mono-objective algorithms against the original arrangement of teams made for the 2022-23 Data & Information Course, as well as against a random assignment, showing how these algorithms perform relative to state-of-the-art solutions to the TFP.

Also executes a Wilcoxon signed-rank test comparing the performance of the EGA and PSO against the self-selected arrangement.

In [None]:
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import contextlib
import io
from scipy.stats import wilcoxon
import import_ipynb

import enhanced_genetic_algorithm as ega
import particle_swarm_optimization as pso
import enhanced_genetic_algorithm_copy as ega_copy
import particle_swarm_optimization_copy as pso_copy
import evaluate_assignment as evaluate
import utils.fitness_functions as ff

# --- Experiment configuration -------------------------------------------
# Number of repetitions; used both for the algorithm runs and for the
# random-arrangement baseline further down.
RUNS = 3
DATASET_FILE = "data/corrected_2022_23_arrangement.csv"

# Score of the self-selected arrangement. Anything the evaluator prints is
# captured and discarded; only the first of its three return values is kept.
SELF_SELECTED_SCORE = None
with contextlib.redirect_stdout(io.StringIO()):
    SELF_SELECTED_SCORE, _, _ = evaluate.evaluate()

# Read the semicolon-separated dataset once and hand the very same DataFrame
# to every algorithm module through its module-level DATASET attribute.
dataset_df = pd.read_csv(DATASET_FILE, sep=";")
for algorithm_module in (ega, pso, ega_copy, pso_copy):
    algorithm_module.DATASET = dataset_df

# One list per algorithm; every repetition appends the "score" column of the
# DataFrame returned by that algorithm's execute().
ega_runs, pso_runs, ega_copy_runs, pso_copy_runs = [], [], [], []

# Discard whatever the algorithms print to stdout while they run.
with contextlib.redirect_stdout(io.StringIO()):
    for _ in tqdm(range(RUNS), desc="Running GA and PSO"):
        # All four algorithms of one repetition share the same run label.
        timestamp = datetime.datetime.now().strftime("run_%Y%m%d_%H%M%S")

        ega.RUN_TIME = timestamp
        ega.BASE_DIR = "output/mono_vs_benchmarks/ega"
        ega.NUMBER_OF_GENERATIONS = 300
        df_ega, _ = ega.execute()
        ega_runs.append(df_ega["score"])

        pso.RUN_TIME = timestamp
        pso.BASE_DIR = "output/mono_vs_benchmarks/pso"
        pso.NUMBER_OF_ITERATIONS = 300
        df_pso, _ = pso.execute()
        pso_runs.append(df_pso["score"])

        ega_copy.RUN_TIME = timestamp
        ega_copy.BASE_DIR = "output/mono_vs_benchmarks/ega_copy"
        ega_copy.NUMBER_OF_GENERATIONS = 300
        df_ega_copy, _ = ega_copy.execute()
        ega_copy_runs.append(df_ega_copy["score"])

        pso_copy.RUN_TIME = timestamp
        pso_copy.BASE_DIR = "output/mono_vs_benchmarks/pso_copy"
        # Configure pso_copy itself (previously this line re-set `pso`), so
        # that all four algorithms run with the same 300-step budget.
        pso_copy.NUMBER_OF_ITERATIONS = 300
        df_pso_copy, _ = pso_copy.execute()
        pso_copy_runs.append(df_pso_copy["score"])

# Final score of every run: the last entry of that run's score series.
ega_final_scores = [scores.iloc[-1] for scores in ega_runs]
pso_final_scores = [scores.iloc[-1] for scores in pso_runs]
ega_copy_final_scores = [scores.iloc[-1] for scores in ega_copy_runs]
pso_copy_final_scores = [scores.iloc[-1] for scores in pso_copy_runs]

# Wilcoxon signed-rank test per algorithm: its final scores are paired with
# the self-selected score repeated once per run.
stat_ga_self, pval_ga_self = wilcoxon(
    ega_final_scores,
    [SELF_SELECTED_SCORE] * len(ega_final_scores),
)
stat_pso_self, pval_pso_self = wilcoxon(
    pso_final_scores,
    [SELF_SELECTED_SCORE] * len(pso_final_scores),
)
stat_ga_copy_self, pval_ga_copy_self = wilcoxon(
    ega_copy_final_scores,
    [SELF_SELECTED_SCORE] * len(ega_copy_final_scores),
)
stat_pso_copy_self, pval_pso_copy_self = wilcoxon(
    pso_copy_final_scores,
    [SELF_SELECTED_SCORE] * len(pso_copy_final_scores),
)

# Random baseline: score up to RUNS randomly generated arrangements and
# average them.
random_scores = []
for _ in range(RUNS):
    # Keep drawing until the generator returns something other than None.
    arrangement = pso.create_random_teams(dataset_df)
    while arrangement is None:
        arrangement = pso.create_random_teams(dataset_df)
    # A falsy (e.g. empty) arrangement is skipped and contributes no score.
    if not arrangement:
        continue
    # Discard anything the fitness function prints.
    with contextlib.redirect_stdout(io.StringIO()):
        score = ff.evaluate_all_teams(arrangement)
    random_scores.append(score)
RANDOM_SCORE = sum(random_scores) / len(random_scores)

# Average the score series across runs: each run becomes one column, and the
# row-wise mean gives one averaged value per index entry.
ega_avg = pd.concat(ega_runs, axis=1).mean(axis=1)
pso_avg = pd.concat(pso_runs, axis=1).mean(axis=1)
ega_copy_avg = pd.concat(ega_copy_runs, axis=1).mean(axis=1)
pso_copy_avg = pd.concat(pso_copy_runs, axis=1).mean(axis=1)
# The EGA index is used as the shared x-axis for all four curves.
generations = ega_avg.index

plt.figure(figsize=(10, 6))
plt.plot(generations, ega_avg, label="GA", linestyle="-")
plt.plot(generations, pso_avg, label="PSO", linestyle="-")
plt.plot(generations, ega_copy_avg, label="Greedy GA", linestyle="-", color="red")
# Greedy PSO gets its own colour: it used to share solid red with Greedy GA,
# which made the two curves indistinguishable in the plot and its legend.
plt.plot(generations, pso_copy_avg, label="Greedy PSO", linestyle="-", color="green")
# Constant reference levels drawn across the full x-range.
plt.hlines(SELF_SELECTED_SCORE, generations[0], generations[-1], colors="purple", linestyles="dashed", label="Self-Selected")
plt.hlines(RANDOM_SCORE, generations[0], generations[-1], colors="gray", linestyles="dotted", label="Random Arrangement")
plt.title("GA and PSO vs. Benchmarks on Original Dataset")
plt.xlabel("Generation/Iteration")
plt.ylabel("Fitness Function Score")
plt.legend()
plt.grid(True)
plt.tight_layout()
# Save before show(), so the figure is written while it is still current.
plt.savefig("figures/fig_ga_pso_vs_self_selected.png")
plt.show()

# Assemble the Wilcoxon report (header plus one line per algorithm) and emit
# it in a single print; the text produced is the same as printing each line
# on its own.
report_lines = [
    "\nStatistical Comparison to Benchmarks:",
    f"GA vs Self-Selected: statistic={stat_ga_self}, p={pval_ga_self}",
    f"PSO vs Self-Selected: statistic={stat_pso_self}, p={pval_pso_self}",
    f"Greedy GA vs Self-Selected: statistic={stat_ga_copy_self}, p={pval_ga_copy_self}",
    f"Greedy PSO vs Self-Selected: statistic={stat_pso_copy_self}, p={pval_pso_copy_self}",
]
print("\n".join(report_lines))