# GA & PSO Comparison

In [12]:
import pandas as pd
from scipy.stats import wilcoxon
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")

## Load GA & PSO Results

In [13]:
# Paths to results
GA_RESULTS_PATH = "experiments/results/ga/"
PSO_RESULTS_PATH = "experiments/results/pso/"

# Load results
ga_results = pd.read_csv(GA_RESULTS_PATH)
pso_results = pd.read_csv(PSO_RESULTS_PATH)

# Add algorithm labels
ga_results["algorithm"] = "GA"
pso_results["algorithm"] = "PSO"

# Combine results
combined_results = pd.concat([ga_results, pso_results], ignore_index=True)

PermissionError: [Errno 13] Permission denied: 'experiments/results/ga/'

## Plot Boxplots for Fitness & Evaluations

In [14]:
# Plot fitness boxplots
plt.figure(figsize=(12, 8))
sns.boxplot(x="num_companies", y="sharpe_ratio", hue="algorithm", data=combined_results, palette={"GA": "blue", "PSO": "orange"})
plt.title("Fitness Comparison by Portfolio Size")
plt.xlabel("Number of Assets")
plt.ylabel("Sharpe Ratio")
plt.legend(title="Algorithm")
plt.show()

# Plot evaluations boxplots
plt.figure(figsize=(12, 8))
sns.boxplot(x="num_companies", y="n_evaluations", hue="algorithm", data=combined_results, palette={"GA": "blue", "PSO": "orange"})
plt.title("Evaluations Comparison by Portfolio Size")
plt.xlabel("Number of Assets")
plt.ylabel("Number of Evaluations")
plt.legend(title="Algorithm")
plt.show()

NameError: name 'combined_results' is not defined

<Figure size 1200x800 with 0 Axes>

## Wilcoxon Signed-Rank Test for Pairwise Comparison

In [None]:
alpha = 0.05

# Iterate over unique portfolio sizes
for num_companies in combined_results["num_companies"].unique():
    print(f"\nPortfolio Size: {num_companies}")

    # Filter results for the current portfolio size
    ga_data = combined_results[(combined_results["num_companies"] == num_companies) & (combined_results["algorithm"] == "GA")]
    pso_data = combined_results[(combined_results["num_companies"] == num_companies) & (combined_results["algorithm"] == "PSO")]

    # Perform Wilcoxon test for fitness
    stat, p_value = wilcoxon(ga_data["sharpe_ratio"], pso_data["sharpe_ratio"])
    print(f"Fitness - GA Mean: {ga_data['sharpe_ratio'].mean():.4f}, PSO Mean: {pso_data['sharpe_ratio'].mean():.4f}")
    print("The difference in fitness is statistically significant." if p_value < alpha else "The difference in fitness is NOT statistically significant.")

    # Perform Wilcoxon test for evaluations
    stat, p_value = wilcoxon(ga_data["n_evaluations"], pso_data["n_evaluations"])
    print(f"Evaluations - GA Mean: {ga_data['n_evaluations'].mean():.4f}, PSO Mean: {pso_data['n_evaluations'].mean():.4f}")
    print("The difference in evaluations is statistically significant." if p_value < alpha else "The difference in evaluations is NOT statistically significant.")


## Friedman Test for Overall Algorithm Comparison

In [None]:
from scipy.stats import friedmanchisquare

# Prepare data for Friedman test
fitness_data = [
    combined_results[(combined_results["num_companies"] == num_companies) & (combined_results["algorithm"] == "GA")]["sharpe_ratio"].values
    for num_companies in combined_results["num_companies"].unique()
] + [
    combined_results[(combined_results["num_companies"] == num_companies) & (combined_results["algorithm"] == "PSO")]["sharpe_ratio"].values
    for num_companies in combined_results["num_companies"].unique()
]

# Perform Friedman test for fitness
stat, p_value = friedmanchisquare(*fitness_data)
print("Friedman Test for Fitness")
print(f"Statistic: {stat:.4f}, P-Value: {p_value:.4f}")
print("The difference in fitness across algorithms is statistically significant." if p_value < alpha else "The difference in fitness across algorithms is NOT statistically significant.")

## Fine-Tuning

In [22]:
from scipy.stats import wilcoxon, friedmanchisquare, ttest_rel, shapiro

# Compare configurations using statistical tests
def compare_configs(df):
    print("Performing statistical comparisons...")
    try:
        # Pivot the DataFrame to compare algorithms by quality
        sharpe_values = df.pivot(index="quality", columns="algorithm", values="mean_sharpe")

        # Check if both algorithms are present
        if "GA" not in sharpe_values.columns or "PSO" not in sharpe_values.columns:
            print("One of the algorithms (GA or PSO) is missing from the selected configurations.")
            print("Statistical comparison cannot be performed without both algorithms.")
            return

        # Ensure both columns have equal length
        if len(sharpe_values["GA"]) != len(sharpe_values["PSO"]):
            print("❌ GA and PSO configurations have unequal lengths, skipping paired tests.")
        else:
            # Check for normality using Shapiro-Wilk test
            normal_ga = shapiro(sharpe_values["GA"]).pvalue > 0.05
            normal_pso = shapiro(sharpe_values["PSO"]).pvalue > 0.05
            
            if normal_ga and normal_pso:
                try:
                    # Perform Paired t-test
                    stat, p_value = ttest_rel(sharpe_values["GA"], sharpe_values["PSO"])
                    print(f"Paired t-test between GA and PSO: statistic={stat:.4f}, p-value={p_value:.4f}")
                except Exception as e:
                    print(f"❌ Error during t-test: {e}")
            else:
                try:
                    # Fallback to Wilcoxon if normality is not met
                    stat, p_value = wilcoxon(sharpe_values["GA"], sharpe_values["PSO"])
                    print(f"Wilcoxon Test between GA and PSO: statistic={stat:.4f}, p-value={p_value:.4f}")
                except Exception as e:
                    print(f"❌ Error during Wilcoxon test: {e}")

        # Check the number of configurations before performing Friedman Test
        if len(sharpe_values.columns) < 3:
            print("Friedman test requires at least 3 sets of configurations. Skipping Friedman test.")
            return
        
        # Perform the Friedman Test across all available configurations
        try:
            stat, p_value = friedmanchisquare(*[sharpe_values[algo].dropna() for algo in sharpe_values.columns])
            print(f"Friedman Test across configurations: statistic={stat:.4f}, p-value={p_value:.4f}")
        except Exception as e:
            print(f"❌ Error during Friedman test: {e}")

    except Exception as e:
        print(f"❌ Error during statistical comparison setup: {e}")


# Load the selected configurations and compare
try:
    file_path = get_results_path("selected_configs.csv")
    print(f"Loading selected configurations from: {file_path}")
    selected_configs = pd.read_csv(file_path)
    compare_configs(selected_configs)
except FileNotFoundError:
    print("❌ Selected configurations not found.")

Loading selected configurations from: c:\Users\lara\BAO_Portfolio-Optimization-Problem\experiments\results\selected_configs.csv
Performing statistical comparisons...
Wilcoxon Test between GA and PSO: statistic=0.0000, p-value=0.2500
Friedman test requires at least 3 sets of configurations. Skipping Friedman test.
