## Performance Metrics Calculation

This notebook evaluates and compares the performance of two genetic algorithms, **GA-MSM** and **GA-MSM-P**, across two research questions:

1. **RQ1: Evaluating Noise Types as Mutation Operators**  
   Gaussian, Pink, and Ornstein-Uhlenbeck (OU) noise are used as mutation operators in GA-MSM and GA-MSM-P. The algorithms are tested on the **Frozen Lake environment**, with the outputs stored in the corresponding `.xlsx` files created by the algorithms from the **Code folder**. Statistical significance is analyzed to assess the performance differences between the noise types.

2. **RQ2: Investigating Pink Noise Injection in Input Space**  
   Pink noise is injected into the **continuous input space** of the **CartPole environment** at different scale values (σ = 1.0, 0.5, 0.1). Runs without pink noise injection serve as a baseline. The `.xlsx` files containing these results are also output by the algorithms from the **Code folder**.

### Methodology
- For both research questions, the **median scores of average population fitness** are calculated across multiple runs of the algorithms.
- The **generation at which convergence occurs** is determined using predefined thresholds for fitness stability and minimum fitness values.


In [1]:
import numpy as np

# function to find convergence
def find_convergence(data, threshold, minimum_fitness, window=10):
    """Locate the first generation at which a fitness series has converged.

    A generation ``gen`` counts as converged when the ``window`` consecutive
    values starting at ``gen`` satisfy both conditions:

    * their spread (max - min) is strictly below ``threshold``; and
    * their mean is at least ``minimum_fitness``.

    The second condition rejects flat stretches at a low fitness level, so a
    series that is stable only because nothing useful has been found yet is
    not reported as converged.

    Parameters
    ----------
    data : sequence of float
        Fitness value per generation; must support ``len`` and slicing.
    threshold : float
        Exclusive upper bound on the max - min spread inside the window.
    minimum_fitness : float
        Inclusive lower bound on the mean of the window.
    window : int, optional
        Number of consecutive generations inspected at a time (default 10).

    Returns
    -------
    tuple
        ``(gen, data[gen])`` for the first converged generation, or
        ``(-1, None)`` when no full window satisfies both conditions
        (including when ``data`` is shorter than ``window``).
    """
    # "+ 1" so that the final full window, which starts at
    # len(data) - window, is examined as well.
    for gen in range(len(data) - window + 1):
        segment = data[gen:gen + window]

        # Not yet stable: the values still move by threshold or more.
        if max(segment) - min(segment) >= threshold:
            continue

        # Stable, but at a fitness level too low to count as convergence.
        if np.mean(segment) < minimum_fitness:
            continue

        return gen, data[gen]

    return -1, None  # No convergence found


In [2]:
# calculating medians

import pandas as pd

# Number of generation entries and run entries consumed from each workbook.
number_of_generations = 200
number_of_runs = 10

# The sheets are read without a header row; the code below indexes the
# resulting tables as [run][generation].
df1 = pd.read_excel("Gaussian_FrozenLake_GAMSMP.xlsx", sheet_name='Sheet1', header=None)
df2 = pd.read_excel("Pink_FrozenLake_GAMSMP.xlsx", sheet_name='Sheet1', header=None)
df3 = pd.read_excel("OU_FrozenLake_GAMSMP.xlsx", sheet_name='Sheet1', header=None)

all_run_data1 = df1.values.tolist()
all_run_data2 = df2.values.tolist()
all_run_data3 = df3.values.tolist()

# For every generation, take the median over the first `number_of_runs` runs.
# One comprehension handles the three noise types, replacing three
# copy-pasted loops that shared a scratch list.
final_data1, final_data2, final_data3 = [
    [
        np.median([runs[run_number][gen] for run_number in range(number_of_runs)])
        for gen in range(number_of_generations)
    ]
    for runs in (all_run_data1, all_run_data2, all_run_data3)  # Gaussian, Pink, OU
]

In [3]:
# Stability band and fitness floor passed to find_convergence for the
# per-generation median series computed in the previous cell.
threshold, minimum_fitness = 0.03, 0.2

convergence1, convergence2, convergence3 = [
    find_convergence(medians, threshold, minimum_fitness)
    for medians in (final_data1, final_data2, final_data3)
]

# Each result is a (generation, value at that generation) pair.
report_lines = (
    f"Convergence for Gaussian (GA-MSM-P): Generation {convergence1[0]}, Avg Fitness: {convergence1[1]}",
    f"Convergence for Pink (GA-MSM-P): Generation {convergence2[0]}, Avg Fitness: {convergence2[1]}",
    f"Convergence for OU (GA-MSM-P): Generation {convergence3[0]}, Avg Fitness: {convergence3[1]}",
)
for line in report_lines:
    print(line)

Convergence for Gaussian (GA-MSM-P): Generation 91, Avg Fitness: 0.98
Convergence for Pink (GA-MSM-P): Generation 100, Avg Fitness: 0.98
Convergence for OU (GA-MSM-P): Generation 115, Avg Fitness: 0.955


In [4]:
# calculating medians

import pandas as pd

# The sheets are read without a header row; the code below indexes the
# resulting tables as [run][generation].
# NOTE: number_of_generations and number_of_runs are not set in this cell;
# they keep the values assigned by an earlier cell.
df1 = pd.read_excel("Gaussian_FrozenLake_GAMSM.xlsx", sheet_name='Sheet1', header=None)
df2 = pd.read_excel("Pink_FrozenLake_GAMSM.xlsx", sheet_name='Sheet1', header=None)
df3 = pd.read_excel("OU_FrozenLake_GAMSM.xlsx", sheet_name='Sheet1', header=None)

all_run_data1 = df1.values.tolist()
all_run_data2 = df2.values.tolist()
all_run_data3 = df3.values.tolist()

# For every generation, take the median over the first `number_of_runs` runs.
# One comprehension handles the three noise types, replacing three
# copy-pasted loops that shared a scratch list.
final_data1, final_data2, final_data3 = [
    [
        np.median([runs[run_number][gen] for run_number in range(number_of_runs)])
        for gen in range(number_of_generations)
    ]
    for runs in (all_run_data1, all_run_data2, all_run_data3)  # Gaussian, Pink, OU
]

In [5]:
# Stability band and fitness floor passed to find_convergence for the
# per-generation median series computed in the previous cell.
threshold, minimum_fitness = 0.03, 0.2

convergence1, convergence2, convergence3 = [
    find_convergence(medians, threshold, minimum_fitness)
    for medians in (final_data1, final_data2, final_data3)
]

# Each result is a (generation, value at that generation) pair.
report_lines = (
    f"Convergence for Gaussian (GA-MSM): Generation {convergence1[0]}, Avg Fitness: {convergence1[1]}",
    f"Convergence for Pink (GA-MSM): Generation {convergence2[0]}, Avg Fitness: {convergence2[1]}",
    f"Convergence for OU (GA-MSM): Generation {convergence3[0]}, Avg Fitness: {convergence3[1]}",
)
for line in report_lines:
    print(line)

Convergence for Gaussian (GA-MSM): Generation 135, Avg Fitness: 0.985
Convergence for Pink (GA-MSM): Generation 129, Avg Fitness: 1.0
Convergence for OU (GA-MSM): Generation 140, Avg Fitness: 0.975


In [6]:
# calculating medians

import pandas as pd

# Number of run entries and generation entries consumed from each workbook.
number_of_runs = 5
number_of_generations = 100

# The sheets are read without a header row; the code below indexes the
# resulting tables as [run][generation].
df1 = pd.read_excel("scale1GAMSM.xlsx", sheet_name='Sheet1', header=None)
df2 = pd.read_excel("scale2GAMSM.xlsx", sheet_name='Sheet1', header=None)
df3 = pd.read_excel("scale3GAMSM.xlsx", sheet_name='Sheet1', header=None)
df4 = pd.read_excel("scale0GAMSM.xlsx", sheet_name='Sheet1', header=None)

all_run_data1 = df1.values.tolist()
all_run_data2 = df2.values.tolist()
all_run_data3 = df3.values.tolist()
all_run_data4 = df4.values.tolist()

# For every generation, take the median over the first `number_of_runs` runs.
# One comprehension handles all four workbooks, replacing four copy-pasted
# loops that shared a scratch list.
final_data1, final_data2, final_data3, final_data4 = [
    [
        np.median([runs[run_number][gen] for run_number in range(number_of_runs)])
        for gen in range(number_of_generations)
    ]
    for runs in (all_run_data1, all_run_data2, all_run_data3, all_run_data4)
]

In [7]:
# Stability band and fitness floor passed to find_convergence for the
# per-generation median series computed in the previous cell.
threshold, minimum_fitness = 5, 25

convergence1, convergence2, convergence3, convergence4 = [
    find_convergence(medians, threshold, minimum_fitness)
    for medians in (final_data1, final_data2, final_data3, final_data4)
]

# Each result is a (generation, value at that generation) pair.
report_lines = (
    f"Convergence for Pink Noise Injection Scale = 1.0 (GA-MSM) : Generation {convergence1[0]}, Avg Fitness: {convergence1[1]}",
    f"Convergence for Pink Noise Injection Scale = 0.5 (GA-MSM) : Generation {convergence2[0]}, Avg Fitness: {convergence2[1]}",
    f"Convergence for Pink Noise Injection Scale = 0.1 (GA-MSM) : Generation {convergence3[0]}, Avg Fitness: {convergence3[1]}",
    f"Convergence for No Pink Noise Injection (GA-MSM): Generation {convergence4[0]}, Avg Fitness: {convergence4[1]}",
)
for line in report_lines:
    print(line)

Convergence for Pink Noise Injection Scale = 1.0 (GA-MSM) : Generation 16, Avg Fitness: 28.56400000000001
Convergence for Pink Noise Injection Scale = 0.5 (GA-MSM) : Generation 51, Avg Fitness: 80.95499999999998
Convergence for Pink Noise Injection Scale = 0.1 (GA-MSM) : Generation 51, Avg Fitness: 193.892
Convergence for No Pink Noise Injection (GA-MSM): Generation 55, Avg Fitness: 195.568


In [8]:
# calculating medians

import pandas as pd

# The sheets are read without a header row; the code below indexes the
# resulting tables as [run][generation].
# NOTE: number_of_generations and number_of_runs are not set in this cell;
# they keep the values assigned by an earlier cell.
df1 = pd.read_excel("scale1GAMSMP.xlsx", sheet_name='Sheet1', header=None)
df2 = pd.read_excel("scale2GAMSMP.xlsx", sheet_name='Sheet1', header=None)
df3 = pd.read_excel("scale3GAMSMP.xlsx", sheet_name='Sheet1', header=None)
df4 = pd.read_excel("scale0GAMSMP.xlsx", sheet_name='Sheet1', header=None)

all_run_data1 = df1.values.tolist()
all_run_data2 = df2.values.tolist()
all_run_data3 = df3.values.tolist()
all_run_data4 = df4.values.tolist()

# For every generation, take the median over the first `number_of_runs` runs.
# One comprehension handles all four workbooks, replacing four copy-pasted
# loops that shared a scratch list.
final_data1, final_data2, final_data3, final_data4 = [
    [
        np.median([runs[run_number][gen] for run_number in range(number_of_runs)])
        for gen in range(number_of_generations)
    ]
    for runs in (all_run_data1, all_run_data2, all_run_data3, all_run_data4)
]

In [9]:
# Stability band and fitness floor passed to find_convergence for the
# per-generation median series computed in the previous cell.
threshold, minimum_fitness = 5, 25

convergence1, convergence2, convergence3, convergence4 = [
    find_convergence(medians, threshold, minimum_fitness)
    for medians in (final_data1, final_data2, final_data3, final_data4)
]

# Each result is a (generation, value at that generation) pair.
report_lines = (
    f"Convergence for Pink Noise Injection Scale = 1.0 (GA-MSM-P) : Generation {convergence1[0]}, Avg Fitness: {convergence1[1]}",
    f"Convergence for Pink Noise Injection Scale = 0.5 (GA-MSM-P) : Generation {convergence2[0]}, Avg Fitness: {convergence2[1]}",
    f"Convergence for Pink Noise Injection Scale = 0.1 (GA-MSM-P) : Generation {convergence3[0]}, Avg Fitness: {convergence3[1]}",
    f"Convergence for No Pink Noise Injection (GA-MSM-P): Generation {convergence4[0]}, Avg Fitness: {convergence4[1]}",
)
for line in report_lines:
    print(line)

Convergence for Pink Noise Injection Scale = 1.0 (GA-MSM-P) : Generation 27, Avg Fitness: 37.72800000000001
Convergence for Pink Noise Injection Scale = 0.5 (GA-MSM-P) : Generation 45, Avg Fitness: 94.09500000000001
Convergence for Pink Noise Injection Scale = 0.1 (GA-MSM-P) : Generation 38, Avg Fitness: 192.124
Convergence for No Pink Noise Injection (GA-MSM-P): Generation 38, Avg Fitness: 192.126
