In [11]:
import csv
from itertools import combinations
from bisect import bisect_left
from typing import List
import numpy as np
import pandas as pd
import scipy.stats as ss
import os
from pandas import Categorical
from IPython.display import display, HTML
# Inject a CSS rule that sets elements with class "container" to 80% width.
# NOTE(review): presumably this widens the notebook page so the wide result
# tables fit; whether the selector matches depends on the front end -- confirm.
display(HTML("<style>.container { width:80% !important; }</style>"))

# Display labels for the result tables: algorithm identifier -> backslash
# command (presumably a LaTeX macro defined in the accompanying paper).
# Note that several identifiers share one label (e.g. 'Chebycheff' and
# 'Chebycheff-10' both map to \qch).
name_map = {
    'NSGAII-2': r'\nsgatwo',
    'QLearningExplore': r'\qexp',
    'QLearning': r'\qb',
    'Chebycheff': r'\qch',
    'PQ-HV': r'\pqhv',
    'PQ-PO': r'\pqpo',
    'Tournament': r'\qw',
    'Chebycheff-10': r'\qch',
    'PQ-HV-Local10': r'\pqhv',
    'PQ-PO-Local10': r'\pqpo',
    'Tournament-Local10': r'\qw',
}

def VD_A(treatment: List[float], control: List[float]):
    """
    Computes the Vargha and Delaney A index for two equally sized samples.

    :param treatment: a numeric sequence (list, tuple, NumPy array, ...)
    :param control: another numeric sequence of the same length
    :returns: a tuple ``(estimate, magnitude)``: the A estimate and one of
        "negligible", "small", "medium" or "large"
    :raises ValueError: if the samples differ in length or are empty
    """
    m = len(treatment)
    n = len(control)

    if m != n:
        raise ValueError("treatment and control must have the same length")
    if m == 0:
        # Without this guard the formula below divides by zero.
        raise ValueError("treatment and control must not be empty")

    # Convert to lists before joining: `+` concatenates lists but adds NumPy
    # arrays / pandas Series element-wise, which would silently rank the wrong data.
    pooled = list(treatment) + list(control)
    r = ss.rankdata(pooled)
    # Rank sum of the treatment sample (the first m pooled values).
    r1 = sum(r[0:m])

    # Compute the measure
    # A = (r1/m - (m+1)/2)/n # formula (14) in Vargha and Delaney, 2000
    # equivalent formula to avoid accuracy errors
    A = (2 * r1 - m * (m + 1)) / (2 * n * m)

    levels = [0.147, 0.33, 0.474]  # effect sizes from Hess and Kromrey, 2004
    labels = ["negligible", "small", "medium", "large"]
    # Rescale A from [0, 1] to [-1, 1] so the thresholds apply to |scaled_A|.
    scaled_A = (A - 0.5) * 2

    # bisect_left: a value exactly on a threshold falls into the lower category.
    magnitude = labels[bisect_left(levels, abs(scaled_A))]
    estimate = A

    return estimate, magnitude


def read_algorithm_data(csv_file):
    """
    Read a CSV file with one column per algorithm into a dict of float lists.

    The first row is taken as the header; every following non-blank row must
    hold one numeric value per header column.

    :param csv_file: path of the CSV file to read
    :returns: dict mapping each header to the list of its column values
        (as floats); keys are inserted in sorted header order
    :raises ValueError: if the file has no header row, or (from ``float``)
        if a cell is not numeric
    """
    with open(csv_file, 'r', newline='') as file:
        reader = csv.reader(file)
        headers = next(reader, None)
        if headers is None:
            # An empty file would otherwise leak a bare StopIteration.
            raise ValueError(f"{csv_file} is empty: expected a header row")
        sorted_headers = sorted(headers)

        # Initialize the dictionary with sorted headers
        algorithm_data = {header: [] for header in sorted_headers}

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. trailing
                # newlines); they carry no data, so skip them.
                continue
            # Reorder row data according to sorted headers
            row_data = {headers[i]: float(value) for i, value in enumerate(row)}
            for header in sorted_headers:
                algorithm_data[header].append(row_data[header])

    return algorithm_data


def iterate_algorithm_pairs(algorithm_data, print_pairwise_sig=True, alpha=.05):
    """
    Compute pairwise effect sizes and significance tests for all algorithms.

    For every unordered pair of algorithms the Vargha-Delaney A estimate is
    computed in both directions (via ``VD_A``) and ``scipy.stats.ranksums``
    is run once on the two samples.

    :param algorithm_data: dict mapping algorithm name -> list of samples
    :param print_pairwise_sig: if True, print the significance matrix as a
        Markdown table; each filled cell reads ``<0|1>(<p-value>)`` and only
        the cells above the diagonal are filled
    :param alpha: significance level the p-value is compared against
    :returns: DataFrame of A estimates labelled via ``name_map``; cell
        ``[row, col]`` holds ``VD_A(treatment=col samples, control=row samples)[0]``
        and the diagonal is left as NaN
    """
    algorithm_names = list(algorithm_data.keys())
    # Fall back to the raw name when no display label is registered, so an
    # unmapped algorithm does not abort the analysis with a KeyError.
    labels = {name: name_map.get(name, name) for name in algorithm_names}
    axis_labels = [labels[name] for name in algorithm_names]

    matrix = pd.DataFrame(index=axis_labels, columns=axis_labels)
    matrix_sig = pd.DataFrame(index=axis_labels, columns=axis_labels)

    for a1, a2 in combinations(algorithm_names, 2):
        data1 = algorithm_data[a1]
        data2 = algorithm_data[a2]
        a1_n = labels[a1]
        a2_n = labels[a2]

        # Row = control, column = treatment.
        matrix.at[a1_n, a2_n] = VD_A(data2, data1)[0]
        matrix.at[a2_n, a1_n] = VD_A(data1, data2)[0]

        # Only the p-value is reported; the test statistic is not needed.
        _, p = ss.ranksums(data1, data2)
        is_significant = 1 if p < alpha else 0
        matrix_sig.at[a1_n, a2_n] = f"{is_significant}({p:.6f})"

    if print_pairwise_sig:
        print(matrix_sig.to_markdown())
    return matrix
    
def find_files(directory, target_filename="hvs_runs.csv"):
    """
    Recursively collect the paths of all files called ``target_filename``.

    :param directory: root directory to search (subdirectories included)
    :param target_filename: exact file name to match
    :returns: list of matching paths in ``os.walk`` order; each path is the
        walked folder joined with the file name
    """
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(directory)
        for entry in entries
        if entry == target_filename
    ]

### Significance tests

In [12]:
# Run the pairwise significance tests for every result file below ./data and
# report the mean effect-size matrix over all of them.
path = os.path.join('.', 'data')
hvs_files = find_files(path)
if not hvs_files:
    # Fail early with a clear message; otherwise the mean below would die
    # with an opaque ZeroDivisionError (sum([]) / len([])).
    raise FileNotFoundError(f"No result files found under {path!r}")
matrices = []

# Read every file up front, keyed by the directory that contains it.
runs_by_dir = {os.path.dirname(f): read_algorithm_data(f) for f in hvs_files}

# Use a dedicated loop variable so the notebook-level `path` is not overwritten.
for run_dir, algo_dat in runs_by_dir.items():
    print("-----------------------------------------------------------------------------")
    print(os.path.basename(run_dir))
    print("-----------------------------------------------------------------------------")
    matrix = iterate_algorithm_pairs(algo_dat)
    matrices.append(matrix)

# Element-wise mean of the per-directory effect-size matrices.
c_m = sum(matrices) / len(matrices)
print("Pooled mean effect size")
print(c_m.to_markdown())
# LaTeX variant of the same table:
#print(c_m.to_latex(na_rep='-', float_format="%.3f"))

-----------------------------------------------------------------------------
model_fifty_stacks
-----------------------------------------------------------------------------
|          |   \nsgatwo | \qb         | \qexp       |
|:---------|-----------:|:------------|:------------|
| \nsgatwo |        nan | 1(0.000000) | 1(0.000000) |
| \qb      |        nan | nan         | 1(0.000000) |
| \qexp    |        nan | nan         | nan         |
-----------------------------------------------------------------------------
model_five_stacks
-----------------------------------------------------------------------------
|          |   \nsgatwo | \qb         | \qexp       |
|:---------|-----------:|:------------|:------------|
| \nsgatwo |        nan | 0(0.052775) | 1(0.000001) |
| \qb      |        nan | nan         | 1(0.000003) |
| \qexp    |        nan | nan         | nan         |
-----------------------------------------------------------------------------
model_twentyfive_stacks
---------

### Effect size

In [13]:
# Report the mean effect-size matrix over all result files of one case study.
case_study = 'slb'
path = os.path.join('.', 'data', case_study)
hvs_files = find_files(path)
if not hvs_files:
    # Fail early with a clear message; otherwise the mean below would die
    # with an opaque ZeroDivisionError (sum([]) / len([])).
    raise FileNotFoundError(f"No result files found under {path!r}")
matrices = []

print("-----------------------------------------------------------------------------")
print(case_study)
print("-----------------------------------------------------------------------------")

# Read every file up front, keyed by the directory that contains it.
runs_by_dir = {os.path.dirname(f): read_algorithm_data(f) for f in hvs_files}

# Use a dedicated loop variable so the notebook-level `path` is not overwritten.
for run_dir, algo_dat in runs_by_dir.items():
    # Second argument False: skip printing the per-directory significance table.
    matrix = iterate_algorithm_pairs(algo_dat, False)
    matrices.append(matrix)

# Element-wise mean of the per-directory effect-size matrices.
c_m = sum(matrices) / len(matrices)
print("Pooled mean effect size")
print(c_m.to_markdown())


-----------------------------------------------------------------------------
slb
-----------------------------------------------------------------------------
Pooled mean effect size
|          |    \nsgatwo |         \qb |      \qexp |
|:---------|------------:|------------:|-----------:|
| \nsgatwo | nan         |   0.201389  |   0.950833 |
| \qb      |   0.798611  | nan         |   0.956111 |
| \qexp    |   0.0491667 |   0.0438889 | nan        |
