In [64]:
!pip install utils





In [65]:
from scipy.stats import mannwhitneyu
import itertools
import numpy as np
from collections import defaultdict
from typing import Callable, Optional
import csv
import pandas as pd
import os
import utils

import utils


def is_valid_data_file(file_name:str) -> bool:
    return file_name.endswith("json") or file_name.endswith("txt")


def get_mean_for_combinations(df: pd.DataFrame, 
                       independent_variables: list[str], 
                       dependent_variables: list[str]) -> pd.DataFrame:

    # ensure all the columns are present in the df
    for col in independent_variables+dependent_variables:
        if col not in df:
            raise Exception(f"The column {col} is not in the dataframe\n\t(columns are {list(df.columns)})")
    assert(all(col in df for col in independent_variables))
    assert(dependent_variable in df for dependent_variable in dependent_variables)
    
    grouped = df.groupby(independent_variables, dropna=False)[dependent_variables].mean().reset_index()
    
    return grouped


import json
import os


def json_to_entries(data: dict):
    def item_to_list_of_entries(item) -> list[dict]:
        # Skip if 'results_by_tree' is not present
        if "results_by_tree" not in item:
            return []

        problem_name = item["problem_name"]
        pRef_method = item["pRef_method"]
        pRef_size = item["sample_size"]
        #runtime_seconds = item.get("runtime_seconds", None)  # Extract runtime

        # Try to get runtime at either top-level or tree-level
        runtime_seconds_top = item.get("runtime_seconds", None)

        entries = item["results_by_tree"]

        def get_modified_entry(entry):
            entry["problem"] = item["problem_name"]
            entry["pRef_method"] = item["pRef_method"]
            entry["pRef_size"] = item["sample_size"]

            # pick runtime from tree-level if exists; else top-level
            runtime_sec_depth = entry.get("runtime_seconds", runtime_seconds_top)
            entry["runtime_seconds"] = runtime_sec_depth

            errors = entry["results"]
            entry |= errors
            del entry["results"]

            if "order_tree" in entry:
                del entry["order_tree"]

            return entry

        """
        entries = item["results_by_tree"]

        def get_modified_entry(entry):
            entry["problem"] = problem_name
            entry["pRef_method"] = pRef_method
            entry["pRef_size"] = pRef_size
            entry["runtime_seconds"] = runtime_seconds  # Add runtime to each entry

            errors = entry["results"]
            entry = entry | errors
            del entry["results"]

            if "order_tree" in entry:
                del entry["order_tree"]

            return entry
        """
        entries = list(map(get_modified_entry, entries))
        return entries

    return [entry for item in data for entry in item_to_list_of_entries(item)]

'''
def json_to_entries(data: dict):
    def item_to_list_of_entries(item) -> list[dict]:
        # Handle both results_by_tree and tree_settings_list
        if "results_by_tree" in item:
            # Original logic for results data
            problem_name = item["problem_name"]
            pRef_method = item["pRef_method"]
            pRef_size = item["sample_size"]
            entries = item["results_by_tree"]
            
            def get_modified_entry(entry):
                entry["problem"] = problem_name
                entry["pRef_method"] = pRef_method
                entry["pRef_size"] = pRef_size
                errors = entry["results"]
                entry = entry | errors
                del entry["results"]
                if "order_tree" in entry:
                    del entry["order_tree"]
                return entry
            
            return list(map(get_modified_entry, entries))
            
        elif "tree_settings_list" in item:
            # New logic for configuration data
            problem_name = item["problem_name"]
            pRef_method = item["pRef_method"]
            pRef_size = item["sample_size"]
            has_error = 'error' in item
            error_msg = item.get('error', '')
            
            entries = []
            for tree_setting in item["tree_settings_list"]:
                for depth in tree_setting["depths"]:
                    entry = {
                        'problem': problem_name,
                        'pRef_method': pRef_method,
                        'pRef_size': pRef_size,
                        'kind': tree_setting['kind'],
                        'depth': depth,
                        'ps_budget': tree_setting['ps_budget'],
                        'ps_population': tree_setting['ps_population'],
                        'avoid_ancestors': tree_setting['avoid_ancestors'],
                        'metrics': tree_setting['metrics'],
                        'has_error': has_error,
                        'error_message': error_msg,
                        # No actual results available
                        'mse': None,
                        'mae': None,
                        'r_sq': None,
                        'evs': None
                    }
                    entries.append(entry)
            return entries
        else:
            return []

    return [entry for item in data for entry in item_to_list_of_entries(item)]


def json_to_entries(data: dict):
    def item_to_list_of_entries(item) -> list[dict]:
        # Handle both results_by_tree and tree_settings_list
        if "results_by_tree" in item:
            # Original logic for results data
            problem_name = item["problem_name"]
            pRef_method = item["pRef_method"]
            pRef_size = item["sample_size"]
            entries = item["results_by_tree"]
            
            def get_modified_entry(entry):
                entry["problem"] = problem_name
                entry["pRef_method"] = pRef_method
                entry["pRef_size"] = pRef_size
                errors = entry["results"]
                entry = entry | errors
                del entry["results"]
                if "order_tree" in entry:
                    del entry["order_tree"]
                return entry
            
            return list(map(get_modified_entry, entries))
            
        elif "tree_settings_list" in item:
            # FIXED: Handle different tree types properly
            problem_name = item["problem_name"]
            pRef_method = item["pRef_method"]
            pRef_size = item["sample_size"]
            has_error = 'error' in item
            error_msg = item.get('error', '')
            
            entries = []
            for tree_setting in item["tree_settings_list"]:
                for depth in tree_setting["depths"]:
                    # Base entry with common fields
                    entry = {
                        'problem': problem_name,
                        'pRef_method': pRef_method,
                        'pRef_size': pRef_size,
                        'kind': tree_setting['kind'],
                        'depth': depth,
                        'has_error': has_error,
                        'error_message': error_msg,
                        # No actual results available
                        'mse': None,
                        'mae': None,
                        'r_sq': None,
                        'evs': None
                    }
                    
                    # Add tree-type-specific fields
                    if tree_setting['kind'] == 'ps':
                        entry.update({
                            'ps_budget': tree_setting.get('ps_budget', None),
                            'ps_population': tree_setting.get('ps_population', None),
                            'avoid_ancestors': tree_setting.get('avoid_ancestors', None),
                            'metrics': tree_setting.get('metrics', None),
                            'cp': None  # PS doesn't have cp
                        })
                    elif tree_setting['kind'] == 'iai':
                        entry.update({
                            'ps_budget': None,  # IAI doesn't have these
                            'ps_population': None,
                            'avoid_ancestors': None,
                            'metrics': None,
                            'cp': tree_setting.get('cp', None)
                        })
                    elif tree_setting['kind'] == 'naive':
                        entry.update({
                            'ps_budget': None,  # Naive doesn't have these
                            'ps_population': None,
                            'avoid_ancestors': None,
                            'metrics': None,
                            'cp': None
                        })
                    
                    entries.append(entry)
            return entries
        else:
            return []

    return [entry for item in data for entry in item_to_list_of_entries(item)]
'''
def convert_accuracy_data_to_df(input_directory, output_filename):

    all_dicts = []
    # Iterate through all files in the input directory
    for filename in os.listdir(input_directory):
        # Construct full file path
        file_path = os.path.join(input_directory, filename)

        # Check if the file is a JSON file
        if not os.path.isfile(file_path):
            continue

        if not is_valid_data_file(file_path):
            continue

        with open(file_path, 'r') as file:
            data = json.load(file)
            entries = json_to_entries(data)
            all_dicts.extend(entries)

    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(all_dicts)

    # Write the DataFrame to a CSV file
    df.to_csv(output_filename, index=False)
   
def json_to_tree_data(data: dict):
    def item_to_list_of_entries(item) -> list[dict]:
        # Skip if 'results_by_tree' is not present
        if "results_by_tree" not in item:
            return []

        surrounding_information = {
            "problem": item["problem_name"],
            "pRef_method": item["pRef_method"]
        }

        entries = item["results_by_tree"]
        entries = [thing for thing in entries if "order_tree" in thing]

        def convert_order_tree(order_tree, accumulator=None, current_depth=0):
            if accumulator is None:
                accumulator = defaultdict(list)
            accumulator[current_depth].append(order_tree["own"])
            if len(order_tree["matching"]) > 0:
                convert_order_tree(order_tree["matching"], accumulator, current_depth + 1)
            if len(order_tree["unmatching"]) > 0:
                convert_order_tree(order_tree["unmatching"], accumulator, current_depth + 1)
            return accumulator

        def convert_tree_to_averages_by_level(entry):
            ps_search_info = {
                "ps_budget": entry["ps_budget"],
                "ps_population": entry["ps_population"],
                "metrics": entry["metrics"]
            }
            tree_structure = entry["order_tree"]
            just_depths = convert_order_tree(tree_structure)
            core_info_trees = [
                {"depth": depth, "order": order}
                for depth in just_depths
                for order in just_depths[depth]
            ]
            core_info_trees = [surrounding_information | ps_search_info | core_tree for core_tree in core_info_trees]
            return core_info_trees

        entries = list(map(convert_tree_to_averages_by_level, entries))
        return entries

    return [entry for item in data for entry in item_to_list_of_entries(item)]


def convert_tree_data_to_df(input_directory, output_filename):

    all_dicts = []
    # Iterate through all files in the input directory
    for filename in os.listdir(input_directory):
        # Construct full file path
        file_path = os.path.join(input_directory, filename)

        # Check if the file is a JSON file
        if not os.path.isfile(file_path):
            continue

        if not is_valid_data_file(file_path):
            continue


        with open(file_path, 'r') as file:
            data = json.load(file)
            entries = json_to_tree_data(data)
            all_dicts.extend(entries)

    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(all_dicts)

    # Write the DataFrame to a CSV file
    df.to_csv(output_filename, index=False)
    
    
def filter_dataframe(df, **kwargs):
    df = df.copy()  # Make a copy of the DataFrame to avoid modifying the original
    for col, value in kwargs.items():
        if col in df.columns:
            df = df[df[col] == value]
        else:
            raise ValueError(f"Column '{col}' not found in dataframe.")
    return df
        

    
    

def prettify_kind_column(df):
    kind_dict = {#"variance":"PS-W",
                 #"variance estimated_atomicity": "PS-WA",
                 "simplicity variance": "PS-SW",
                 "simplicity variance estimated_atomicity" :"PS-SWA"}
    
    df['kind'] = df.apply(
    lambda row: (
        kind_dict[row['metrics']] if row['kind'] == 'ps' else
        'Trad.' if row['kind'] == 'naive' else
        'IAI' if row['kind'] == 'iai' else
        row['kind']
    ),
    axis=1
)
    
    

    
    

In [66]:
import os

In [67]:
#run_location = r"/Users/gian/Desktop/CondorResults/VDT/compareown/run3/"
#run_location = r"C:\Users\gac8\Desktop\CondorResults\VDT\compareown\all_final_runs"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_07-29-H15'm'15's16"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-07-H16'm'47's09"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-10-H01'm'12's39"
#A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_07-29-H15'm'15's16

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-19-H16'm'26's53"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-19-H23'm'35's39"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\iai_run_3_08-20-H01'm'00's52"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-24-H02'm'25's36"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\iai_run_3_08-24-H11'm'06's57"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-24-H02'm'25's36"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-26-H14'm'06's32"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\compare_own_data_08-27-H12'm'29's47"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\all_final_runs"

#aco and pso
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\all_final_runs"

#bbo aco bro cro
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\ACO+BBO+BRO+CRO"

#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\all_runs_5"
#run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\compare_own_data\all_output_files"
run_location = r"A:\metahuristic_benchmark\PS-descriptors\resources\variance_tree_materials\final_runs_with_time"

results_csv = os.path.join(run_location, "results.csv")
tree_data_csv = os.path.join(run_location, "tree_data.csv")

print("Regenerating CSV files with runtime data...")
convert_accuracy_data_to_df(run_location, results_csv)
convert_tree_data_to_df(run_location, tree_data_csv)

print("CSV files regenerated successfully!")

#convert_accuracy_data_to_df(os.path.join(run_location, "data"), results_csv)
#convert_tree_data_to_df(os.path.join(run_location, "data"), tree_data_csv)



Regenerating CSV files with runtime data...
CSV files regenerated successfully!
CSV files regenerated successfully!


In [68]:

accuracy_data = pd.read_csv(results_csv)
prettify_kind_column(accuracy_data)
#tree_data = pd.read_csv(tree_data_csv)

display(accuracy_data)
print(f"\nColumns in accuracy_data: {list(accuracy_data.columns)}")
print(f"\nData types:")
display(accuracy_data.dtypes)

# Check if runtime_seconds column exists and has data
if 'runtime_seconds' in accuracy_data.columns:
    print(f"\nRuntime data available:")
    print(f"Non-null runtime values: {accuracy_data['runtime_seconds'].notna().sum()}")
    print(f"Sample runtime values: {accuracy_data['runtime_seconds'].dropna().head()}")
else:
    print("\nWARNING: runtime_seconds column not found!")

#prettify_kind_column(tree_data)

for kind in accuracy_data["kind"].unique():
    matching_rows = accuracy_data[accuracy_data['kind'] == kind]
    print(f"For the tree kind {kind}, there are {matching_rows.shape[0]} rows")

#headers = "kind,depth,ps_budget,ps_population,avoid_ancestors,metrics,problem,pRef_method,mse,mae,r_sq,evs"

Unnamed: 0,kind,depth,ps_budget,ps_population,avoid_ancestors,metrics,problem,pRef_method,pRef_size,runtime_seconds,mse,mae,r_sq,evs,cp
0,PS-SW,3,5000.0,100.0,False,simplicity variance,BT,ACO,10000,192.815,32.766721,4.467219,0.788686,0.7888384,
1,PS-SWA,3,5000.0,100.0,False,simplicity variance estimated_atomicity,BT,ACO,10000,193.863,42.932327,5.210911,0.698525,0.7018001,
2,PS-SW,4,5000.0,100.0,False,simplicity variance,BT,ACO,10000,263.916,33.542649,4.589799,0.782642,0.7827871,
3,PS-SWA,4,5000.0,100.0,False,simplicity variance estimated_atomicity,BT,ACO,10000,253.667,69.556492,6.677427,0.559258,0.5659373,
4,PS-SW,5,5000.0,100.0,False,simplicity variance,BT,ACO,10000,342.126,27.422898,4.163483,0.856791,0.8579668,
5,PS-SWA,5,5000.0,100.0,False,simplicity variance estimated_atomicity,BT,ACO,10000,348.406,74.811064,6.747368,0.543089,0.5497223,
6,PS-SW,3,5000.0,100.0,False,simplicity variance,BT,BBO,10000,107.508,26.040557,3.490044,0.915158,0.9157665,
7,PS-SWA,3,5000.0,100.0,False,simplicity variance estimated_atomicity,BT,BBO,10000,116.953,38.68482,3.929768,0.867867,0.8679049,
8,PS-SW,4,5000.0,100.0,False,simplicity variance,BT,BBO,10000,174.128,26.962116,3.243058,0.932871,0.9329336,
9,PS-SWA,4,5000.0,100.0,False,simplicity variance estimated_atomicity,BT,BBO,10000,189.794,31.403089,2.639495,0.883411,0.8837935,



Columns in accuracy_data: ['kind', 'depth', 'ps_budget', 'ps_population', 'avoid_ancestors', 'metrics', 'problem', 'pRef_method', 'pRef_size', 'runtime_seconds', 'mse', 'mae', 'r_sq', 'evs', 'cp']

Data types:


kind                object
depth                int64
ps_budget          float64
ps_population      float64
avoid_ancestors     object
metrics             object
problem             object
pRef_method         object
pRef_size            int64
runtime_seconds    float64
mse                float64
mae                float64
r_sq               float64
evs                float64
cp                 float64
dtype: object


Runtime data available:
Non-null runtime values: 720
Sample runtime values: 0    192.815
1    193.863
2    263.916
3    253.667
4    342.126
Name: runtime_seconds, dtype: float64
For the tree kind PS-SW, there are 90 rows
For the tree kind PS-SWA, there are 90 rows
For the tree kind IAI, there are 90 rows
For the tree kind Trad., there are 450 rows


In [69]:
import pandas as pd

pd.set_option('display.max_rows', None)  # Show all rows


In [70]:

def generate_statistical_test_data(accuracy_data: pd.DataFrame, input_directory, output_filename):
    depths = [3, 4, 5]
    usable_data = filter_dataframe(accuracy_data, pRef_size=10000)
    usable_data = usable_data[usable_data["depth"].isin(depths)]
    
    result_column = "r_sq"
    
    # Check which methods are actually available in your data
    available_methods = usable_data["kind"].unique()
    print(f"Available methods in your data: {available_methods}")
    
    def winning_competitor_for_competition_and_values(problem: str, depth: int, metaheuristic: str):
        # Get data for all available methods
        method_data = {}
        for method in available_methods:
            data = filter_dataframe(usable_data, problem=problem, depth=depth, pRef_method=metaheuristic, kind=method)[result_column]
            if len(data) > 0:  # Only include methods with data
                method_data[method] = data
        
        # If we don't have enough methods to compare, return NaN
        if len(method_data) < 2:
            return {
                "problem": problem,
                "depth": depth,
                "metaheuristic": metaheuristic,
                "comparison": "insufficient_data",
                "p_value": float('nan'),
                "method_1": None,
                "method_2": None,
                "method_1_mean": float('nan'),
                "method_2_mean": float('nan')
            }
        
        # If we have PS-SW and PS-SWA, compare them
        if "PS-SW" in method_data and "PS-SWA" in method_data:
            p_value = mannwhitneyu(method_data["PS-SW"], method_data["PS-SWA"], alternative="two-sided").pvalue
            comparison = "PS-SW_vs_PS-SWA"
            method_1, method_2 = "PS-SW", "PS-SWA"
            method_1_mean = method_data["PS-SW"].mean()
            method_2_mean = method_data["PS-SWA"].mean()
        else:
            p_value = float('nan')
            comparison = "no_valid_comparison"
            method_1, method_2 = None, None
            method_1_mean, method_2_mean = float('nan'), float('nan')
        
        return {
            "problem": problem,
            "depth": depth,
            "metaheuristic": metaheuristic,
            "comparison": comparison,
            "p_value": p_value,
            "method_1": method_1,
            "method_2": method_2,
            "method_1_mean": method_1_mean,
            "method_2_mean": method_2_mean
        }
    
    all_problems = usable_data["problem"].unique()
    all_metaheuristics = usable_data



In [71]:
    
    
def generate_statistical_test_data(accuracy_data: pd.DataFrame, input_directory, output_filename):
    depths = [3, 4, 5]
    usable_data = filter_dataframe(accuracy_data, pRef_size = 10000)
    usable_data = usable_data[usable_data["depth"].isin(depths)]
    
    result_column = "r_sq"
    
    def winning_competitor_for_competition_and_values(problem: str, depth: int, metaheuristic: str) -> (str, np.ndarray):
        for_each_method = {tree_method: filter_dataframe(usable_data, problem = problem, depth = depth, pRef_method = metaheuristic, kind = tree_method)[result_column]
                           for tree_method in {"PS-SW", "PS-SWA", "IAI", "Trad."}}
        
        iai_average = np.average(for_each_method["IAI"])
        naive_average = np.average(for_each_method["Trad."])
        
        
        winning_competitor_method = "IAI" if iai_average > naive_average else "Trad."
        
        p_value_between_w_and_competitor = mannwhitneyu(for_each_method["PS-SW"], for_each_method[winning_competitor_method], alternative="greater").pvalue
        p_value_between_wa_and_competitor = mannwhitneyu(for_each_method["PS-SWA"], for_each_method[winning_competitor_method], alternative="greater").pvalue
        
        return {"problem": problem,
                "depth": depth,
                "metaheuristic": metaheuristic,
                "p_value_sw":p_value_between_w_and_competitor,
                "p_value_swa": p_value_between_wa_and_competitor,
                "winning_competitor": winning_competitor_method}
    
    
    
    all_problems = usable_data["problem"].unique()
    all_metaheuristics = usable_data["pRef_method"].unique()
    
    dicts = [winning_competitor_for_competition_and_values(problem=problem, depth = depth, metaheuristic=metaheuristic)
             for problem in all_problems
             for depth in depths
             for metaheuristic in all_metaheuristics]
    
    return pd.DataFrame(dicts)
        
        
    

statistical_data = generate_statistical_test_data(accuracy_data, None, None)

pivot_table = statistical_data.pivot_table(index=["problem", "depth", "metaheuristic"], 
                                            values =["p_value_sw", "p_value_swa"])
display(statistical_data)
display(pivot_table)

Unnamed: 0,problem,depth,metaheuristic,p_value_sw,p_value_swa,winning_competitor
0,BT,3,ACO,0.166667,0.5,Trad.
1,BT,3,BBO,0.5,0.5,IAI
2,BT,3,BRO,0.833333,0.166667,Trad.
3,BT,3,CRO,0.166667,0.5,Trad.
4,BT,3,PSO,0.166667,0.166667,Trad.
5,BT,4,ACO,0.333333,0.833333,Trad.
6,BT,4,BBO,0.5,1.0,IAI
7,BT,4,BRO,0.833333,1.0,Trad.
8,BT,4,CRO,0.166667,0.166667,Trad.
9,BT,4,PSO,0.5,0.5,IAI


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,p_value_sw,p_value_swa
problem,depth,metaheuristic,Unnamed: 3_level_1,Unnamed: 4_level_1
BT,3,ACO,0.166667,0.5
BT,3,BBO,0.5,0.5
BT,3,BRO,0.833333,0.166667
BT,3,CRO,0.166667,0.5
BT,3,PSO,0.166667,0.166667
BT,4,ACO,0.333333,0.833333
BT,4,BBO,0.5,1.0
BT,4,BRO,0.833333,1.0
BT,4,CRO,0.166667,0.166667
BT,4,PSO,0.5,0.5


In [72]:
def generate_statistical_test_data(accuracy_data: pd.DataFrame, input_directory, output_filename):
    depths = [3, 4, 5]
    usable_data = filter_dataframe(accuracy_data, pRef_size=10000)
    usable_data = usable_data[usable_data["depth"].isin(depths)]
    
    result_column = "r_sq"
    time_column = "runtime_seconds"

    def winning_competitor_for_competition_and_values(problem: str, depth: int, metaheuristic: str):
        # Collect R2 and Runtime data for each method
        for_each_method_r2 = {}
        for_each_method_time = {}
        for tree_method in {"PS-SW", "PS-SWA", "IAI", "Trad."}:
            subset = filter_dataframe(usable_data, problem=problem, depth=depth, pRef_method=metaheuristic, kind=tree_method)
            if len(subset) > 0:
                for_each_method_r2[tree_method] = subset[result_column]
                # Check if runtime_seconds column exists and has data
                if time_column in subset.columns and subset[time_column].notna().any():
                    for_each_method_time[tree_method] = subset[time_column]
                else:
                    for_each_method_time[tree_method] = pd.Series([np.nan])  # Use NaN if no runtime data
        
        # If both IAI and Trad. available, decide winner
        iai_average = np.average(for_each_method_r2.get("IAI", [np.nan]))
        trad_average = np.average(for_each_method_r2.get("Trad.", [np.nan]))
        winning_competitor_method = "IAI" if iai_average > trad_average else "Trad."
        
        # Calculate p-values comparing PS-SW and PS-SWA with winning competitor
        if "PS-SW" in for_each_method_r2 and winning_competitor_method in for_each_method_r2:
            p_sw = mannwhitneyu(for_each_method_r2["PS-SW"], for_each_method_r2[winning_competitor_method], alternative="greater").pvalue
        else:
            p_sw = np.nan
        
        if "PS-SWA" in for_each_method_r2 and winning_competitor_method in for_each_method_r2:
            p_swa = mannwhitneyu(for_each_method_r2["PS-SWA"], for_each_method_r2[winning_competitor_method], alternative="greater").pvalue
        else:
            p_swa = np.nan
        
        # ---- Compute runtime averages ----
        mean_times = {m: np.nanmean(v) for m, v in for_each_method_time.items() if len(v) > 0}
        time_sw = mean_times.get("PS-SW", np.nan)
        time_swa = mean_times.get("PS-SWA", np.nan)
        time_iai = mean_times.get("IAI", np.nan)
        time_trad = mean_times.get("Trad.", np.nan)
        
        return {
            "problem": problem,
            "depth": depth,
            "metaheuristic": metaheuristic,
            "winning_competitor": winning_competitor_method,
            "p_value_sw": p_sw,
            "p_value_swa": p_swa,
            "runtime_sw": time_sw,
            "runtime_swa": time_swa,
            "runtime_iai": time_iai,
            "runtime_trad": time_trad
        }
    
    all_problems = usable_data["problem"].unique()
    all_metaheuristics = usable_data["pRef_method"].unique()
    
    dicts = [
        winning_competitor_for_competition_and_values(problem=problem, depth=depth, metaheuristic=metaheuristic)
        for problem in all_problems
        for depth in depths
        for metaheuristic in all_metaheuristics
    ]
    
    return pd.DataFrame(dicts)


In [73]:
# First generate the statistical data
statistical_data = generate_statistical_test_data(accuracy_data, None, None)



# Create the pivot table
pivot_table = statistical_data.pivot_table(
    index=["problem", "depth", "metaheuristic"],
    values=["p_value_sw", "p_value_swa", "runtime_sw", "runtime_swa", "runtime_iai", "runtime_trad"]
)

# Rename the columns for display
pivot_table = pivot_table.rename(columns={
    'runtime_sw': 'execution_time (PS-SW)',
    'runtime_swa': 'execution_time (PS-SWA)'
})

display(pivot_table.round(3))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,p_value_sw,p_value_swa,runtime_iai,execution_time (PS-SW),execution_time (PS-SWA),runtime_trad
problem,depth,metaheuristic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BT,3,ACO,0.167,0.5,42.844,192.815,193.863,44.531
BT,3,BBO,0.5,0.5,18.995,107.508,116.953,14.169
BT,3,BRO,0.833,0.167,40.417,94.66,158.032,8.045
BT,3,CRO,0.167,0.5,7.739,69.685,75.176,4.435
BT,3,PSO,0.167,0.167,10.31,80.628,88.323,7.775
BT,4,ACO,0.333,0.833,42.844,263.916,253.667,44.531
BT,4,BBO,0.5,1.0,18.995,174.128,189.794,14.169
BT,4,BRO,0.833,1.0,40.417,161.668,238.044,8.045
BT,4,CRO,0.167,0.167,7.739,136.64,133.536,4.435
BT,4,PSO,0.5,0.5,10.31,146.919,155.086,7.775


In [74]:
# Statistical data is now generated in Cell 9
print("Statistical data generated successfully!")
print(f"Shape: {statistical_data.shape}")
print(f"Columns: {list(statistical_data.columns)}")


Statistical data generated successfully!
Shape: (90, 10)
Columns: ['problem', 'depth', 'metaheuristic', 'winning_competitor', 'p_value_sw', 'p_value_swa', 'runtime_sw', 'runtime_swa', 'runtime_iai', 'runtime_trad']


In [75]:
# === New Table with All Runtime Columns ===

pivot_runtime = statistical_data.pivot_table(
    index=["problem", "depth", "metaheuristic"],
    values=["runtime_iai", "runtime_sw", "runtime_swa", "runtime_trad"]
).reset_index()

# Rename columns more descriptively
pivot_runtime.rename(columns={
    "runtime_sw": "Execution Time (PS-SW)",
    "runtime_swa": "Execution Time (PS-SWA)",
    "runtime_iai": "Runtime (IAI)",
    "runtime_trad": "Runtime (Trad.)"
}, inplace=True)

# Display runtime table
display(pivot_runtime)


Unnamed: 0,problem,depth,metaheuristic,Runtime (IAI),Execution Time (PS-SW),Execution Time (PS-SWA),Runtime (Trad.)
0,BT,3,ACO,42.844,192.815,193.863,44.5312
1,BT,3,BBO,18.995,107.508,116.953,14.169
2,BT,3,BRO,40.417,94.66,158.032,8.045
3,BT,3,CRO,7.739,69.685,75.176,4.4352
4,BT,3,PSO,10.31,80.628,88.323,7.7754
5,BT,4,ACO,42.844,263.916,253.667,44.5312
6,BT,4,BBO,18.995,174.128,189.794,14.169
7,BT,4,BRO,40.417,161.668,238.044,8.045
8,BT,4,CRO,7.739,136.64,133.536,4.4352
9,BT,4,PSO,10.31,146.919,155.086,7.7754


In [76]:
'''
def bold_max(row):
    row_as_numbers = [float(item[:-1]) for item in row]
    max_number = max(row_as_numbers)
    
    return ['font-weight: bold' if item == max_number else '' for item in row_as_numbers]
'''

'''
def bold_max(row):
    row_as_numbers = []
    for item in row:
        if isinstance(item, str) and item.endswith("%"):
            # Remove % and convert string to float
            row_as_numbers.append(float(item[:-1]))
        else:
            # Already a number (int or float)
            row_as_numbers.append(float(item))
    max_number = max(row_as_numbers)
    return ['font-weight: bold' if num == max_number else '' for num in row_as_numbers]
'''
def bold_max(row):
    numbers = []
    # Only include percentage columns (R²), ignore time columns ending with 's'
    for item in row:
        if isinstance(item, str) and item.endswith("%"):
            try:
                numbers.append(float(item[:-1]))
            except ValueError:
                numbers.append(float("-inf"))
        else:
            numbers.append(float("-inf"))
    if not any([n > float("-inf") for n in numbers]):
        # No percentage columns in this row, don't bold anything
        return ["" for _ in row]
    max_number = max(numbers)
    return ['font-weight: bold' if num == max_number and num > float("-inf") else '' for num in numbers]


def style_pivot_table(pivot_table):
    custom_column_order = ['PS-SW', 'PS-SWA', 'IAI', 'Trad.']

    # Check if this is an accuracy table (R² values) or execution time table
    if any('execution_time' in str(col) for col in pivot_table.columns):
        # This is an execution time table - format as seconds
        pivot_table = pivot_table.round(2).astype(str) + "s"
    else:
        # This is an accuracy table - format as percentages
        pivot_table = pivot_table.mul(100).round(1).astype(str) + "%"

    # Reorder columns based on custom order (only for accuracy tables)
    if not any('execution_time' in str(col) for col in pivot_table.columns):
        pivot_table = pivot_table.reindex(columns=custom_column_order)

    styled_df = pivot_table.style.apply(bold_max, axis=1)

    return styled_df

def put_latex_tables_side_by_side(left_latex, right_latex):
    return r"\begin{tabular}{ccccccc}\hline"+left_latex+r"\\ \hline\end{tabular}\quad\begin{tabular}{ccccccc}\hline"+right_latex+r"\\ \hline\end{tabular}"

def fix_latex(input_string):
    # Replace '%' with '\%'
    replacements = {"%":"\\%",
                    "pRef_method":"Met.",
                    "{SA}": r"{\rotcell{SA}}", # note that SA is a subset of SAT\_50 etc.., so it causes some issues
                    "SAT_S": "SAT\_20",
                    "SAT_M": "SAT\_50",
                    "SAT_L": "SAT\_100",
                    "GC_L": "GC\_anna",
                    "GC_S": "GC\_jean",
                    "uniform": "RS",
                    "kind": "tree",
                    r"\multirow[c]{12}" : r"\hline \multirow[c]{12}",
                    r"& \multirow[c]{3}" : r"\cline{2-7} & \multirow[c]{3}",
                    r"} \cline{2-7}" : "} ",
                   "\\font-weightbold": "",
                    "≪": "\ll "}

    texts_to_rotate = ["problem", "BT", "GC\_anna", "GC\_jean", "SAT\_20",  "SAT\_50",  "SAT\_100", "Met.", "GA", "Tabu", "RS", "PSO", "depth"]  # MOD: added PSO to rotate label in LaTeX output

    for item_to_rotate in texts_to_rotate:
        replacements[item_to_rotate] = r"\rotcell{"+item_to_rotate+"}"

    modified_string = str(input_string)
    for orig, replacement in replacements.items():
        modified_string = modified_string.replace(orig, replacement)
    
    return modified_string


def pivot_table_as_latex(pivot_table):
    latex_text = pivot_table.to_latex(convert_css=True)
    latex_text = fix_latex(latex_text)
    return latex_text

def create_execution_time_pivot_table(statistical_data):
    """Create a pivot table specifically for execution times"""
    # Create pivot table with execution time columns
    pivot_table = statistical_data.pivot_table(
        index=["problem", "depth", "metaheuristic"],
        values=["runtime_sw", "runtime_swa", "runtime_iai", "runtime_trad"]
    )
    
    # Rename columns for better display
    pivot_table = pivot_table.rename(columns={
        'runtime_sw': 'execution_time (PS-SW)',
        'runtime_swa': 'execution_time (PS-SWA)',
        'runtime_iai': 'execution_time (IAI)',
        'runtime_trad': 'execution_time (Trad.)'
    })
    
    # Apply styling
    styled_df = style_pivot_table(pivot_table)
    return styled_df

In [77]:
def pivot_table_as_latex(pivot_table):
    latex_text = pivot_table.to_latex(convert_css=True)
    latex_text = fix_latex(latex_text)
    return latex_text

In [78]:
# Initialize parameters
pRef_size = 10000
depths = [3, 4, 5]

# Filter data
usable_data = accuracy_data.copy()
usable_data = usable_data[usable_data["pRef_size"] == pRef_size] 
usable_data = usable_data[usable_data["depth"].isin(depths)]

independent_variables = ["problem", "pRef_method", "kind", "depth"]
dependent_variables = ["r_sq"]

problems = ["GC_S", "GC_L", "SAT_S", "SAT_M", "SAT_L", "BT"]
left_problems, right_problems = problems[:3], problems[3:]

def make_combined_table_for_problems(problem_subset):
    methods = ['PS-SW', 'PS-SWA', 'IAI', 'Trad.']
    with_right_problems = usable_data[usable_data['problem'].isin(problem_subset)]

    # R2 pivot (flatten columns)
    accuracy_pivot = with_right_problems.pivot_table(
        index=["problem", "pRef_method", "depth"],
        columns="kind",
        values="r_sq"
    )
    # Flatten MultiIndex columns
    if isinstance(accuracy_pivot.columns, pd.MultiIndex):
        accuracy_pivot.columns = [c for c in accuracy_pivot.columns.get_level_values(0)]
    
    # Build runtime table from statistical_data and flatten/index by method
    execution_times = []
    for _, row in statistical_data.iterrows():
        if row['problem'] in problem_subset:
            execution_times.append({
                'problem': row['problem'],
                'pRef_method': row['metaheuristic'],
                'depth': row['depth'],
                'PS-SW': row['runtime_sw'],
                'PS-SWA': row['runtime_swa'],
                'IAI': row['runtime_iai'],
                'Trad.': row['runtime_trad'],
            })
    runtime_df = pd.DataFrame(execution_times)
    runtime_pivot = runtime_df.pivot_table(
        index=["problem", "pRef_method", "depth"],
        values=methods
    )
    runtime_pivot.rename(columns={
        m: f"Execution Time ({m})" for m in methods
    }, inplace=True)

    # Concatenate pivots side by side
    combined = pd.concat([accuracy_pivot, runtime_pivot], axis=1)

    # Interleave columns: [PS-SW, Execution Time (PS-SW), PS-SWA, Execution Time (PS-SWA), ...]
    new_order = []
    for m in methods:
        if m in combined.columns:
            new_order.append(m)
        runtime_col = f"Execution Time ({m})"
        if runtime_col in combined.columns:
            new_order.append(runtime_col)
    combined = combined[new_order]

    # Format columns
    for m in methods:
        if m in combined.columns:
            combined[m] = (combined[m].astype(float) * 100).round(1).astype(str) + '%'
        runtime_col = f"Execution Time ({m})"
        if runtime_col in combined.columns:
            combined[runtime_col] = combined[runtime_col].astype(float).round(2).astype(str) + "s"

    return combined.style.apply(bold_max, axis=1)


# Create tables
left_table = make_combined_table_for_problems(left_problems)
right_table = make_combined_table_for_problems(right_problems)

# Display tables
display(left_table)
display(right_table)

# Add diagnostic information
print("\nDiagnostic Information:")
print("Statistical Data Columns:", statistical_data.columns.tolist())
print("\nSample row from statistical_data:")
print(statistical_data.iloc[0] if len(statistical_data) > 0 else "No data")


left_table_latex = pivot_table_as_latex(left_table)
right_table_latex = pivot_table_as_latex(right_table)

full_table_latex = put_latex_tables_side_by_side(left_table_latex, right_table_latex)

print("left table:")
print(left_table_latex)

print("\n\n\n\n\n\nright table")
print(right_table_latex)





Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PS-SW,Execution Time (PS-SW),PS-SWA,Execution Time (PS-SWA),IAI,Execution Time (IAI),Trad.,Execution Time (Trad.)
problem,pRef_method,depth,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
GC_L,ACO,3,44.6%,105.98s,43.6%,96.53s,13.7%,17.22s,18.0%,18.76s
GC_L,ACO,4,33.2%,156.75s,15.0%,143.38s,13.7%,17.22s,25.2%,18.76s
GC_L,ACO,5,47.3%,249.26s,14.4%,240.38s,13.7%,17.22s,22.3%,18.76s
GC_L,BBO,3,89.0%,78.28s,72.1%,72.63s,45.5%,6.03s,77.3%,4.25s
GC_L,BBO,4,88.1%,167.0s,84.3%,136.7s,65.9%,6.03s,79.8%,4.25s
GC_L,BBO,5,85.6%,250.88s,89.6%,254.59s,67.0%,6.03s,78.5%,4.25s
GC_L,BRO,3,-0.0%,38.15s,39.3%,90.94s,46.8%,24.66s,57.7%,1.77s
GC_L,BRO,4,55.5%,123.79s,57.1%,166.2s,55.8%,24.66s,64.5%,1.77s
GC_L,BRO,5,66.2%,263.72s,74.3%,298.83s,65.2%,24.66s,70.1%,1.77s
GC_L,CRO,3,82.4%,59.43s,81.2%,59.44s,59.8%,3.37s,70.7%,0.96s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PS-SW,Execution Time (PS-SW),PS-SWA,Execution Time (PS-SWA),IAI,Execution Time (IAI),Trad.,Execution Time (Trad.)
problem,pRef_method,depth,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
BT,ACO,3,78.9%,192.82s,69.9%,193.86s,38.4%,42.84s,60.8%,44.53s
BT,ACO,4,78.3%,263.92s,55.9%,253.67s,57.3%,42.84s,68.2%,44.53s
BT,ACO,5,85.7%,342.13s,54.3%,348.41s,61.3%,42.84s,67.9%,44.53s
BT,BBO,3,91.5%,107.51s,86.8%,116.95s,84.8%,19.0s,81.8%,14.17s
BT,BBO,4,93.3%,174.13s,88.3%,189.79s,89.9%,19.0s,85.3%,14.17s
BT,BBO,5,95.4%,297.46s,92.6%,333.86s,90.6%,19.0s,87.3%,14.17s
BT,BRO,3,27.8%,94.66s,47.5%,158.03s,28.3%,40.42s,37.3%,8.04s
BT,BRO,4,31.6%,161.67s,21.5%,238.04s,38.0%,40.42s,42.8%,8.04s
BT,BRO,5,67.7%,5985.06s,20.8%,345.1s,46.5%,40.42s,45.6%,8.04s
BT,CRO,3,90.7%,69.68s,86.5%,75.18s,76.3%,7.74s,83.2%,4.44s



Diagnostic Information:
Statistical Data Columns: ['problem', 'depth', 'metaheuristic', 'winning_competitor', 'p_value_sw', 'p_value_swa', 'runtime_sw', 'runtime_swa', 'runtime_iai', 'runtime_trad']

Sample row from statistical_data:
problem                     BT
depth                        3
metaheuristic              ACO
winning_competitor       Trad.
p_value_sw            0.166667
p_value_swa                0.5
runtime_sw             192.815
runtime_swa            193.863
runtime_iai             42.844
runtime_trad           44.5312
Name: 0, dtype: object
left table:
\begin{tabular}{lllllllllll}
 &  &  & PS-SW & Execution Time (PS-SW) & PS-SWA & Execution Time (PS-SWA) & IAI & Execution Time (IAI) & Trad. & Execution Time (Trad.) \\
\rotcell{problem} & \rotcell{Met.} & \rotcell{depth} &  &  &  &  &  &  &  &  \\
\multirow[c]{15}{*}{\rotcell{GC\_anna}}  & \multirow[c]{3}{*}{ACO} & 3 & \bfseries 44.6\% & 105.98s & 43.6\% & 96.53s & 13.7\% & 17.22s & 18.0\% & 18.76s \\
 &  & 4 & \bfs

In [80]:
# Define methods for accuracy and for runtime separately
accuracy_methods = ['PS-SW', 'PS-SWA', 'IAI', 'Trad.']
runtime_methods = ['PS-SW', 'PS-SWA']  # exclude IAI and Trad. runtimes

# Create accuracy pivot table that includes all methods
accuracy_pivot = usable_data.pivot_table(
    index=["problem", "pRef_method", "depth"],
    columns="kind",
    values="r_sq"
)

# Flatten MultiIndex columns if present
if isinstance(accuracy_pivot.columns, pd.MultiIndex):
    accuracy_pivot.columns = [c for c in accuracy_pivot.columns.get_level_values(0)]

# Build runtime data only for selected runtime methods
execution_times = []
for _, row in statistical_data.iterrows():
    execution_times.append({
        'problem': row['problem'],
        'pRef_method': row['metaheuristic'],
        'depth': row['depth'],
        'PS-SW': row['runtime_sw'],
        'PS-SWA': row['runtime_swa'],
        # Do not include 'IAI' or 'Trad.' runtime columns here
    })

runtime_df = pd.DataFrame(execution_times)
runtime_pivot = runtime_df.pivot_table(
    index=["problem", "pRef_method", "depth"],
    values=runtime_methods
)
runtime_pivot.rename(columns={
    "PS-SW": "Execution Time (PS-SW)",
    "PS-SWA": "Execution Time (PS-SWA)"
}, inplace=True)

# Concatenate accuracy and runtime pivots horizontally
combined = pd.concat([accuracy_pivot, runtime_pivot], axis=1)

# Interleave columns:
ordered_cols = []
for m in accuracy_methods:
    if m in combined.columns:
        ordered_cols.append(m)
    runtime_col = f"Execution Time ({m})"
    if runtime_col in combined.columns:
        ordered_cols.append(runtime_col)
combined = combined[ordered_cols]

# Format R² as percentages and runtime as seconds
for m in accuracy_methods:
    if m in combined.columns:
        combined[m] = (combined[m].astype(float) * 100).round(1).astype(str) + '%'
    runtime_col = f"Execution Time ({m})"
    if runtime_col in combined.columns:
        combined[runtime_col] = combined[runtime_col].astype(float).round(2).astype(str) + 's'

# Style and display with bold max on R² columns only
styled = combined.style.apply(bold_max, axis=1)
display(styled)


left_table_latex = pivot_table_as_latex(left_table)
right_table_latex = pivot_table_as_latex(right_table)

full_table_latex = put_latex_tables_side_by_side(left_table_latex, right_table_latex)

print("left table:")
print(left_table_latex)

print("\n\n\n\n\n\nright table")
print(right_table_latex)






Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PS-SW,Execution Time (PS-SW),PS-SWA,Execution Time (PS-SWA),IAI,Trad.
problem,pRef_method,depth,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BT,ACO,3,78.9%,192.82s,69.9%,193.86s,38.4%,60.8%
BT,ACO,4,78.3%,263.92s,55.9%,253.67s,57.3%,68.2%
BT,ACO,5,85.7%,342.13s,54.3%,348.41s,61.3%,67.9%
BT,BBO,3,91.5%,107.51s,86.8%,116.95s,84.8%,81.8%
BT,BBO,4,93.3%,174.13s,88.3%,189.79s,89.9%,85.3%
BT,BBO,5,95.4%,297.46s,92.6%,333.86s,90.6%,87.3%
BT,BRO,3,27.8%,94.66s,47.5%,158.03s,28.3%,37.3%
BT,BRO,4,31.6%,161.67s,21.5%,238.04s,38.0%,42.8%
BT,BRO,5,67.7%,5985.06s,20.8%,345.1s,46.5%,45.6%
BT,CRO,3,90.7%,69.68s,86.5%,75.18s,76.3%,83.2%


left table:
\begin{tabular}{lllllllllll}
 &  &  & PS-SW & Execution Time (PS-SW) & PS-SWA & Execution Time (PS-SWA) & IAI & Execution Time (IAI) & Trad. & Execution Time (Trad.) \\
\rotcell{problem} & \rotcell{Met.} & \rotcell{depth} &  &  &  &  &  &  &  &  \\
\multirow[c]{15}{*}{\rotcell{GC\_anna}}  & \multirow[c]{3}{*}{ACO} & 3 & \bfseries 44.6\% & 105.98s & 43.6\% & 96.53s & 13.7\% & 17.22s & 18.0\% & 18.76s \\
 &  & 4 & \bfseries 33.2\% & 156.75s & 15.0\% & 143.38s & 13.7\% & 17.22s & 25.2\% & 18.76s \\
 &  & 5 & \bfseries 47.3\% & 249.26s & 14.4\% & 240.38s & 13.7\% & 17.22s & 22.3\% & 18.76s \\
 \cline{2-7} & \multirow[c]{3}{*}{BBO} & 3 & \bfseries 89.0\% & 78.28s & 72.1\% & 72.63s & 45.5\% & 6.03s & 77.3\% & 4.25s \\
 &  & 4 & \bfseries 88.1\% & 167.0s & 84.3\% & 136.7s & 65.9\% & 6.03s & 79.8\% & 4.25s \\
 &  & 5 & 85.6\% & 250.88s & \bfseries 89.6\% & 254.59s & 67.0\% & 6.03s & 78.5\% & 4.25s \\
 \cline{2-7} & \multirow[c]{3}{*}{BRO} & 3 & -0.0\% & 38.15s & 39.3\% & 90.94s & 4

In [None]:
# Diagnostic checks
print("Statistical Data Info:")
print(statistical_data.info())
print("\nPivot Runtime Info:")
print(pivot_runtime.info())

print("\nSample of Statistical Data:")
print(statistical_data.head())
print("\nSample of Pivot Runtime:")
print(pivot_runtime.head())

Statistical Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   problem             90 non-null     object 
 1   depth               90 non-null     int64  
 2   metaheuristic       90 non-null     object 
 3   winning_competitor  90 non-null     object 
 4   p_value_sw          90 non-null     float64
 5   p_value_swa         90 non-null     float64
 6   runtime_sw          90 non-null     float64
 7   runtime_swa         90 non-null     float64
 8   runtime_iai         90 non-null     float64
 9   runtime_trad        90 non-null     float64
dtypes: float64(6), int64(1), object(3)
memory usage: 7.2+ KB
None

Pivot Runtime Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   ------------

In [None]:
result = []
print(accuracy_data["problem"].unique())
print(accuracy_data["pRef_method"].unique())
for method in accuracy_data["pRef_method"].unique():
    for problem in accuracy_data["problem"].unique():
        for depth in [3, 4, 5]:
            appropriate_data = filter_dataframe(accuracy_data, depth = depth, pRef_method = method, problem = problem, pRef_size=10000)
            trad_data = appropriate_data[appropriate_data["kind"] == "Trad."]["r_sq"][:100]
            own_data = appropriate_data[appropriate_data["kind"] == "PS-SW"]["r_sq"][:100]
            p_value = mannwhitneyu(x = own_data, y = trad_data, alternative="greater").pvalue
            diff = np.average(own_data)-np.average(trad_data)
            
            p_value_string = "0.001" if p_value < 0.001 else round(p_value, 3)
            res = f"{round(diff*100, 2)}%, {p_value_string}"
            if p_value < 0.05:
                res = "\\bfseries "+res
            
            
            result.append({"method":method,
                           "problem":problem,
                           "depth":depth,
                           "res":res})
            
        
improvements = pd.DataFrame(result)
display(improvements)
pivot_table = improvements.pivot_table(index=["problem", "method"],
                                       columns="depth",
                                       values=["res"],
                                       aggfunc=lambda x:x)

#pivot_table = pivot_table.mul(100).round(1).astype(str) + "%"    

display(pivot_table)

latex_code = pivot_table.to_latex()

def fix_latex(input_string):
    # Replace '%' with '\%'
    replacements = {"%":"\\%",
                    "pRef_method":"Met.",
                    "BT":"Staff R.",
                    "SAT_S": "SAT\_20",
                    "SAT_M": "SAT\_50",
                    "SAT_L": "SAT\_100",
                    "GC_L": "GC\_anna",
                    "GC_S": "GC\_jean",
                    "uniform": "RS",
                    "kind": "tree",
                   "\\font-weightbold": "",
                    "res": "average R^2 improvemennt between PS-SW and Trad, with p-value",
                    "≪": "\ll "}


    modified_string = str(input_string)
    for orig, replacement in replacements.items():
        modified_string = modified_string.replace(orig, replacement)
    
    return modified_string
latex_code = fix_latex(latex_code)
print(latex_code)


['BT' 'GC_L' 'GC_S' 'SAT_S' 'SAT_M' 'SAT_L']
['ACO' 'BBO' 'BRO' 'CRO' 'PSO']


Unnamed: 0,method,problem,depth,res
0,ACO,BT,3,"18.02%, 0.167"
1,ACO,BT,4,"10.04%, 0.333"
2,ACO,BT,5,"17.76%, 0.167"
3,ACO,GC_L,3,"26.55%, 0.167"
4,ACO,GC_L,4,"7.95%, 0.5"
5,ACO,GC_L,5,"25.0%, 0.167"
6,ACO,GC_S,3,"9.18%, 0.333"
7,ACO,GC_S,4,"-4.25%, 0.833"
8,ACO,GC_S,5,"2.8%, 0.667"
9,ACO,SAT_S,3,"-5.83%, 0.667"


Unnamed: 0_level_0,Unnamed: 1_level_0,res,res,res
Unnamed: 0_level_1,depth,3,4,5
problem,method,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
BT,ACO,"18.02%, 0.167","10.04%, 0.333","17.76%, 0.167"
BT,BBO,"9.75%, 0.167","7.98%, 0.167","8.14%, 0.167"
BT,BRO,"-9.53%, 0.833","-11.24%, 0.833","22.18%, 0.167"
BT,CRO,"7.47%, 0.167","3.92%, 0.167","5.41%, 0.167"
BT,PSO,"6.76%, 0.167","15.2%, 0.167","11.32%, 0.167"
GC_L,ACO,"26.55%, 0.167","7.95%, 0.5","25.0%, 0.167"
GC_L,BBO,"11.66%, 0.167","8.27%, 0.167","7.16%, 0.333"
GC_L,BRO,"-57.7%, 1.0","-9.03%, 1.0","-3.95%, 1.0"
GC_L,CRO,"11.72%, 0.167","6.16%, 0.333","-13.69%, 0.833"
GC_L,PSO,"4.12%, 0.333","9.74%, 0.167","2.3%, 0.333"


\begin{tabular}{lllll}
\toprule
 &  & \multicolumn{3}{r}{average R^2 improvemennt between PS-SW and Trad, with p-value} \\
 & depth & 3 & 4 & 5 \\
problem & method &  &  &  \\
\midrule
\multirow[t]{5}{*}{Staff R.} & ACO & 18.02\%, 0.167 & 10.04\%, 0.333 & 17.76\%, 0.167 \\
 & BBO & 9.75\%, 0.167 & 7.98\%, 0.167 & 8.14\%, 0.167 \\
 & BRO & -9.53\%, 0.833 & -11.24\%, 0.833 & 22.18\%, 0.167 \\
 & CRO & 7.47\%, 0.167 & 3.92\%, 0.167 & 5.41\%, 0.167 \\
 & PSO & 6.76\%, 0.167 & 15.2\%, 0.167 & 11.32\%, 0.167 \\
\cline{1-5}
\multirow[t]{5}{*}{GC\_anna} & ACO & 26.55\%, 0.167 & 7.95\%, 0.5 & 25.0\%, 0.167 \\
 & BBO & 11.66\%, 0.167 & 8.27\%, 0.167 & 7.16\%, 0.333 \\
 & BRO & -57.7\%, 1.0 & -9.03\%, 1.0 & -3.95\%, 1.0 \\
 & CRO & 11.72\%, 0.167 & 6.16\%, 0.333 & -13.69\%, 0.833 \\
 & PSO & 4.12\%, 0.333 & 9.74\%, 0.167 & 2.3\%, 0.333 \\
\cline{1-5}
\multirow[t]{5}{*}{GC\_jean} & ACO & 9.18\%, 0.333 & -4.25\%, 0.833 & 2.8\%, 0.667 \\
 & BBO & 15.71\%, 0.167 & 8.15\%, 0.167 & 9.96\%, 0.167 \\
 & 

In [None]:
# Display runtime data for PS-SW and PS-SWA
print("Runtime Analysis for PS-SW and PS-SWA:")
print("=" * 50)

# Create a focused view on runtime data
runtime_data = statistical_data[['problem', 'depth', 'metaheuristic', 'runtime_sw', 'runtime_swa', 'runtime_iai', 'runtime_trad']].copy()

# Display runtime pivot table
runtime_pivot = runtime_data.pivot_table(
    index=["problem", "depth", "metaheuristic"],
    values=["runtime_sw", "runtime_swa", "runtime_iai", "runtime_trad"]
)

print("\nRuntime Pivot Table (seconds):")
display(runtime_pivot.round(2))

# Create a comparison table showing PS-SW vs PS-SWA runtime
comparison_data = []
for _, row in statistical_data.iterrows():
    if not pd.isna(row['runtime_sw']) and not pd.isna(row['runtime_swa']):
        speedup = row['runtime_swa'] / row['runtime_sw'] if row['runtime_sw'] > 0 else np.nan
        comparison_data.append({
            'problem': row['problem'],
            'depth': row['depth'], 
            'metaheuristic': row['metaheuristic'],
            'PS-SW_runtime': row['runtime_sw'],
            'PS-SWA_runtime': row['runtime_swa'],
            'speedup_ratio': speedup,
            'faster_method': 'PS-SW' if speedup > 1 else 'PS-SWA' if speedup < 1 else 'Equal'
        })

if comparison_data:
    comparison_df = pd.DataFrame(comparison_data)
    print("\nPS-SW vs PS-SWA Runtime Comparison:")
    display(comparison_df.round(3))
    
    # Summary statistics
    print(f"\nSummary:")
    print(f"Average speedup (PS-SWA/PS-SW): {comparison_df['speedup_ratio'].mean():.2f}")
    print(f"PS-SW faster in {len(comparison_df[comparison_df['faster_method'] == 'PS-SW'])} cases")
    print(f"PS-SWA faster in {len(comparison_df[comparison_df['faster_method'] == 'PS-SWA'])} cases")
else:
    print("No runtime comparison data available")


Runtime Analysis for PS-SW and PS-SWA:

Runtime Pivot Table (seconds):


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,runtime_iai,runtime_sw,runtime_swa,runtime_trad
problem,depth,metaheuristic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BT,3,ACO,42.84,192.82,193.86,44.53
BT,3,BBO,19.0,107.51,116.95,14.17
BT,3,BRO,40.42,94.66,158.03,8.04
BT,3,CRO,7.74,69.68,75.18,4.44
BT,3,PSO,10.31,80.63,88.32,7.78
BT,4,ACO,42.84,263.92,253.67,44.53
BT,4,BBO,19.0,174.13,189.79,14.17
BT,4,BRO,40.42,161.67,238.04,8.04
BT,4,CRO,7.74,136.64,133.54,4.44
BT,4,PSO,10.31,146.92,155.09,7.78



PS-SW vs PS-SWA Runtime Comparison:


Unnamed: 0,problem,depth,metaheuristic,PS-SW_runtime,PS-SWA_runtime,speedup_ratio,faster_method
0,BT,3,ACO,192.815,193.863,1.005,PS-SW
1,BT,3,BBO,107.508,116.953,1.088,PS-SW
2,BT,3,BRO,94.66,158.032,1.669,PS-SW
3,BT,3,CRO,69.685,75.176,1.079,PS-SW
4,BT,3,PSO,80.628,88.323,1.095,PS-SW
5,BT,4,ACO,263.916,253.667,0.961,PS-SWA
6,BT,4,BBO,174.128,189.794,1.09,PS-SW
7,BT,4,BRO,161.668,238.044,1.472,PS-SW
8,BT,4,CRO,136.64,133.536,0.977,PS-SWA
9,BT,4,PSO,146.919,155.086,1.056,PS-SW



Summary:
Average speedup (PS-SWA/PS-SW): 1.03
PS-SW faster in 43 cases
PS-SWA faster in 47 cases


In [None]:
# Diagnostic: Check if runtime data is properly loaded
print("=== RUNTIME DATA DIAGNOSTIC ===")
print(f"Total rows in accuracy_data: {len(accuracy_data)}")
print(f"Columns: {list(accuracy_data.columns)}")

if 'runtime_seconds' in accuracy_data.columns:
    runtime_stats = accuracy_data['runtime_seconds'].describe()
    print(f"\nRuntime statistics:")
    print(runtime_stats)
    
    # Check runtime by method
    print(f"\nRuntime by method:")
    for method in accuracy_data['kind'].unique():
        method_data = accuracy_data[accuracy_data['kind'] == method]['runtime_seconds']
        non_null_count = method_data.notna().sum()
        print(f"  {method}: {non_null_count} non-null values")
        if non_null_count > 0:
            print(f"    Mean: {method_data.mean():.2f}s, Std: {method_data.std():.2f}s")
else:
    print("ERROR: runtime_seconds column not found!")
    print("Available columns:", list(accuracy_data.columns))

# Check a sample of the data
print(f"\nSample data (first 3 rows):")
display(accuracy_data[['problem', 'pRef_method', 'kind', 'depth', 'r_sq', 'runtime_seconds']].head(3))


=== RUNTIME DATA DIAGNOSTIC ===
Total rows in accuracy_data: 720
Columns: ['kind', 'depth', 'ps_budget', 'ps_population', 'avoid_ancestors', 'metrics', 'problem', 'pRef_method', 'pRef_size', 'runtime_seconds', 'mse', 'mae', 'r_sq', 'evs', 'cp']

Runtime statistics:
count     720.000000
mean       55.794985
std       236.095311
min         0.722000
25%         2.522750
50%         7.077500
75%        47.785000
max      5985.061000
Name: runtime_seconds, dtype: float64

Runtime by method:
  PS-SW: 90 non-null values
    Mean: 227.48s, Std: 619.15s
  PS-SWA: 90 non-null values
    Mean: 170.48s, Std: 96.98s
  IAI: 90 non-null values
    Mean: 12.99s, Std: 12.21s
  Trad.: 450 non-null values
    Mean: 7.08s, Std: 9.49s

Sample data (first 3 rows):


Unnamed: 0,problem,pRef_method,kind,depth,r_sq,runtime_seconds
0,BT,ACO,PS-SW,3,0.788686,192.815
1,BT,ACO,PS-SWA,3,0.698525,193.863
2,BT,ACO,PS-SW,4,0.782642,263.916


In [None]:
# Create execution time pivot table with styling
print("Execution Time Pivot Table:")
print("=" * 50)

execution_time_table = create_execution_time_pivot_table(statistical_data)
display(execution_time_table)

# Also create LaTeX version
execution_time_latex = pivot_table_as_latex(execution_time_table)
print("\nLaTeX version:")
print(execution_time_latex)


Execution Time Pivot Table:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,execution_time (IAI),execution_time (PS-SW),execution_time (PS-SWA),execution_time (Trad.)
problem,depth,metaheuristic,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BT,3,ACO,42.84s,192.82s,193.86s,44.53s
BT,3,BBO,19.0s,107.51s,116.95s,14.17s
BT,3,BRO,40.42s,94.66s,158.03s,8.04s
BT,3,CRO,7.74s,69.68s,75.18s,4.44s
BT,3,PSO,10.31s,80.63s,88.32s,7.78s
BT,4,ACO,42.84s,263.92s,253.67s,44.53s
BT,4,BBO,19.0s,174.13s,189.79s,14.17s
BT,4,BRO,40.42s,161.67s,238.04s,8.04s
BT,4,CRO,7.74s,136.64s,133.54s,4.44s
BT,4,PSO,10.31s,146.92s,155.09s,7.78s



LaTeX version:
\begin{tabular}{lllllll}
 &  &  & execution_time (IAI) & execution_time (PS-SW) & execution_time (PS-SWA) & execution_time (Trad.) \\
problem & depth & metaheuristic &  &  &  &  \\
\multirow[c]{15}{*}{Staff R.} & \multirow[c]{5}{*}{3} & ACO & 42.84s & 192.82s & 193.86s & 44.53s \\
 &  & BBO & 19.0s & 107.51s & 116.95s & 14.17s \\
 &  & BRO & 40.42s & 94.66s & 158.03s & 8.04s \\
 &  & CRO & 7.74s & 69.68s & 75.18s & 4.44s \\
 &  & PSO & 10.31s & 80.63s & 88.32s & 7.78s \\
 & \multirow[c]{5}{*}{4} & ACO & 42.84s & 263.92s & 253.67s & 44.53s \\
 &  & BBO & 19.0s & 174.13s & 189.79s & 14.17s \\
 &  & BRO & 40.42s & 161.67s & 238.04s & 8.04s \\
 &  & CRO & 7.74s & 136.64s & 133.54s & 4.44s \\
 &  & PSO & 10.31s & 146.92s & 155.09s & 7.78s \\
 & \multirow[c]{5}{*}{5} & ACO & 42.84s & 342.13s & 348.41s & 44.53s \\
 &  & BBO & 19.0s & 297.46s & 333.86s & 14.17s \\
 &  & BRO & 40.42s & 5985.06s & 345.1s & 8.04s \\
 &  & CRO & 7.74s & 258.19s & 232.34s & 4.44s \\
 &  & PSO & 10.3