In [1]:
import pandas as pd
pd.set_option("display.max_columns", None)

import numpy as np

from pathlib import Path
import itertools
import re

from scipy.stats import mannwhitneyu, wilcoxon
from VD_A import VD_A

from common import *

# Get Data from Files (if needed)

In [2]:
# Cell purpose: load the per-run benchmark results, either from a local pickle
# cache or (if the cache is missing) from the raw CSV result files.
DATA_PREFIX = "last"
raw_results_path = Path("/Volumes/Transcend/data/QRefactoring-results-final/QRefactoring-results-final")
big_benchmarks_pickle = Path(f"{DATA_PREFIX.replace('%', '')}-PI-data.pkl")


big_benchmarks_df = None
if big_benchmarks_pickle.exists():
    print("Pickle exists. Using it!")
    # NOTE: unpickling executes arbitrary code -- only load caches written by this notebook.
    big_benchmarks_df = pd.read_pickle(big_benchmarks_pickle)
else:
    print("Data pickle does not exists. Creating it!")
    # Path.glob() on a missing directory silently yields nothing; fail early
    # with a clear message instead of continuing (and caching) with empty inputs.
    if not raw_results_path.is_dir():
        raise FileNotFoundError(
            f"Raw results directory not found: {raw_results_path} "
            f"(needed because the cache {big_benchmarks_pickle} does not exist)"
        )

    dci_files = list(raw_results_path.glob("*-DCI.csv"))
    pi_files = list(raw_results_path.glob("*-PI.csv"))
    normpi_files = list(raw_results_path.glob("*-normPI.csv"))

    print("Respective PI files in raw results path", len(dci_files), len(pi_files), len(normpi_files))

    # extract_values comes from `common`; its exact semantics are not visible here.
    pis_df = extract_values(pi_files, earliest_finish=DATA_PREFIX)
    normpis_df = extract_values(normpi_files, earliest_finish=DATA_PREFIX)
    dci_df = extract_values(dci_files, earliest_finish=DATA_PREFIX)
    dci_df = dci_df.rename(columns={"_all": "DCI_all", "_min": "DCI_min", "_min_overall": "DCI_min_overall", })

    # merge the data: normalised PIs get the "_norm" suffix, clashing DCI columns the "DCI" suffix
    merge_cols = ['problem', 'seed', 'option', 'ngen', 'timestamp', 'qubits', 'arbitrary']
    big_benchmarks_df = pd.merge(pis_df, normpis_df, how='left', on=merge_cols, suffixes=('', '_norm'))
    big_benchmarks_df = pd.merge(big_benchmarks_df, dci_df, how='left', on=merge_cols, suffixes=('', 'DCI'))
    # translate raw option identifiers into display labels (KeyError on unknown options)
    big_benchmarks_df["option"] = big_benchmarks_df.option.apply(lambda x: better_options[x])
    big_benchmarks_df["repair"] = big_benchmarks_df.problem.apply(lambda p: p in repair_circuits)

    big_benchmarks_df = big_benchmarks_df[["problem", "option", "seed", "qubits", "arbitrary", "ngen", "timestamp", "DCI_min", 'HV_min_norm', 'igdplus_min_norm', "repair"]]  # if there is more data inside, only keep those columns that we are interested in!

    big_benchmarks_df.to_pickle(big_benchmarks_pickle)

Pickle exists. Using it!


In [3]:
# Preview the first five rows of the per-run benchmark frame.
big_benchmarks_df.head(n=5)

Unnamed: 0,problem,option,seed,qubits,arbitrary,ngen,timestamp,DCI_min,HV_min_norm,igdplus_min_norm,repair
0,AA2,Hybrid,6,2,True,149.0,2082.869706,0.000386,0.549934,0.867129,False
1,AA2,Hybrid,114,2,True,149.0,2045.312278,0.000386,0.549934,0.867129,False
2,AA2,Hybrid,124,2,True,149.0,2496.109194,0.000386,0.549934,0.867129,False
3,AA2,Hybrid,144,2,True,149.0,1902.832277,0.000386,0.549934,0.867129,False
4,AA2,Hybrid,190,2,True,149.0,2107.49759,0.000386,0.549934,0.867129,False


In [4]:
# Compute means for 30 runs of the same setting!
means_cols = ["problem", "option"]

# Average every remaining column within each (problem, option) group and turn
# the group keys back into ordinary columns.
big_means_df = (
    big_benchmarks_df
    .groupby(by=means_cols)
    .mean()
    .reset_index()
)
big_means_df

Unnamed: 0,problem,option,seed,qubits,arbitrary,ngen,timestamp,DCI_min,HV_min_norm,igdplus_min_norm,repair
0,AA2,Fixed,541.366667,2.0,1.0,1599.0,916.969257,0.000280,0.621664,0.859208,0.0
1,AA2,Hybrid,541.366667,2.0,1.0,149.0,2026.281710,0.000469,0.551052,0.865381,0.0
2,AA2,Hybrid_{Init=20},541.366667,2.0,1.0,149.0,1545.584055,0.000997,0.567192,0.865444,0.0
3,AA2,Hybrid_{N=100},541.366667,2.0,1.0,149.0,5880.963654,0.000433,0.554111,0.866412,0.0
4,AA2,Hybrid_{N=200},541.366667,2.0,1.0,149.0,13365.190204,0.000465,0.559878,0.867061,0.0
...,...,...,...,...,...,...,...,...,...,...,...
315,wstate_n3,Hybrid_{Init=20},541.366667,3.0,0.0,149.0,6462.615873,0.002077,0.801224,0.651825,0.0
316,wstate_n3,Hybrid_{N=100},541.366667,3.0,0.0,149.0,14190.076376,0.001808,0.799330,0.655974,0.0
317,wstate_n3,Hybrid_{N=200},541.366667,3.0,0.0,149.0,29278.812773,0.001684,0.803281,0.653894,0.0
318,wstate_n3,Hybrid_{Q2},541.366667,3.0,0.0,149.0,27451.773422,0.001510,0.801621,0.672515,0.0


In [5]:
# Columns of the means frame that are compared between approaches.
performance_indicators = [
    "DCI_min",
    "HV_min_norm",
    "igdplus_min_norm",
]

# How many symbols are drawn for each effect-size magnitude label
# (used as a repetition factor in wilcoxon_pi_table).
effect_sizes = dict(negligible=1, small=2, medium=3, large=4)
def wilcoxon_pi_table(means_dataframe, pis, alpha_correct=1, use_wilcoxon=True):
    """Build a pairwise significance / effect-size symbol table for the three approaches.

    Every ordered pair (row approach A, column approach B) of the Hybrid,
    Non-Hybrid and Fixed approaches is compared on each indicator in ``pis``.

    Parameters
    ----------
    means_dataframe : pandas.DataFrame
        Must contain the columns ``option`` and ``problem`` plus every name in
        ``pis``. Presumably one row per (problem, option) -- confirm with caller.
    pis : list of str
        Indicator column names. Names containing "hv" or "dci" (case-insensitive)
        are treated as higher-is-better, all others as lower-is-better.
    alpha_correct : number, default 1
        Divisor applied to the significance level 0.05.
    use_wilcoxon : bool, default True
        ``True`` uses ``scipy.stats.wilcoxon`` (paired samples), ``False`` uses
        ``scipy.stats.mannwhitneyu``.

    Returns
    -------
    pandas.DataFrame
        Indexed and columned by approach label. The diagonal holds ``""``; every
        other cell holds one symbol group per indicator, joined by three spaces:
        ``"="`` if all element-wise differences are zero or the p-value is not
        below the threshold, otherwise ``"+"`` or ``"X"`` repeated according to
        ``effect_sizes`` (``"+"`` presumably meaning the row approach is better;
        this depends on the semantics of ``VD_A`` -- TODO confirm).

    Raises
    ------
    AssertionError
        If the two approaches have a different number of rows, or (paired test
        only) do not cover the same problems in the same order.
    """
    comp = wilcoxon if use_wilcoxon else mannwhitneyu
    significance_level = 0.05 / alpha_correct

    approaches = [better_options[HYBRID], better_options[NONHYBRID], better_options[FIXED]]
    # Pre-allocate the full grid (diagonal stays "") instead of growing an
    # empty frame one cell at a time.
    symbols_df = pd.DataFrame("", index=approaches, columns=approaches)

    for appA, appB in itertools.product(approaches, approaches):  # iterate over pairs of approaches
        if appA == appB:
            continue

        # Sorting by problem lines the two samples up element by element.
        appA_df = means_dataframe[means_dataframe.option == appA].sort_values(["problem"])
        appB_df = means_dataframe[means_dataframe.option == appB].sort_values(["problem"])
        assert len(appA_df) == len(appB_df), f"both dataframes have the same length, currently it's {appA}: {len(appA_df)} vs {appB}: {len(appB_df)}"
        if use_wilcoxon:
            # The paired test is only meaningful if element i of both samples
            # belongs to the same problem; equal length alone does not guarantee that.
            assert list(appA_df["problem"]) == list(appB_df["problem"]), (
                f"paired comparison needs identical problems for {appA} and {appB}"
            )

        pi_symbols = []
        for pi in pis:
            # Missing values are treated as 0.
            appA_np = np.nan_to_num(appA_df[pi].to_numpy())
            appB_np = np.nan_to_num(appB_df[pi].to_numpy())

            # scipy's wilcoxon cannot handle all-zero differences, so those
            # (and empty samples) are reported as "no difference" directly.
            if ((appA_np - appB_np) == 0).all():
                pi_symbols.append("=")
            elif comp(appA_np, appB_np)[1] >= significance_level:
                pi_symbols.append("=")
            else:
                # vd[0] is compared against 0.5, vd[1] is a magnitude label from effect_sizes.
                vd = VD_A(list(appA_np), list(appB_np))
                # Higher is better for Diversity and HyperVolume!!!
                higher_is_better = "hv" in pi.lower() or "dci" in pi.lower()
                if higher_is_better:
                    mark = "X" if vd[0] < 0.5 else "+"
                else:
                    mark = "+" if vd[0] < 0.5 else "X"
                pi_symbols.append(mark * effect_sizes[vd[1]])

        symbols_df.at[appA, appB] = "   ".join(pi_symbols)
    return symbols_df

# Means...
# Column label of the summary table -> (row, column) cell of the pairwise
# table returned by wilcoxon_pi_table.
_comparison_cells = {
    "Hybrid vs. Non-Hybrid": ("Hybrid", "NonHybrid"),
    "Hybrid vs. Fixed": ("Hybrid", "Fixed"),
    "Non-Hybrid vs. Fixed": ("NonHybrid", "Fixed"),
}


def _compare_performance_indicators(means_df, log_prefix=None):
    """Run wilcoxon_pi_table once per performance indicator on ``means_df``.

    Returns a pair ``(rows, pi_tables)``: ``rows`` is a list with one dict per
    indicator (key ``"PI"`` plus one entry per comparison label), ``pi_tables``
    is the list of pairwise tables, each with the indicator name added as the
    top column level. If ``log_prefix`` is given, it is printed together with
    each indicator name before that indicator is processed.
    """
    rows = []
    pi_tables = []
    for pi in performance_indicators:
        if log_prefix is not None:
            print(log_prefix, pi)
        table = wilcoxon_pi_table(means_df, pis=[pi], use_wilcoxon=True)

        comparisons = dict(PI=pi)
        for label, (app_a, app_b) in _comparison_cells.items():
            comparisons[label] = table.at[app_a, app_b]
        rows.append(comparisons)

        table.columns = pd.MultiIndex.from_product([[pi], table.columns])
        pi_tables.append(table)
    return rows, pi_tables


def _labelled_comparison_frame(rows, group_label):
    """Turn comparison rows into a PI-indexed frame whose columns sit under ``group_label``."""
    frame = pd.DataFrame(rows).set_index("PI")
    frame.columns = pd.MultiIndex.from_product([[group_label], frame.columns])
    return frame


dfs_to_unify = {}

# All problems together.
data = big_means_df
pi_comparison_rows, tables = _compare_performance_indicators(data, log_prefix="total")
dfs_to_unify["All"] = _labelled_comparison_frame(pi_comparison_rows, "All")

# (column, printed feature name, [(slice value, group label), ...]);
# the order determines the order of the groups in dfs_to_unify.
_feature_slices = [
    ("repair", "Repair", [(True, "Repair"), (False, "Optimize")]),  # "qubits": "qubits",
    ("arbitrary", "arbitrary", [(False, "Specific"), (True, "Arbitrary")]),
]
for column, feature, labelled_values in _feature_slices:
    print("Slicing by feature", feature)

    for feature_value, group_label in labelled_values:
        print(feature, feature_value, "-->", f"{column}{feature_value}.tex")
        data = big_means_df[big_means_df[column] == feature_value]
        slice_pi_comparison_rows, tables = _compare_performance_indicators(data)
        dfs_to_unify[group_label] = _labelled_comparison_frame(slice_pi_comparison_rows, group_label)

total DCI_min
total HV_min_norm
total igdplus_min_norm
Slicing by feature Repair
Repair True --> repairTrue.tex
Repair False --> repairFalse.tex
Slicing by feature arbitrary
arbitrary False --> arbitraryFalse.tex
arbitrary True --> arbitraryTrue.tex




In [6]:
def tex_adapt_table(table, col_fix=True):
    """Render ``table`` via ``to_latex()`` and rewrite the LaTeX for the paper.

    The rewrite centres the column spec, optionally (``col_fix``) inserts
    vertical group separators into two known wide column layouts, and then
    applies a fixed, ordered series of literal substitutions: symbol runs
    (``+``, ``=``, ``X``) become LaTeX macros/colour commands, indicator and
    approach names become macros, and the ``\\cline`` separators between the
    row groups are adjusted.

    Parameters
    ----------
    table : object with a ``to_latex()`` method (e.g. a pandas DataFrame)
    col_fix : bool, default True
        Whether to apply the wide-layout column separator fix.

    Returns
    -------
    str
        The adapted LaTeX source.
    """
    latex = table.to_latex()

    # centre columns: the first all-"l" column spec becomes all-"c"
    left_spec = re.search(r"\{l+\}", latex)
    if left_spec:
        spec = left_spec.group(0)
        latex = latex.replace(spec, spec.replace("l", "c"))

    if col_fix:
        if "ccccccccccccccccccccccccc" in latex:
            latex = latex.replace("ccccccccccccccccccccccccc", "c|cccccccc|cccccccc|cccccccc")
        elif "lrrrrrrrrrrrrrrr" in latex:
            latex = latex.replace("lrrrrrrrrrrrrrrr", "c|rrrrr|rrrrr|rrrrr")

    # Literal substitutions, applied strictly in this order: longer symbol runs
    # must be handled before shorter ones, and the separator patterns at the
    # end match the output of the multirow substitution at the top.
    substitutions = (
        # vertically center multirow label
        ("\\multirow[t]{3}{*}", "\\multirow[t]{3}{*}[-1em]"),
        ("PI", "PI Comparison"),
        # centre column group headlines
        ("{l}", "{c}"),
        # "+" runs
        ("++++", "\\betterLarge"),
        ("+++", "\\betterMedium"),
        ("++", "\\betterSmall"),
        ("+", "\\betterNegligible"),
        # no difference
        ("=", "\\textcolor{gray}{\\ensuremath{\\equiv}}"),
        # "X" runs
        ("XXXX", "\\cellcolor{badred!50}\\textcolor{badred}{\\ding{55}\\ding{55}\\ding{55}}"),
        ("XXX", "\\cellcolor{badred!30}\\transparent{0.75}\\textcolor{badred}{\\ding{55}\\ding{55}}"),
        ("XX", "\\cellcolor{badred!15}\\transparent{0.5}\\textcolor{badred}{\\ding{55}}"),
        ("X", "\\transparent{0.25}\\textcolor{badred}{\\ding{55}}"),
        # colors
        ("cellbetter", "\\cellcolor{goodgreen!30}"),
        ("cellworse", "\\cellcolor{badred!30}"),
        # indicator names
        ("igdplus_min_norm", "\\IGD"),
        ("HV_min_norm", "\\HV"),
        ("DCI_min", "\\DCI"),
        # approach names ("Non-Hybrid" before "Hybrid")
        ("Fixed", "\\fix"),
        ("Non-Hybrid", "\\non"),
        ("Hybrid", "\\hyb"),
        # drop the \cline directly in front of the bottom rule
        ("\\cline{1-5}\n\\bottomrule", "\\bottomrule"),
        # shorten inner-cat separators
        ("\\cline{1-5}\n\\multirow[t]{3}{*}[-1em]{Optimize}", "\\cline{2-2}\n\\multirow[t]{3}{*}[-1em]{Optimize}"),
        ("\\cline{1-5}\n\\multirow[t]{3}{*}[-1em]{Arbitrary}", "\\cline{2-2}\n\\multirow[t]{3}{*}[-1em]{Arbitrary}"),
        # change cat separators
        ("\\cline{1-5}\n\\multirow[t]{3}{*}[-1em]{Repair}", "\\cmidrule{1-5}\n\\multirow[t]{3}{*}[-1em]{Repair}"),
        ("\\cline{1-5}\n\\multirow[t]{3}{*}[-1em]{Specific}", "\\cmidrule{1-5}\n\\multirow[t]{3}{*}[-1em]{Specific}"),
    )
    for needle, replacement in substitutions:
        latex = latex.replace(needle, replacement)

    return latex

def write_table_to_file(table, filepath, col_fix=True):
    """Adapt ``table`` with ``tex_adapt_table`` and write the LaTeX to ``filepath``.

    ``filepath`` is overwritten if it already exists. ``col_fix`` is passed
    through to ``tex_adapt_table``. Returns the LaTeX string that was written.
    """
    adapted_tex = tex_adapt_table(table, col_fix=col_fix)
    Path(filepath).write_text(adapted_tex)
    return adapted_tex

In [7]:
# Output folder for the generated paper tables (hard-coded, machine-specific path).
paper_tables_dir = Path("/Users/stefan/Library/CloudStorage/Dropbox/Apps/Overleaf/ASE2023 - QRepair/generated")
vertical_tex_path = paper_tables_dir / f"{DATA_PREFIX.replace('%', '')}-PIs-vertical.tex"

# Put all comparison groups side by side; the transposed view has one row per
# (group, comparison) and one column per performance indicator.
tab = pd.concat(dfs_to_unify.values(), axis=1)
display(tab.T)
write_table_to_file(tab.T, vertical_tex_path, col_fix=False)

Unnamed: 0,PI,DCI_min,HV_min_norm,igdplus_min_norm
All,Hybrid vs. Non-Hybrid,+++,+,XX
All,Hybrid vs. Fixed,++++,++,=
All,Non-Hybrid vs. Fixed,+++,++,=
Repair,Hybrid vs. Non-Hybrid,++++,++,=
Repair,Hybrid vs. Fixed,+++,+,+++
Repair,Non-Hybrid vs. Fixed,=,=,=
Optimize,Hybrid vs. Non-Hybrid,+++,=,XX
Optimize,Hybrid vs. Fixed,++++,+++,XX
Optimize,Non-Hybrid vs. Fixed,++++,++,=
Specific,Hybrid vs. Non-Hybrid,=,=,=


'\\begin{tabular}{ccccc}\n\\toprule\n & PI Comparison & \\DCI & \\HV & \\IGD \\\\\n\\midrule\n\\multirow[t]{3}{*}[-1em]{All} & \\hyb vs. \\non & \\betterMedium & \\betterNegligible & \\cellcolor{badred!15}\\transparent{0.5}\\textcolor{badred}{\\ding{55}} \\\\\n & \\hyb vs. \\fix & \\betterLarge & \\betterSmall & \\textcolor{gray}{\\ensuremath{\\equiv}} \\\\\n & \\non vs. \\fix & \\betterMedium & \\betterSmall & \\textcolor{gray}{\\ensuremath{\\equiv}} \\\\\n\\cmidrule{1-5}\n\\multirow[t]{3}{*}[-1em]{Repair} & \\hyb vs. \\non & \\betterLarge & \\betterSmall & \\textcolor{gray}{\\ensuremath{\\equiv}} \\\\\n & \\hyb vs. \\fix & \\betterMedium & \\betterNegligible & \\betterMedium \\\\\n & \\non vs. \\fix & \\textcolor{gray}{\\ensuremath{\\equiv}} & \\textcolor{gray}{\\ensuremath{\\equiv}} & \\textcolor{gray}{\\ensuremath{\\equiv}} \\\\\n\\cline{2-2}\n\\multirow[t]{3}{*}[-1em]{Optimize} & \\hyb vs. \\non & \\betterMedium & \\textcolor{gray}{\\ensuremath{\\equiv}} & \\cellcolor{badred!15}\\