In [None]:
import pandas as pd
pd.set_option("display.max_columns", None)

import numpy as np
from pathlib import Path
import re

from common import *

<h1><span style="color:red">
    WARNING:</span> This file is meant to be executed top to bottom. 
</h1><h2>There are quite a few parts copy & pasted. Hence, MANY variables are shadowed.</h2>

# Step 0. Data Production: Each of these cells takes 14 minutes to execute on Stefan's machine

In [None]:
# Snapshot selector. It is handed to extract_best_solutions as `earliest_finish`:
# "last" disables the time cutoff, any other value is used as a column name in the
# *_earliest_finish.csv files. It also prefixes the cache and output file names.
DATA_PREFIX = "last"
# Tolerance used by the categorisation functions: a deviation of at most THRESHOLD
# from the target value is still treated as "no deviation".
THRESHOLD = 0.000001  # 1.0 and 0.9999999... are the same, right?

# Directory containing the raw per-run CSV result files.
# NOTE(review): machine-specific absolute path; the notebook only runs where this volume is mounted.
raw_results_path = Path("/Volumes/Transcend/data/QRefactoring-results-final/QRefactoring-results-final")

# Pickle caches for the extracted best-overlap (OL) and best-OED rows; any "%" in
# the prefix is removed so that the file names stay plain.
OL_file_path = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OL.pkl")
OED_file_path = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OED.pkl")

# Target directory for the generated .tex tables.
# NOTE(review): machine-specific absolute path as well.
paper_tables_dir = Path("/Users/stefan/Library/CloudStorage/Dropbox/Apps/Overleaf/ASE2023 - QRepair/generated")

In [None]:
# This is a helper!
def extract_best_solutions(files, earliest_finish=None):
    """Extract the best solution of every run, once by overlap and once by OED.

    For each result CSV in ``files`` the rows of the highest generation
    (``ngen``) are selected. From those rows the one with the highest
    ``overlap`` and the one with the highest ``OED`` (overlap multiplied by
    ``1 - get_actual_error_rate(num_gates, num_nonloc_gates)``) are kept.

    Parameters
    ----------
    files : iterable of path-like
        Result CSV files; they are processed in sorted order.
    earliest_finish : str or None
        ``None`` or ``"last"``: use the complete result file. Any other value
        names a column of the ``*_earliest_finish.csv`` files found in
        ``raw_results_path``; the first row of that column is the cutoff for
        the corresponding problem, and only rows with ``timestamp <= cutoff``
        are considered.

    Returns
    -------
    (OL_df, OED_df) : tuple of pandas.DataFrame
        One row per input file, sorted by problem, option and seed.
    """
    if earliest_finish == "last":  # "last" means: no cutoff
        earliest_finish = None

    # problem name -> cutoff timestamp
    earliest_finish_times = {}
    if earliest_finish:
        for earliest_finish_file in raw_results_path.glob("*earliest_finish.csv"):
            problem = earliest_finish_file.stem.replace("_earliest_finish", "")
            earliest_finish_df = pd.read_csv(earliest_finish_file)
            earliest_finish_times[problem] = earliest_finish_df.iloc[0][earliest_finish]

    best_OL_rows = []
    best_OED_rows = []
    for pi_file in sorted(files):
        problem, seed, option = extract_info_from_file(pi_file)
        qubits, arbitrary = QUBITS_and_ARBITRARY[problem]

        last_row = dict(problem=problem, option=option, seed=seed, qubits=qubits, arbitrary=arbitrary)
        results_file_df = pd.read_csv(pi_file)
        if earliest_finish:
            if problem not in earliest_finish_times:
                print("WARNING!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                print("No earliest finish time for problem", problem)
            else:
                # Use the cutoff of *this* problem. The previous code read the
                # cutoff from whichever earliest-finish file happened to be
                # loaded last, so every problem shared one cutoff.
                cutoff = earliest_finish_times[problem]
                results_file_df = results_file_df[results_file_df.timestamp <= cutoff]

        # Only the final generation (within the cutoff, if any) is of interest.
        last_gen_df = results_file_df[results_file_df.ngen == results_file_df.ngen.max()].reset_index()
        last_gen_df["no_error_prob_actual"] = 1 - last_gen_df.apply(
            lambda row: get_actual_error_rate(row.num_gates, row.num_nonloc_gates), axis=1
        )
        last_gen_df["OED"] = last_gen_df.overlap * last_gen_df.no_error_prob_actual

        best_OL_row = dict(last_row)
        best_OL_row.update(last_gen_df.sort_values("overlap", ascending=False).iloc[0].to_dict())
        best_OL_rows.append(best_OL_row)

        best_OED_row = dict(last_row)
        best_OED_row.update(last_gen_df.sort_values("OED", ascending=False).iloc[0].to_dict())
        best_OED_rows.append(best_OED_row)

    OL_df = pd.DataFrame(best_OL_rows).sort_values(by=["problem", "option", "seed"])
    OED_df = pd.DataFrame(best_OED_rows).sort_values(by=["problem", "option", "seed"])

    return OL_df, OED_df

In [None]:
%%time
# Rebuild the pickle caches from the raw CSVs unless both of them already exist.
if not (OL_file_path.exists() and OED_file_path.exists()):
    print("Data files not found. Starting the data extraction.")

    # Keep the per-seed result CSVs only; every auxiliary CSV kind is excluded.
    output_files = [
        csv_file
        for csv_file in raw_results_path.glob("*.csv")
        if "seed" in str(csv_file)
        and not any(
            marker in str(csv_file)
            for marker in (
                "logbook",
                "PI.csv",
                "DCI.csv",
                "HVrefpoint.csv",
                "globalPareto.csv",
                "earliest_finish.csv",
            )
        )
    ]

    OL_df, OED_df = extract_best_solutions(output_files, earliest_finish=DATA_PREFIX)
    OL_df.to_pickle(OL_file_path)
    OED_df.to_pickle(OED_file_path)
else:
    print("Found data files. No need to extract.")

# RQ 1.1 & 2.1 (Theoretical gains)

In [None]:
# Load the cached best-by-overlap rows and flag every row whose problem is one
# of the repair circuits (everything else is treated as an optimisation problem).
all_vals_df = pd.read_pickle(OL_file_path)
all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

In [None]:
def _check_improvement(row):
    """Compare a solution with the reference circuit of its problem.

    The three objectives (gate count, depth, non-local gate count) are compared
    with ``reference_fitness_values[row.problem]``; smaller is better.

    Returns
    -------
    "Pareto Equal" : all three equal, or at least one better and one worse
    "Worse"        : nothing better and at least one worse
    "Optimized"    : nothing worse and at least one better
    None           : none of the comparisons hold (e.g. NaN values)
    """
    ref_num_gates, ref_depth, ref_non_local = reference_fitness_values[row.problem]
    pairs = (
        (row.num_gates, ref_num_gates),
        (row.depth, ref_depth),
        (row.num_nonloc_gates, ref_non_local),
    )

    identical = all(value == ref for value, ref in pairs)
    some_worse = any(value > ref for value, ref in pairs)
    some_better = any(value < ref for value, ref in pairs)

    # a trade-off (or no change at all) is neither an improvement nor a regression
    if identical or (some_worse and some_better):
        return "Pareto Equal"

    # from here on the solution is strictly dominated or strictly dominating
    if all(value >= ref for value, ref in pairs):
        return "Worse"
    if all(value <= ref for value, ref in pairs):
        return "Optimized"
    return None

def get_operator_categorisation_OL(row):
    """Categorise a solution by overlap.

    A solution whose overlap falls short of 1.0 by more than ``THRESHOLD`` is
    "Faulty"; every other solution is classified by ``_check_improvement``.
    """
    overlap_gap = 1.0 - row.overlap
    if not overlap_gap > THRESHOLD:
        # functionally correct, so judge it by its size metrics
        return _check_improvement(row)
    return "Faulty"
    
def get_operator_categorisation_OED(row):
    """Categorise a solution by OED.

    The reference value is ``1 - get_actual_error_rate(...)`` evaluated on the
    gate count and non-local gate count of the problem's reference circuit. A
    solution whose OED is below that value by more than ``THRESHOLD`` is
    "Faulty"; every other solution is classified by ``_check_improvement``.
    """
    # the reference depth is not needed for the error rate
    ref_num_gates, _ref_depth, ref_non_local = reference_fitness_values[row.problem]
    ref_OED_actual = 1 - get_actual_error_rate(ref_num_gates, ref_non_local)

    shortfall = ref_OED_actual - row.OED
    return "Faulty" if shortfall > THRESHOLD else _check_improvement(row)

## RQ 1.1 - Repair (theoretical)? 

In [None]:
# Repair problems only; every run is categorised by overlap.
repair_df = all_vals_df[all_vals_df.repair].reset_index()
repair_df["Categorisation_OL"] = repair_df.apply(get_operator_categorisation_OL, axis=1)
# The OED categorisation is not used in this section (disabled on purpose):
# repair_df["Categorisation_OED"] = repair_df.apply(get_operator_categorisation_OED, axis=1)

In [None]:
# Sanity check: number of repair runs per overlap category.
display(repair_df.groupby("Categorisation_OL").count())
# display(repair_df.groupby("Categorisation_OED").count())

In [None]:
# RQ1.1: per repair problem, count the runs in each overlap category, once per
# search option, and merge the three counts into "hybrid / non-hybrid / fixed" cells.
tables = {}
for opt in (HYBRID, NONHYBRID, FIXED):
    print(better_options[opt])
    runs_of_opt = repair_df[repair_df.option == opt]
    rows = []
    for problem in repair_df.problem.unique():
        problem_runs = repair_df[repair_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            "Arbitrary": "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        }
        categories = runs_of_opt[runs_of_opt.problem == problem].Categorisation_OL
        for which in ("Optimized", "Pareto Equal", "Worse", "Faulty"):
            row[which] = int((categories == which).sum())
        rows.append(row)
    tables[opt] = pd.DataFrame(rows)

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# The descriptive columns were tripled as well; keep only their first part.
sum_table["problem"] = sum_table.problem.str.split("/").str[0].str.strip()
sum_table["Qubits"] = sum_table.qubits.str.split("/").str[0].str.strip() + " qubits"
sum_table["Input State"] = sum_table.Arbitrary.str.split("/").str[0].str.strip()
sum_table["Problem"] = sum_table["problem"] + " (" + sum_table["Qubits"] + ")"

sum_table = sum_table.sort_values(
    by=["Arbitrary", "qubits"],
    ascending=[False, True],
    key=lambda col: col.str.lower(),
)
# Index by (input state, problem) and keep only the four category columns.
sum_table = sum_table.set_index(["Input State", "Problem"]).drop(
    columns=["problem", "qubits", "Arbitrary", "Qubits"]
)

RQ11_table = sum_table
RQ11_table

## Calculate Relative Improvement

In [None]:
# Show the size metrics of the repair runs that were categorised as "Optimized".
interesting_cols = ["num_gates", "depth", "num_nonloc_gates", "num_parameters"]
repair_df[repair_df.Categorisation_OL == "Optimized"][interesting_cols]

## RQ2.1 - Optimization (theoretical)

In [None]:
# Optimisation problems (everything that is not a repair circuit), categorised by overlap.
optimization_df = all_vals_df[all_vals_df.repair == False].reset_index()
optimization_df["Categorisation_OL"] = optimization_df.apply(get_operator_categorisation_OL, axis=1)

In [None]:
# RQ2.1 detail table: per optimisation problem, count the runs in each overlap
# category for every search option and merge them into "h / n / f" cells.
tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            # kept as the raw flag: it is rendered as "[True]"/"[False]" below
            "Arbitrary": problem_runs.arbitrary.iloc[0],
        }
        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OL
        for which in ("Optimized", "Pareto Equal", "Worse", "Faulty"):
            row[which] = int((categories == which).sum())
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# The descriptive columns were tripled as well; keep only their first part.
sum_table["problem"] = sum_table.problem.str.split("/").str[0].str.strip()
sum_table["Qubits"] = sum_table.qubits.str.split("/").str[0].str.strip() + " qubits"
sum_table["Arbitrary"] = sum_table.Arbitrary.str.split("/").str[0].str.strip()

# The "[True]"/"[False]" prefix is a marker that tex_adapt_table turns into a row colour.
sum_table["Problem"] = "[" + sum_table["Arbitrary"] + "]" + sum_table["problem"]
sum_table = sum_table.sort_values(by=["Qubits", "problem"], key=lambda col: col.str.lower())
sum_table = sum_table.set_index(["Qubits", "Problem"]).drop(
    columns=["problem", "qubits", "Arbitrary"]
)

RQ21_table = sum_table
RQ21_table

## Create Summary Tables (because the full table is too long!)

In [None]:
# RQ2.1 summary (totals): for every search option, sum the per-problem category
# counts over all optimisation problems.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in [HYBRID, NONHYBRID, FIXED]:
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OL
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)

    # Sum the count columns only. Previously the whole frame was summed (which
    # concatenates the string columns) and the following Series.drop(columns=...)
    # was a no-op, so the descriptive entries had to be dropped again later.
    tables[option] = pd.DataFrame(tab[count_columns].sum())

# String-"add" the three single-column tables: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# One row (label 0) with the four category columns.
RQ21_summary_table_sum = sum_table.T
RQ21_summary_table_sum

In [None]:
# RQ2.1 summary by input state: category counts summed over all problems that
# share the same "Arbitrary"/"Specific" label, per search option.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OL
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)
    # group totals of the four count columns, indexed by the input-state label
    tables[option] = tab.groupby("Arbitrary")[count_columns].sum()

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

RQ21_summary_table_input_state = sum_table
RQ21_summary_table_input_state

In [None]:
# RQ2.1 summary by circuit size: category counts summed over all problems with
# the same number of qubits, per search option.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OL
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)
    # group totals of the four count columns, indexed by the qubit count
    tables[option] = tab.groupby("qubits")[count_columns].sum()

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

RQ21_summary_table_qubits = sum_table
RQ21_summary_table_qubits

# RQ 1.2 & 2.2 (Practical gains)

In [None]:
# Switch to the cached best-by-OED rows. This rebinds all_vals_df, so the
# overlap-based frames above must not be re-derived after this cell has run.
all_vals_df = pd.read_pickle(OED_file_path)
all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

## RQ 1.2 - Repair (practical)? 

In [None]:
# Repair problems only; every run is categorised by OED.
repair_df = all_vals_df[all_vals_df.repair].reset_index()
repair_df["Categorisation_OED"] = repair_df.apply(get_operator_categorisation_OED, axis=1)

In [None]:
# Sanity check: number of repair runs per OED category.
display(repair_df.groupby("Categorisation_OED").count())

In [None]:
# RQ1.2: per repair problem, count the runs in each OED category, once per
# search option, and merge the three counts into "hybrid / non-hybrid / fixed" cells.
tables = {}
for opt in (HYBRID, NONHYBRID, FIXED):
    print(better_options[opt])
    runs_of_opt = repair_df[repair_df.option == opt]
    rows = []
    for problem in repair_df.problem.unique():
        problem_runs = repair_df[repair_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            "Arbitrary": "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        }
        categories = runs_of_opt[runs_of_opt.problem == problem].Categorisation_OED
        for which in ("Optimized", "Pareto Equal", "Worse", "Faulty"):
            row[which] = int((categories == which).sum())
        rows.append(row)
    tables[opt] = pd.DataFrame(rows)

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# The descriptive columns were tripled as well; keep only their first part.
sum_table["problem"] = sum_table.problem.str.split("/").str[0].str.strip()
sum_table["Qubits"] = sum_table.qubits.str.split("/").str[0].str.strip() + " qubits"
sum_table["Input State"] = sum_table.Arbitrary.str.split("/").str[0].str.strip()
sum_table["Problem"] = sum_table["problem"] + " (" + sum_table["Qubits"] + ")"

sum_table = sum_table.sort_values(
    by=["Arbitrary", "qubits"],
    ascending=[False, True],
    key=lambda col: col.str.lower(),
)
# Index by (input state, problem) and keep only the four category columns.
sum_table = sum_table.set_index(["Input State", "Problem"]).drop(
    columns=["problem", "qubits", "Arbitrary", "Qubits"]
)

RQ12_table = sum_table
RQ12_table

## RQ2.2 - Optimization (practical)

In [None]:
# Optimisation problems (everything that is not a repair circuit), categorised by OED.
optimization_df = all_vals_df[all_vals_df.repair == False].reset_index()
optimization_df["Categorisation_OED"] = optimization_df.apply(get_operator_categorisation_OED, axis=1)

In [None]:
# RQ2.2 detail table: per optimisation problem, count the runs in each OED
# category for every search option and merge them into "h / n / f" cells.
tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            # kept as the raw flag: it is rendered as "[True]"/"[False]" below
            "Arbitrary": problem_runs.arbitrary.iloc[0],
        }
        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OED
        for which in ("Optimized", "Pareto Equal", "Worse", "Faulty"):
            row[which] = int((categories == which).sum())
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# The descriptive columns were tripled as well; keep only their first part.
sum_table["problem"] = sum_table.problem.str.split("/").str[0].str.strip()
sum_table["Qubits"] = sum_table.qubits.str.split("/").str[0].str.strip() + " qubits"
sum_table["Arbitrary"] = sum_table.Arbitrary.str.split("/").str[0].str.strip()

# The "[True]"/"[False]" prefix is a marker that tex_adapt_table turns into a row colour.
sum_table["Problem"] = "[" + sum_table["Arbitrary"] + "]" + sum_table["problem"]
sum_table = sum_table.sort_values(by=["Qubits", "problem"], key=lambda col: col.str.lower())
sum_table = sum_table.set_index(["Qubits", "Problem"]).drop(
    columns=["problem", "qubits", "Arbitrary"]
)

RQ22_table = sum_table
RQ22_table

## Create Summary Table (because the full table is too long!)

In [None]:
# RQ2.2 summary (totals): for every search option, sum the per-problem OED
# category counts over all optimisation problems.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in [HYBRID, NONHYBRID, FIXED]:
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OED
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)

    # Sum the count columns only. Previously the whole frame was summed (which
    # concatenates the string columns) and the following Series.drop(columns=...)
    # was a no-op, so the descriptive entries had to be dropped again later.
    tables[option] = pd.DataFrame(tab[count_columns].sum())

# String-"add" the three single-column tables: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

# One row (label 0) with the four category columns.
RQ22_summary_table_sum = sum_table.T
RQ22_summary_table_sum

In [None]:
# RQ2.2 summary by input state: OED category counts summed over all problems
# that share the same "Arbitrary"/"Specific" label, per search option.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OED
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)
    # group totals of the four count columns, indexed by the input-state label
    tables[option] = tab.groupby("Arbitrary")[count_columns].sum()

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

RQ22_summary_table_input_state = sum_table
RQ22_summary_table_input_state

In [None]:
# RQ2.2 summary by circuit size: OED category counts summed over all problems
# with the same number of qubits, per search option.
count_columns = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in (HYBRID, NONHYBRID, FIXED):
    print(better_options[option])
    runs_of_option = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        arb = "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific"
        row = dict(problem=problem, qubits=problem_runs.qubits.iloc[0], Arbitrary=arb)

        categories = runs_of_option[runs_of_option.problem == problem].Categorisation_OED
        for which in count_columns:
            row[which] = int((categories == which).sum())
        rows.append(row)
    tab = pd.DataFrame(rows)
    # group totals of the four count columns, indexed by the qubit count
    tables[option] = tab.groupby("qubits")[count_columns].sum()

# String-"add" the three tables cell by cell: "h" + " / n" + " / f".
sum_table = (
    tables[HYBRID].applymap(lambda v: f"{v}")
    .add(tables[NONHYBRID].applymap(lambda v: f" / {v}"))
    .add(tables[FIXED].applymap(lambda v: f" / {v}"))
)

RQ22_summary_table_qubits = sum_table
RQ22_summary_table_qubits

# Merge Tables for RQ1 and RQ2 and produce tex output

In [None]:
def tex_adapt_table(table, col_fix=True, space_slash=True):
    """Render ``table`` with ``to_latex()`` and post-process the result for the paper.

    The post-processing is purely textual (a fixed sequence of string
    replacements): "%" is removed, column specs are centred, option names are
    mapped to LaTeX macros, underscores in known problem names are escaped,
    rows tagged with "[True]"/"[False]" get a cell colour, and rule lines are
    adjusted.

    Parameters
    ----------
    table : object with a ``to_latex()`` method (e.g. a pandas DataFrame)
    col_fix : bool
        Also centre multicolumn headers, merge the two-line index header into
        one line and insert vertical separators into known column specs.
    space_slash : bool
        If true, " / " becomes "{\\,}/{\\,}" (thin spaces); otherwise "/".

    Returns
    -------
    str
        The adapted LaTeX source.
    """
    tex = table.to_latex()

    tex = tex.replace("%", "")

    # centre columns
    left_aligned_spec = re.search(r"\{l+\}", tex)
    if left_aligned_spec:
        orig_cols = left_aligned_spec.group(0)
        tex = tex.replace(orig_cols, orig_cols.replace("l", "c"))

    if col_fix:
        for width in (3, 4, 7):
            tex = tex.replace(f"multicolumn{{{width}}}{{r}}{{", f"multicolumn{{{width}}}{{c}}{{")

        # Merge the separate index-name header row into the column header row.
        if "Input State & Problem &  &  &  &  &  &  &  &  \\\\" in tex:
            tex = tex.replace("Input State & Problem &  &  &  &  &  &  &  &  \\\\", "")
            tex = tex.replace(" &  & Optimized & Pareto Equal & Worse & Faulty & Optimized & Pareto Equal & Worse & Faulty",
                              "Input State & Problem & Optimized & Pareto Equal & Worse & Faulty & Optimized & Pareto Equal & Worse & Faulty")
        if "qubits & Problem &  &  &  &  &  &  &  &  \\\\" in tex:
            tex = tex.replace("qubits & Problem &  &  &  &  &  &  &  &  \\\\", "")
            tex = tex.replace(" &  & Optimized & Pareto Equal & Worse & Faulty & Optimized & Pareto Equal & Worse & Faulty",
                              "Qubits & Problem & Optimized & Pareto Equal & Worse & Faulty & Optimized & Pareto Equal & Worse & Faulty")

        # Insert vertical separators; only the first matching column spec is rewritten.
        column_layouts = (
            ("c" * 15, "c|ccccccc|ccccccc"),
            ("lrrrrrrrrrrrrrrr", "c|rrrrr|rrrrr|rrrrr"),
            ("c" * 10, "cc|cccc|cccc"),
            ("c" * 9, "c|cccc|cccc"),
            ("c" * 8, "cc|ccc|ccc"),
            ("c" * 7, "c|ccc|ccc"),
        )
        for pattern, layout in column_layouts:
            if pattern in tex:
                tex = tex.replace(pattern, layout)
                break

    # vertically center multirow label
    tex = tex.replace("\\multirow[t]{3}{*}", "\\multirow[t]{3}{*}[-1em]")
    tex = tex.replace("PI", "PI Comparison")

    # centre column group headlines
    tex = tex.replace("{l}", "{c}")

    # "Non-Hybrid" must be handled before "Hybrid", which is a substring of it.
    tex = tex.replace("Fixed", "\\fix")
    tex = tex.replace("Non-Hybrid", "\\non")
    tex = tex.replace("Hybrid", "\\hyb")

    # Variant names start with "Hybrid" and were caught by the replacement above; undo that.
    for mangled, label in (
        ("\\hyb_{NGen=50}", "NGen=50"),
        ("\\hyb_{NGen=100}", "NGen=100"),
        ("\\hyb_{N=100}", "N=100"),
        ("\\hyb_{N=200}", "N=200"),
        ("\\hyb_{Init=20}", "Init=20"),
        ("\\hyb_{Q2}", "\\texttt{Q2}"),
    ):
        tex = tex.replace(mangled, label)

    # no extra rule directly above the bottom rule
    for last_col in (5, 10, 9):
        tex = tex.replace(f"\\cline{{1-{last_col}}}\n\\bottomrule", "\\bottomrule")

    # problem names: the hamiltonian prefix is shortened, all others only get
    # their underscores escaped (raw strings: "\_" is not a valid Python escape)
    tex = tex.replace("hamiltonian_simulation_", r"hamiltonian\_")
    for name in (
        "iswap_n2",
        "quantum_walk",
        "fredkin_n3",
        "linearsolver_n3",
        "quantum_mc_F",
        "teleportation_n3",
        "tofolli_n3",
        "wstate_n3",
        "adder_n4",
        "bell_n4",
        "cat_state_n4",
        "hs4_n4",
        "qrng_n4",
        "lpn_n5",
        "qec_en_n5",
        "QSE_",
        "QSE2_",
        "QSO_",
        "QG_",
    ):
        tex = tex.replace(name, name.replace("_", r"\_"))

    # Colour table rows according to the "[True]"/"[False]" tag in the problem label.
    # NOTE(review): rows tagged "[True]" get the colour `specificrow` and rows tagged
    # "[False]" get `arbitraryrow`, while the original comments called the "[True]"
    # rows arbitrary. Behaviour is kept; confirm against the colour definitions in
    # the LaTeX preamble.
    colored_lines = []
    for line in tex.split("\\\\"):
        if "[True]" in line:
            line = line.replace("&", "& \\cellcolor{specificrow}")
            line = line.replace("[True]", "")
        elif "[False]" in line:
            line = line.replace("&", "& \\cellcolor{arbitraryrow}")
            line = line.replace("[False]", "")
        colored_lines.append(line)

    tex = "\\\\".join(colored_lines)

    # RQ1 Table:
    # add lines between new Qubit sizes
    tex = tex.replace("cline{1-10}", "hline")

    # RQ2 Summary Table:
    tex = tex.replace("Repair &", "\\hline Repair &")
    tex = tex.replace("Specific &", "\\hline Specific &")
    tex = tex.replace("2 qubits &", "\\hline 2 qubits &")

    # Don't make spaces so large
    if space_slash:
        tex = tex.replace(" / ", r"{\,}/{\,}")
    else:
        tex = tex.replace(" / ", "/")

    return tex

def write_table_to_file(table, filepath, col_fix=True, space_slash=True):
    """Convert ``table`` with ``tex_adapt_table`` and write the result to ``filepath``.

    An existing file is overwritten. The adapted LaTeX source is also returned.
    """
    adapted = tex_adapt_table(table, space_slash=space_slash, col_fix=col_fix)
    with open(filepath, mode="w") as handle:
        handle.write(adapted)
    return adapted

## RQ1

In [None]:
# Work on copies so that the RQ1.1 / RQ1.2 tables themselves keep their flat columns.
RQ11_table_out = RQ11_table.copy()
RQ12_table_out = RQ12_table.copy()

# Put a group header above each table's category columns.
RQ11_table_out.columns = pd.MultiIndex.from_product([["RQ1.1 (Perfect Accuracy)"], RQ11_table_out.columns])
RQ12_table_out.columns = pd.MultiIndex.from_product([["RQ1.2 (Acceptable Accuracy)"], RQ12_table_out.columns])

# Side by side: rows are aligned on the shared (Input State, Problem) index.
RQ1_table_out = pd.concat([RQ11_table_out, RQ12_table_out], axis=1)
display(RQ1_table_out)

# Writes <DATA_PREFIX>-RQ1.tex into paper_tables_dir; the returned tex is the cell output.
write_table_to_file(RQ1_table_out, paper_tables_dir / f"{DATA_PREFIX}-RQ1.tex", col_fix=True )

## RQ2 -- Summary Tables

In [None]:
# Work on copies of the three RQ2.1 and the three RQ2.2 summary tables.
RQ21_summary_table_sum_out = RQ21_summary_table_sum.copy()
RQ21_summary_table_input_state_out = RQ21_summary_table_input_state.copy()
RQ21_summary_table_qubits_out = RQ21_summary_table_qubits.copy()

RQ22_summary_table_sum_out = RQ22_summary_table_sum.copy()
RQ22_summary_table_input_state_out = RQ22_summary_table_input_state.copy()
RQ22_summary_table_qubits_out = RQ22_summary_table_qubits.copy()

# Stack totals, per-input-state and per-qubit rows; then add the group header.
RQ21_summary_concat = pd.concat([RQ21_summary_table_sum_out, RQ21_summary_table_input_state_out, RQ21_summary_table_qubits_out])
RQ21_summary_concat.columns = pd.MultiIndex.from_product([["RQ2.1 (Perfect Accuracy)"], RQ21_summary_concat.columns])

RQ22_summary_concat = pd.concat([RQ22_summary_table_sum_out, RQ22_summary_table_input_state_out, RQ22_summary_table_qubits_out])
RQ22_summary_concat.columns = pd.MultiIndex.from_product([["RQ2.2 (Acceptable Accuracy)"], RQ22_summary_concat.columns])

RQ2_summary_out = pd.concat([RQ21_summary_concat, RQ22_summary_concat], axis=1)
# Positional relabelling of the seven rows.
# NOTE(review): this assumes the stacked order is total, Arbitrary, Specific and then
# exactly the qubit groups 2, 3, 4, 5 — confirm if the set of problems changes.
RQ2_summary_out.index = ["Total", "Arbitrary", "Specific", "2 qubits", "3 qubits", "4 qubits", "5 qubits"]
# Presentation order: "Specific" before "Arbitrary".
RQ2_summary_out = RQ2_summary_out.reindex(["Total", "Specific", "Arbitrary", "2 qubits", "3 qubits", "4 qubits", "5 qubits"])

display(RQ2_summary_out)

# Writes <DATA_PREFIX>-RQ2-summary.tex into paper_tables_dir; the returned tex is the cell output.
write_table_to_file(RQ2_summary_out, paper_tables_dir / f"{DATA_PREFIX}-RQ2-summary.tex", col_fix=True )

## RQ2 -- Detail Table

In [None]:
# Work on copies so that the RQ2.1 / RQ2.2 detail tables keep their flat columns.
RQ21_table_out = RQ21_table.copy()
RQ22_table_out = RQ22_table.copy()
# Put a group header above each table's category columns.
RQ21_table_out.columns = pd.MultiIndex.from_product([["RQ2.1 (Perfect Accuracy)"], RQ21_table_out.columns])
RQ22_table_out.columns = pd.MultiIndex.from_product([["RQ2.2 (Acceptable Accuracy)"], RQ22_table_out.columns])

# Side by side: rows are aligned on the shared (Qubits, Problem) index.
RQ2_table_out = pd.concat([RQ21_table_out, RQ22_table_out], axis=1)
display(RQ2_table_out)

# Writes <DATA_PREFIX>-RQ2.tex into paper_tables_dir; the returned tex is the cell output.
write_table_to_file(RQ2_table_out, paper_tables_dir / f"{DATA_PREFIX}-RQ2.tex", col_fix=True )

## RQ2 Calculate Relative Improvement (for those that optimized)

In [None]:
# Column names for the three values returned by extract_relative_optimization (same order).
rel_opt_cols = ["Gates", "Depth", "NonLocalGates"]

In [None]:
def extract_relative_optimization(row):
    """Return how much a solution improves on its problem's reference circuit.

    `row` must expose `problem`, `num_gates`, `depth` and `num_nonloc_gates`.
    The reference triple (gates, depth, non-local gates) is looked up in
    `reference_fitness_values[row.problem]`.

    Returns a 3-tuple of fractions `(reference - achieved) / reference`, in the
    order gates, depth, non-local gates. Positive values mean the solution is
    smaller than the reference.
    """
    def relative_gain(reference, achieved):
        return (reference - achieved) / reference

    ref_gates, ref_depth, ref_nonlocal = reference_fitness_values[row.problem]

    return (
        relative_gain(ref_gates, row.num_gates),
        relative_gain(ref_depth, row.depth),
        relative_gain(ref_nonlocal, row.num_nonloc_gates),
    )
    

### OL

In [None]:
# Load the cached best-overlap (OL) solutions and flag the rows whose problem is a repair circuit.
all_vals_df = pd.read_pickle(OL_file_path)
all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

In [None]:
# Keep only the non-repair (optimization) problems and categorise every run
# with get_operator_categorisation_OL.
optimization_df = all_vals_df[all_vals_df.repair == False].copy()
optimization_df["Categorisation_OL"] = optimization_df.apply(get_operator_categorisation_OL, axis=1)

In [None]:
# Restrict to the runs categorised as "Optimized" and attach their relative
# improvement (one column per entry of rel_opt_cols).
optimized_df = optimization_df[optimization_df.Categorisation_OL == "Optimized"].copy()
optimized_df[rel_opt_cols] = optimized_df.apply(extract_relative_optimization, axis=1, result_type="expand")

In [None]:
# Ad-hoc inspection: optimized runs of problem "AA4" whose option is the empty string.
optimized_df[(optimized_df.option == "") & (optimized_df.problem == "AA4")]

In [None]:
search_options = [HYBRID, NONHYBRID, FIXED]

# Mean relative improvement per (option, problem), computed over optimized runs only.
avg_opt_problem = (
    optimized_df[optimized_df.option.isin(search_options)]
    .groupby(["option", "problem"])[rel_opt_cols]
    .mean()
    .reset_index()
)

# Use cases without any optimized run count as zero improvement, so the
# per-option matrix is padded with zero rows up to 38 entries before averaging.
# NOTE(review): 38 is hard-coded -- presumably the number of optimization use cases; confirm.
avg_optimization = {}
for option in search_options:
    improved = avg_opt_problem[avg_opt_problem.option == option]
    missing = 38 - len(improved)
    padded = np.concatenate([improved[rel_opt_cols].to_numpy(), np.zeros((missing, 3))])
    avg_optimization[better_options[option]] = padded.mean(axis=0)

# Q2 (Qiskit) baseline: relative improvement where any value beats the reference, zeros otherwise.
rows = []
for name, opt_vals in QISKIT_opt_results.items():
    gain = np.array(reference_fitness_values[name]) - np.array(opt_vals)
    if not (gain > 0).any():
        rows.append([0, 0, 0])
        continue
    qiskit_row = pd.Series(
        dict(problem=name, num_gates=opt_vals[0], depth=opt_vals[1], num_nonloc_gates=opt_vals[2])
    )
    rows.append(extract_relative_optimization(qiskit_row))

avg_optimization["Q2"] = np.array(rows).mean(axis=0)

print("OL (Theoretical) in percent")
percent_OL = pd.DataFrame(avg_optimization, index=rel_opt_cols).T * 100
average_rel_improve_OL = percent_OL.round(1).astype(str) + "%"
average_rel_improve_OL

In [None]:
# Display the optimized OL runs together with their relative-improvement columns.
optimized_df

In [None]:
# Per-problem relative improvement (OL), one table per approach, later merged into
# cells of the form "hybrid / non-hybrid / fixed / Q2".
by_approach = {}
for opt in [HYBRID, NONHYBRID, FIXED]:
    approach_optimized_df = optimized_df[optimized_df.option == opt]
    # Mean improvement per problem, rendered as percent strings with one decimal.
    grouped = (approach_optimized_df.groupby(["problem"])[rel_opt_cols].mean() * 100).round(1).astype(str) + "%"
    # display(grouped)
    
    # add those rows that had no optimization (filled with the "---" placeholder)
    add_indices = pd.Index(optimization_df.problem.unique()).difference(grouped.index)
    add_df = pd.DataFrame(index=add_indices, columns=grouped.columns).fillna("---")
    
    # One row per optimization problem, in sorted problem order.
    merged_df = (pd.concat([grouped,add_df]).reindex(optimization_df.problem.sort_values().unique()))
    # merged_df.columns =  pd.MultiIndex.from_product([[better_options[opt]], merged_df.columns])
    by_approach[opt] = merged_df

# Q2 Optimization
rows = []
for name, opt_vals in QISKIT_opt_results.items():
    # if name not in list(optimization_df.problem.unique()):
    #     continue
    if ((np.array(reference_fitness_values[name]) - np.array(opt_vals)) == 0 ).all():  # not optimized
        rows.append([name] + ["---", "---", "---"])
    elif ((np.array(reference_fitness_values[name]) - np.array(opt_vals)) > 0 ).any():  # if any is better, calculate improvement
        extracts = extract_relative_optimization(pd.Series(dict(problem=name, num_gates=opt_vals[0], depth=opt_vals[1], num_nonloc_gates=opt_vals[2])))
        extracts = ( (pd.Series(extracts) * 100).round(1).astype(str) + "%").tolist() 
        rows.append([name] + list(extracts))
    else:  # otherwise, not optimized
        rows.append([name] + ["---", "---", "---"])
        
# add default optimization of Q2 (Qiskit)
by_approach["Q2"] = pd.DataFrame(rows, columns=["problem"] + rel_opt_cols).set_index("problem")
    
# Combine the four string tables cell by cell via DataFrame.add; the frames are aligned on
# their index/columns, so every table has to cover the same problems.
individual_rel_improve_OL = by_approach[HYBRID].applymap(lambda v: f"{v}").add(by_approach[NONHYBRID].applymap(lambda v: f" / {v}")).add(by_approach[FIXED].applymap(lambda v: f" / {v}")).add(by_approach["Q2"].applymap(lambda v: f" / {v}"))
# Temporary helper columns used to build a (qubits, "[arbitrary]problem") row index.
individual_rel_improve_OL["qubits"] = list(pd.Series(individual_rel_improve_OL.index).apply(lambda i: QUBITS_and_ARBITRARY[i][0]))
individual_rel_improve_OL["arbitrary"] = list(pd.Series(individual_rel_improve_OL.index).apply(lambda i: QUBITS_and_ARBITRARY[i][1]))
individual_rel_improve_OL["problem"] = list(pd.Series(individual_rel_improve_OL.index))
individual_rel_improve_OL["Problem"] = individual_rel_improve_OL.apply(lambda row: "[" + str(row["arbitrary"]) + "]" + row.problem, axis=1)
individual_rel_improve_OL = individual_rel_improve_OL.sort_values(["qubits", "problem"])
individual_rel_improve_OL.index = pd.MultiIndex.from_frame(individual_rel_improve_OL[["qubits", "Problem"]])
# Drop the helper columns again; only the three improvement columns remain.
individual_rel_improve_OL = individual_rel_improve_OL.drop(columns=["qubits", "problem", "Problem", "arbitrary"])
individual_rel_improve_OL

### OED

In [None]:
# Load the cached best-OED solutions (this shadows the OL frame of the same name)
# and flag the rows whose problem is a repair circuit.
all_vals_df = pd.read_pickle(OED_file_path)
all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

In [None]:
# Keep only the non-repair (optimization) problems and categorise every run
# with get_operator_categorisation_OED.
optimization_df = all_vals_df[all_vals_df.repair == False].copy()
optimization_df["Categorisation_OED"] = optimization_df.apply(get_operator_categorisation_OED, axis=1)

In [None]:
# Restrict to the runs categorised as "Optimized" (OED) and attach their relative
# improvement (one column per entry of rel_opt_cols).
optimized_df = optimization_df[optimization_df.Categorisation_OED == "Optimized"].copy()
optimized_df[rel_opt_cols] = optimized_df.apply(extract_relative_optimization, axis=1, result_type="expand")

In [None]:
search_options = [HYBRID, NONHYBRID, FIXED]

# Mean relative improvement per (option, problem), computed over optimized runs only.
avg_opt_problem = (
    optimized_df[optimized_df.option.isin(search_options)]
    .groupby(["option", "problem"])[rel_opt_cols]
    .mean()
    .reset_index()
)

# Problems without an optimized run contribute zero rows, padding each option to 38 entries.
# NOTE(review): 38 is hard-coded -- presumably the number of optimization use cases; confirm.
avg_optimization = {}
for option in search_options:
    improved = avg_opt_problem[avg_opt_problem.option == option]
    missing = 38 - len(improved)
    padded = np.concatenate([improved[rel_opt_cols].to_numpy(), np.zeros((missing, 3))])
    avg_optimization[better_options[option]] = padded.mean(axis=0)

# Q2 (Qiskit) baseline: relative improvement where any value beats the reference, zeros otherwise.
rows = []
for name, opt_vals in QISKIT_opt_results.items():
    gain = np.array(reference_fitness_values[name]) - np.array(opt_vals)
    if not (gain > 0).any():
        rows.append([0, 0, 0])
        continue
    qiskit_row = pd.Series(
        dict(problem=name, num_gates=opt_vals[0], depth=opt_vals[1], num_nonloc_gates=opt_vals[2])
    )
    rows.append(extract_relative_optimization(qiskit_row))

avg_optimization["Q2"] = np.array(rows).mean(axis=0)

print("OED (Practical) in percent")
percent_OED = pd.DataFrame(avg_optimization, index=rel_opt_cols).T * 100
average_rel_improve_OED = percent_OED.round(1).astype(str) + "%"
average_rel_improve_OED

In [None]:
# Per-problem relative improvement (OED), one table per approach, later merged into
# cells of the form "hybrid / non-hybrid / fixed / Q2".
by_approach = {}
for opt in [HYBRID, NONHYBRID, FIXED]:
    approach_optimized_df = optimized_df[optimized_df.option == opt]
    # Mean improvement per problem, rendered as percent strings with one decimal.
    grouped = (approach_optimized_df.groupby(["problem"])[rel_opt_cols].mean() * 100).round(1).astype(str) + "%"
    # display(grouped)
    
    # add those rows that had no optimization (filled with the "---" placeholder)
    add_indices = pd.Index(optimization_df.problem.unique()).difference(grouped.index)
    add_df = pd.DataFrame(index=add_indices, columns=grouped.columns).fillna("---")
    
    # One row per optimization problem, in sorted problem order.
    merged_df = (pd.concat([grouped,add_df]).reindex(optimization_df.problem.sort_values().unique()))
    # merged_df.columns =  pd.MultiIndex.from_product([[better_options[opt]], merged_df.columns])
    by_approach[opt] = merged_df
    # display(merged_df)

# Q2 Optimization
rows = []
for name, opt_vals in QISKIT_opt_results.items():
    # if name not in list(optimization_df.problem.unique()):
    #     continue
    if ((np.array(reference_fitness_values[name]) - np.array(opt_vals)) == 0 ).all():  # not optimized
        rows.append([name] + ["---", "---", "---"])
    elif ((np.array(reference_fitness_values[name]) - np.array(opt_vals)) > 0 ).any():  # if any is better, calculate improvement
        extracts = extract_relative_optimization(pd.Series(dict(problem=name, num_gates=opt_vals[0], depth=opt_vals[1], num_nonloc_gates=opt_vals[2])))
        extracts = ( (pd.Series(extracts) * 100).round(1).astype(str) + "%").tolist() 
        rows.append([name] + list(extracts))
    else:  # otherwise, not optimized
        rows.append([name] + ["---", "---", "---"])
        
# add default optimization of Q2 (Qiskit)
by_approach["Q2"] = pd.DataFrame(rows, columns=["problem"] + rel_opt_cols).set_index("problem")
    
# individual_rel_improve_OED = by_approach[HYBRID].applymap(lambda v: f"{v}").add(by_approach[NONHYBRID].applymap(lambda v: f" / {v}")).add(by_approach[FIXED].applymap(lambda v: f" / {v}")).add(by_approach["Q2"].applymap(lambda v: f" / {v}"))
# individual_rel_improve_OED

# Combine the four string tables cell by cell via DataFrame.add; the frames are aligned on
# their index/columns, so every table has to cover the same problems.
individual_rel_improve_OED = by_approach[HYBRID].applymap(lambda v: f"{v}").add(by_approach[NONHYBRID].applymap(lambda v: f" / {v}")).add(by_approach[FIXED].applymap(lambda v: f" / {v}")).add(by_approach["Q2"].applymap(lambda v: f" / {v}"))
# Temporary helper columns used to build a (qubits, "[arbitrary]problem") row index.
individual_rel_improve_OED["qubits"] = list(pd.Series(individual_rel_improve_OED.index).apply(lambda i: QUBITS_and_ARBITRARY[i][0]))
individual_rel_improve_OED["arbitrary"] = list(pd.Series(individual_rel_improve_OED.index).apply(lambda i: QUBITS_and_ARBITRARY[i][1]))
individual_rel_improve_OED["problem"] = list(pd.Series(individual_rel_improve_OED.index))
individual_rel_improve_OED["Problem"] = individual_rel_improve_OED.apply(lambda row: "[" + str(row["arbitrary"]) + "]" + row.problem, axis=1)
individual_rel_improve_OED = individual_rel_improve_OED.sort_values(["qubits", "problem"])
individual_rel_improve_OED.index = pd.MultiIndex.from_frame(individual_rel_improve_OED[["qubits", "Problem"]])
# Drop the helper columns again; only the three improvement columns remain.
individual_rel_improve_OED = individual_rel_improve_OED.drop(columns=["qubits", "problem", "Problem", "arbitrary"])
individual_rel_improve_OED

### Output

In [None]:
# Per-problem relative improvement: OL and OED tables side by side, each under
# its own top-level column header, exported as LaTeX for the paper.
individual_rel_improve_OL_out = individual_rel_improve_OL.copy()
individual_rel_improve_OL_out.columns = pd.MultiIndex.from_product(
    [["RQ2.1 (Perfect Accuracy)"], individual_rel_improve_OL_out.columns]
)

individual_rel_improve_OED_out = individual_rel_improve_OED.copy()
individual_rel_improve_OED_out.columns = pd.MultiIndex.from_product(
    [["RQ2.2 (Acceptable Accuracy)"], individual_rel_improve_OED_out.columns]
)

individual_rel_improve_out = pd.concat(
    [individual_rel_improve_OL_out, individual_rel_improve_OED_out], axis=1
)
display(individual_rel_improve_out)

write_table_to_file(
    individual_rel_improve_out,
    paper_tables_dir / f"{DATA_PREFIX}-rel-improve.tex",
    col_fix=True,
)

In [None]:
# Average relative improvement: OL and OED tables side by side, each under
# its own top-level column header, exported as LaTeX for the paper.
average_rel_improve_OL_out = average_rel_improve_OL.copy()
average_rel_improve_OL_out.columns = pd.MultiIndex.from_product(
    [["RQ2.1 (Perfect Accuracy)"], average_rel_improve_OL_out.columns]
)

average_rel_improve_OED_out = average_rel_improve_OED.copy()
average_rel_improve_OED_out.columns = pd.MultiIndex.from_product(
    [["RQ2.2 (Acceptable Accuracy)"], average_rel_improve_OED_out.columns]
)

average_rel_improve_out = pd.concat(
    [average_rel_improve_OL_out, average_rel_improve_OED_out], axis=1
)
display(average_rel_improve_out)

write_table_to_file(
    average_rel_improve_out,
    paper_tables_dir / f"{DATA_PREFIX}-avg-rel-improve.tex",
    col_fix=True,
)

# RQ4 

In [None]:
def extract_best_solutions_max_gen(files, earliest_finish=None, max_gen=100):
    """Extract the best solution of every run, optionally truncated to `max_gen` generations.

    Parameters
    ----------
    files : iterable of path-like
        Per-run result CSVs. Each file must provide the columns `ngen`, `overlap`,
        `num_gates` and `num_nonloc_gates` (and `timestamp` when `earliest_finish` is used).
    earliest_finish : str, optional
        Column name inside the `*earliest_finish.csv` files found in `raw_results_path`.
        When given, each run is cut off at the earliest finish time of ITS OWN problem.
        Problems without such a file are reported on stdout and left untruncated.
    max_gen : int, optional
        When positive, only generations `ngen < max_gen` are considered and the run's
        `option` is reported as `"Ngen_<max_gen>"`.

    Returns
    -------
    (OL_df, OED_df) : tuple of pandas.DataFrame
        One row per run holding the last-generation individual with the highest
        `overlap` (OL) respectively the highest `OED` (overlap multiplied by the
        probability of no error), both sorted by problem, option and seed.
    """
    earliest_finish_times = {}
    if earliest_finish:
        # Same data directory as extract_best_solutions: one "<problem>_earliest_finish.csv" per problem.
        for earliest_finish_file in raw_results_path.glob("*earliest_finish.csv"):
            problem = earliest_finish_file.stem.replace("_earliest_finish", "")
            earliest_finish_df = pd.read_csv(earliest_finish_file)
            earliest_finish_times[problem] = earliest_finish_df.iloc[0][earliest_finish]

    best_OL_rows = []
    best_OED_rows = []
    for pi_file in sorted(files):
        problem, seed, option = extract_info_from_file(pi_file)
        qubits, arbitrary = QUBITS_and_ARBITRARY[problem]

        last_row = dict(problem=problem, option=option, seed=seed, qubits=qubits, arbitrary=arbitrary)
        results_file_df = pd.read_csv(pi_file)

        if max_gen and max_gen > 0:
            # Copy the filtered slice so the column assignment below writes to an
            # independent frame (no SettingWithCopyWarning).
            results_file_df = results_file_df[results_file_df.ngen < max_gen].copy()
            # This column ends up in the result rows and overrides the option parsed from the file name.
            results_file_df["option"] = f"Ngen_{max_gen}"

        if earliest_finish:
            if problem not in earliest_finish_times:
                print("WARNING!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                print("No earliest finish time for problem", problem)
            else:
                # Use the cutoff of this run's problem, not of whichever file was read last.
                cutoff = earliest_finish_times[problem]
                results_file_df = results_file_df[results_file_df.timestamp <= cutoff]

        # Only the individuals of the last remaining generation are candidates.
        last_gen_df = results_file_df[results_file_df.ngen == results_file_df.ngen.max()].reset_index()
        last_gen_df["no_error_prob_actual"] = 1 - last_gen_df.apply(
            lambda row: get_actual_error_rate(row.num_gates, row.num_nonloc_gates), axis=1
        )
        last_gen_df["OED"] = last_gen_df.overlap * last_gen_df.no_error_prob_actual

        best_OL_row = dict(last_row)
        best_OL_row.update(last_gen_df.sort_values("overlap", ascending=False).iloc[0].to_dict())
        best_OL_rows.append(best_OL_row)

        best_OED_row = dict(last_row)
        best_OED_row.update(last_gen_df.sort_values("OED", ascending=False).iloc[0].to_dict())
        best_OED_rows.append(best_OED_row)

    OL_df = pd.DataFrame(best_OL_rows).sort_values(by=["problem", "option", "seed"])
    OED_df = pd.DataFrame(best_OED_rows).sort_values(by=["problem", "option", "seed"])

    return OL_df, OED_df

In [None]:
# Per-run result CSVs: the path must contain "seed" and none of the markers of
# the auxiliary outputs (logbooks, indicator files, reference points, ...).
auxiliary_markers = (
    "logbook",
    "PI.csv",
    "DCI.csv",
    "HVrefpoint.csv",
    "globalPareto.csv",
    "earliest_finish.csv",
)
output_files = [
    f
    for f in raw_results_path.glob("*.csv")
    if "seed" in str(f) and not any(marker in str(f) for marker in auxiliary_markers)
]

# Of those, keep the files whose path carries none of the other settings' markers.
other_setting_markers = ("gateset_fixed", "Q2", "init_pop", "N_100", "N_200")
hybrid_only_files = [
    f
    for f in output_files
    if not any(marker in str(f) for marker in other_setting_markers)
]
len(hybrid_only_files)

## Get the data for the Hybrid Search after 50 and 100 generations

In [None]:
%%time
# Best solutions of the hybrid-only runs truncated to 100 and to 50 generations.
# Results are cached as pickles next to the notebook; the extraction only runs
# when one of the two pickles of a pair is missing.
OL_file_path_100gen = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OL_100gen.pkl")
OL_file_path_50gen = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OL_50gen.pkl")
OED_file_path_100gen = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OED_100gen.pkl")
OED_file_path_50gen = Path(f"{DATA_PREFIX.replace('%', '')}_vals_OED_50gen.pkl")

ngen_100_OL_df = None
ngen_100_OED_df = None

# 100 generations: load from cache or compute and store.
if OL_file_path_100gen.exists() and OED_file_path_100gen.exists():
    ngen_100_OL_df = pd.read_pickle(OL_file_path_100gen)
    ngen_100_OED_df = pd.read_pickle(OED_file_path_100gen)
else:
    ngen_100_OL_df, ngen_100_OED_df = extract_best_solutions_max_gen(hybrid_only_files, max_gen=100)
    ngen_100_OL_df.to_pickle(OL_file_path_100gen)
    ngen_100_OED_df.to_pickle(OED_file_path_100gen)
    
ngen_50_OL_df = None
ngen_50_OED_df = None
# 50 generations: load from cache or compute and store.
if OL_file_path_50gen.exists() and OED_file_path_50gen.exists():
    ngen_50_OL_df = pd.read_pickle(OL_file_path_50gen)
    ngen_50_OED_df = pd.read_pickle(OED_file_path_50gen)
else:
    ngen_50_OL_df, ngen_50_OED_df = extract_best_solutions_max_gen(hybrid_only_files, max_gen=50)
    ngen_50_OL_df.to_pickle(OL_file_path_50gen)
    ngen_50_OED_df.to_pickle(OED_file_path_50gen)

In [None]:
# Every option known to better_options except FIXED and NONHYBRID.
hybrid_search_settings = [k for k in better_options.keys() if k not in [FIXED, NONHYBRID]]
hybrid_search_settings

## All - Theoretical

In [None]:
# RQ4 (theoretical): the cached OL results plus the 100- and 50-generation truncations.
all_vals_df = pd.concat([pd.read_pickle(OL_file_path), ngen_100_OL_df, ngen_50_OL_df])
all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

# Hybrid init_pop and Hybrid are the same for all repair use cases. 
# all_vals_df[all_vals_df.repair & (all_vals_df.option == HYBRID)]

# Therefore the HYBRID repair rows are duplicated under the "init_pop_20" option.
hybrid_init_pop_repair = all_vals_df[all_vals_df.repair & (all_vals_df.option == HYBRID)].copy()
hybrid_init_pop_repair["option"] = "init_pop_20"

# NOTE: the concatenations keep the original row labels, so the index is not unique.
all_vals_df = pd.concat([all_vals_df, hybrid_init_pop_repair])

In [None]:
# No filtering here: optimization_df is an alias of all_vals_df (repair and optimization
# problems alike), so the new column is added to all_vals_df as well.
optimization_df = all_vals_df  #[all_vals_df.repair == False].reset_index()
optimization_df["Categorisation_OL"] = optimization_df.apply(get_operator_categorisation_OL, axis=1)

#### SUM

In [None]:
# RQ4.1 totals: per hybrid-search setting, the number of runs in each outcome
# category summed over all problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OL == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    # A single row holding the column-wise sum over all problems.
    tab = pd.DataFrame(tables[option].sum()).T
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # problem/qubits/Arbitrary are only used for sorting and are dropped afterwards.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits", "Arbitrary"])
    )
    tables[option] = tab

RQ41_summary_table_sum = pd.concat(tables.values(), axis=1)
RQ41_summary_table_sum

#### Repair/Optimization

In [None]:
# RQ4.1 by task: outcome counts per hybrid-search setting, grouped into
# "Repair" and "Optimization" problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
            repair="Repair" if problem_runs.repair.iloc[0] else "Optimization",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OL == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    tab = tables[option].groupby("repair").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # Row order is decided by the summed qubits (then problem) before the helper columns are dropped.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits", "Arbitrary"])
    )
    tables[option] = tab

RQ41_summary_table_repair = pd.concat(tables.values(), axis=1)
RQ41_summary_table_repair

#### Input State

In [None]:
# RQ4.1 by input state: outcome counts per hybrid-search setting, grouped into
# "Arbitrary" and "Specific" problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OL == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    tab = tables[option].groupby("Arbitrary").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # Row order is decided by the summed qubits (then problem) before the helper columns are dropped.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits"])
    )
    tables[option] = tab

RQ41_summary_table_input_state = pd.concat(tables.values(), axis=1)
RQ41_summary_table_input_state

#### Qubits

In [None]:
# RQ4.1 by size: outcome counts per hybrid-search setting, grouped by the
# number of qubits ("<n> qubits"), rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OL == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    per_problem = tables[option]
    # Turn the qubit count into its row label before grouping on it.
    per_problem["qubits"] = per_problem.qubits.astype(str) + " qubits"
    tab = per_problem.groupby("qubits").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # "qubits" is the index level here; "problem"/"Arbitrary" are helper columns dropped after sorting.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "Arbitrary"])
    )
    tables[option] = tab

RQ41_summary_table_qubits = pd.concat(tables.values(), axis=1)
RQ41_summary_table_qubits

## All - Practical

In [None]:
# RQ4 (practical): the cached OED results plus the 100- and 50-generation truncations.
all_vals_df = pd.concat([pd.read_pickle(OED_file_path), ngen_100_OED_df, ngen_50_OED_df])

all_vals_df["repair"] = all_vals_df.problem.apply(lambda p: p in repair_circuits)

# As in the theoretical section: duplicate the HYBRID repair rows under the "init_pop_20" option.
hybrid_init_pop_repair = all_vals_df[all_vals_df.repair & (all_vals_df.option == HYBRID)].copy()
hybrid_init_pop_repair["option"] = "init_pop_20"

# NOTE: the concatenations keep the original row labels, so the index is not unique.
all_vals_df = pd.concat([all_vals_df, hybrid_init_pop_repair])

In [None]:
# No filtering here: optimization_df is an alias of all_vals_df (repair and optimization
# problems alike), so the new column is added to all_vals_df as well.
optimization_df = all_vals_df  #[all_vals_df.repair == False].reset_index()
optimization_df["Categorisation_OED"] = optimization_df.apply(get_operator_categorisation_OED, axis=1)

#### SUM

In [None]:
# RQ4.2 totals: per hybrid-search setting, the number of runs in each OED outcome
# category summed over all problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OED == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    # A single row holding the column-wise sum over all problems.
    tab = pd.DataFrame(tables[option].sum()).T
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # problem/qubits/Arbitrary are only used for sorting and are dropped afterwards.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits", "Arbitrary"])
    )
    tables[option] = tab

RQ42_summary_table_sum = pd.concat(tables.values(), axis=1)
RQ42_summary_table_sum

#### Repair/Optimization

In [None]:
# RQ4.2 by task: OED outcome counts per hybrid-search setting, grouped into
# "Repair" and "Optimization" problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
            repair="Repair" if problem_runs.repair.iloc[0] else "Optimization",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OED == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    tab = tables[option].groupby("repair").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # Row order is decided by the summed qubits (then problem) before the helper columns are dropped.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits", "Arbitrary"])
    )
    tables[option] = tab

RQ42_summary_table_repair = pd.concat(tables.values(), axis=1)
RQ42_summary_table_repair

#### Input State

In [None]:
# RQ4.2 by input state: OED outcome counts per hybrid-search setting, grouped into
# "Arbitrary" and "Specific" problems, rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OED == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    tab = tables[option].groupby("Arbitrary").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # Row order is decided by the summed qubits (then problem) before the helper columns are dropped.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "qubits"])
    )
    tables[option] = tab

RQ42_summary_table_input_state = pd.concat(tables.values(), axis=1)
RQ42_summary_table_input_state

#### Qubits

In [None]:
# RQ4.2 by size: OED outcome counts per hybrid-search setting, grouped by the
# number of qubits ("<n> qubits"), rendered as "a / b / c / d".
outcome_labels = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    rows = []
    for problem in optimization_df.problem.unique():
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = dict(
            problem=problem,
            qubits=problem_runs.qubits.iloc[0],
            Arbitrary="Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        )
        in_setting = (optimization_df.option == option) & (optimization_df.problem == problem)
        for which in outcome_labels:
            row[which] = len(optimization_df[in_setting & (optimization_df.Categorisation_OED == which)])
        rows.append(row)
    tables[option] = pd.DataFrame(rows)

for option in list(tables):
    per_problem = tables[option]
    # Turn the qubit count into its row label before grouping on it.
    per_problem["qubits"] = per_problem.qubits.astype(str) + " qubits"
    tab = per_problem.groupby("qubits").sum()
    tab[better_options[option]] = tab.apply(
        lambda row: " / ".join(row[outcome_labels].astype(str)), axis=1
    )
    # "qubits" is the index level here; "problem"/"Arbitrary" are helper columns dropped after sorting.
    tab = (
        tab.drop(columns=outcome_labels)
        .sort_values(["qubits", "problem"])
        .drop(columns=["problem", "Arbitrary"])
    )
    tables[option] = tab

RQ42_summary_table_qubits = pd.concat(tables.values(), axis=1)
RQ42_summary_table_qubits

## Merge Tables

In [None]:
# Merge the RQ4 summary tables (total, repair/optimization, input state, qubits)
# into the tables exported for the paper.

# Row labels of the RQ4 summary, in the order used in the paper.
RQ4_SUMMARY_ROWS = ["Total", "Repair", "Optimization", "Specific", "Arbitrary", "2 qubits", "3 qubits", "4 qubits", "5 qubits"]


def _label_rq4_summary_rows(summary):
    """Return `summary` with its rows named and ordered as in RQ4_SUMMARY_ROWS.

    The stacked tables already carry meaningful row labels: the grouped tables are
    indexed by their group keys ("Repair"/"Optimization", "Arbitrary"/"Specific",
    "<n> qubits") and the totals row carries the default label 0. Rows are therefore
    selected BY LABEL instead of being relabelled by position, because the order of
    the grouped rows results from a data-dependent sort in the cells above.

    Raises ValueError when the row labels are not exactly the expected ones, so a
    changed problem set cannot silently produce a mislabelled or truncated table.
    """
    labelled = summary.rename(index={0: "Total"})
    found = list(labelled.index)
    if sorted(map(str, found)) != sorted(RQ4_SUMMARY_ROWS):
        raise ValueError(f"Unexpected RQ4 summary rows: {found}")
    return labelled.reindex(RQ4_SUMMARY_ROWS)


# Work on copies so the tables computed in earlier cells stay untouched.
RQ41_summary_table_sum_out = RQ41_summary_table_sum.copy()
RQ41_summary_table_repair_out = RQ41_summary_table_repair.copy()
RQ41_summary_table_input_state_out = RQ41_summary_table_input_state.copy()
RQ41_summary_table_qubits_out = RQ41_summary_table_qubits.copy()

RQ42_summary_table_sum_out = RQ42_summary_table_sum.copy()
RQ42_summary_table_repair_out = RQ42_summary_table_repair.copy()
RQ42_summary_table_input_state_out = RQ42_summary_table_input_state.copy()
RQ42_summary_table_qubits_out = RQ42_summary_table_qubits.copy()

RQ41_summary_parts = [RQ41_summary_table_sum_out, RQ41_summary_table_repair_out, RQ41_summary_table_input_state_out, RQ41_summary_table_qubits_out]
RQ42_summary_parts = [RQ42_summary_table_sum_out, RQ42_summary_table_repair_out, RQ42_summary_table_input_state_out, RQ42_summary_table_qubits_out]

# Combined table: both accuracy notions side by side under a top-level column header.
RQ41_summary_concat = pd.concat(RQ41_summary_parts)
RQ41_summary_concat.columns = pd.MultiIndex.from_product([["RQ3.1 (Perfect Accuracy)"], RQ41_summary_concat.columns])

RQ42_summary_concat = pd.concat(RQ42_summary_parts)
RQ42_summary_concat.columns = pd.MultiIndex.from_product([["RQ3.2 (Acceptable Accuracy)"], RQ42_summary_concat.columns])

RQ4_summary_out = _label_rq4_summary_rows(pd.concat([RQ41_summary_concat, RQ42_summary_concat], axis=1))
display(RQ4_summary_out)


# Separate tables per accuracy notion (flat column header).
print("Perfect Accuracy")
RQ41_summary_concat_out = _label_rq4_summary_rows(pd.concat(RQ41_summary_parts))
display(RQ41_summary_concat_out)

print("Acceptable Accuracy")
RQ42_summary_concat_out = _label_rq4_summary_rows(pd.concat(RQ42_summary_parts))
display(RQ42_summary_concat_out)


write_table_to_file(RQ41_summary_concat_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-summary-perfect.tex", col_fix=False, space_slash=False)
write_table_to_file(RQ42_summary_concat_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-summary-acceptable.tex", col_fix=False, space_slash=False)
write_table_to_file(RQ4_summary_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-summary.tex", col_fix=True, space_slash=False)

## Individual Problems -- Theoretical

In [None]:
# Collect every run for this section: the pickled OL results plus the
# ngen_100 / ngen_50 frames that are already in the notebook namespace.
all_vals_df = pd.concat([pd.read_pickle(OL_file_path), ngen_100_OL_df, ngen_50_OL_df])

# Flag the rows whose problem is one of the repair circuits.
all_vals_df["repair"] = all_vals_df["problem"].apply(lambda name: name in repair_circuits)

# Hybrid init_pop and Hybrid are the same for all repair use cases, so the
# HYBRID repair rows are duplicated under the "init_pop_20" option label.
is_hybrid_repair = all_vals_df["repair"] & (all_vals_df["option"] == HYBRID)
hybrid_init_pop_repair = all_vals_df.loc[is_hybrid_repair].assign(option="init_pop_20")

all_vals_df = pd.concat([all_vals_df, hybrid_init_pop_repair])

In [None]:
# All runs are categorised here (the earlier `repair == False` filter is disabled).
# `optimization_df` is the same object as `all_vals_df`, so the new column is
# added to that frame too.
optimization_df = all_vals_df
optimization_df["Categorisation_OL"] = optimization_df.apply(
    get_operator_categorisation_OL, axis="columns"
)

In [None]:
# The four outcome categories, in the order they appear in the "a / b / c / d" cell.
category_names = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

# Pass 1: for every search setting, count per problem how many runs landed in
# each category. `tables` maps the option to a frame with one row per problem.
tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    option_runs = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        # Problem-level attributes are read from the first run of that problem.
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            "Arbitrary": "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        }

        outcomes = option_runs.loc[option_runs.problem == problem, "Categorisation_OL"]
        for which in category_names:
            row[which] = int((outcomes == which).sum())
        rows.append(row)

    tables[option] = pd.DataFrame(rows)

# Pass 2: reduce each table to a single text column named after the option,
# indexed by ("<n> qubits", "[<arbitrary flag>]<problem>") and sorted by
# qubit label, then problem name.
for option, tab in tables.items():
    tab = pd.DataFrame({
        "qubits": tab.qubits.astype(str) + " qubits",
        "problem": tab.problem,
        "Problem": [f"[{QUBITS_and_ARBITRARY[name][1]}]{name}" for name in tab.problem],
        better_options[option]: tab[category_names].astype(str).apply(" / ".join, axis=1),
    })
    tab = tab.sort_values(["qubits", "problem"]).set_index(["qubits", "Problem"])
    tab = tab.drop(columns="problem")
    tables[option] = tab

# One column per search setting, aligned on the (qubits, Problem) index.
RQ41_individual_concat = pd.concat(list(tables.values()), axis=1)
RQ41_individual_concat

## Individual Problems -- Practical

In [None]:
# Collect every run for this section: the pickled OED results plus the
# ngen_100 / ngen_50 frames that are already in the notebook namespace.
# NOTE(review): the appended frames are the `*_OL` ones, exactly as in the
# theoretical section, although the pickle read here is the OED one — confirm
# whether OED counterparts of the ngen frames were intended.
all_vals_df = pd.concat([pd.read_pickle(OED_file_path), ngen_100_OL_df, ngen_50_OL_df])

# Flag the rows whose problem is one of the repair circuits.
all_vals_df["repair"] = all_vals_df["problem"].apply(lambda name: name in repair_circuits)

# Hybrid init_pop and Hybrid are the same for all repair use cases, so the
# HYBRID repair rows are duplicated under the "init_pop_20" option label.
is_hybrid_repair = all_vals_df["repair"] & (all_vals_df["option"] == HYBRID)
hybrid_init_pop_repair = all_vals_df.loc[is_hybrid_repair].assign(option="init_pop_20")

all_vals_df = pd.concat([all_vals_df, hybrid_init_pop_repair])

In [None]:
# All runs are categorised here; `optimization_df` is the same object as
# `all_vals_df`, so the new column is added to that frame too.
optimization_df = all_vals_df  #[all_vals_df.repair == False].reset_index()
# NOTE(review): the column is named Categorisation_OED but it is filled by
# get_operator_categorisation_OL, the same function the theoretical section uses.
# This looks like a copy-paste slip; if an OED categorisation function exists it
# was presumably meant here — TODO confirm before trusting the practical tables.
optimization_df["Categorisation_OED"] = optimization_df.apply(get_operator_categorisation_OL, axis=1)

In [None]:
# The four outcome categories, in the order they appear in the "a / b / c / d" cell.
category_names = ["Optimized", "Pareto Equal", "Worse", "Faulty"]

# Pass 1: for every search setting, count per problem how many runs landed in
# each category. `tables` maps the option to a frame with one row per problem.
tables = {}
for option in hybrid_search_settings:
    print(better_options[option])
    option_runs = optimization_df[optimization_df.option == option]
    rows = []
    for problem in optimization_df.problem.unique():
        # Problem-level attributes are read from the first run of that problem.
        problem_runs = optimization_df[optimization_df.problem == problem]
        row = {
            "problem": problem,
            "qubits": problem_runs.qubits.iloc[0],
            "Arbitrary": "Arbitrary" if problem_runs.arbitrary.iloc[0] else "Specific",
        }

        outcomes = option_runs.loc[option_runs.problem == problem, "Categorisation_OED"]
        for which in category_names:
            row[which] = int((outcomes == which).sum())
        rows.append(row)

    tables[option] = pd.DataFrame(rows)

# Pass 2: reduce each table to a single text column named after the option,
# indexed by ("<n> qubits", "[<arbitrary flag>]<problem>") and sorted by
# qubit label, then problem name.
for option, tab in tables.items():
    tab = pd.DataFrame({
        "qubits": tab.qubits.astype(str) + " qubits",
        "problem": tab.problem,
        "Problem": [f"[{QUBITS_and_ARBITRARY[name][1]}]{name}" for name in tab.problem],
        better_options[option]: tab[category_names].astype(str).apply(" / ".join, axis=1),
    })
    tab = tab.sort_values(["qubits", "problem"]).set_index(["qubits", "Problem"])
    tab = tab.drop(columns="problem")
    tables[option] = tab

# One column per search setting, aligned on the (qubits, Problem) index.
RQ42_individual_concat = pd.concat(list(tables.values()), axis=1)
RQ42_individual_concat

## Merge Individual Tables

In [None]:
# Final per-problem outputs: one table per accuracy level plus a merged table
# that shows both levels side by side under a grouping header row.
RQ41_individual_concat_out = RQ41_individual_concat.copy()
RQ42_individual_concat_out = RQ42_individual_concat.copy()

display(RQ41_individual_concat_out)
display(RQ42_individual_concat_out)

# Copies with a two-level column header, so the flat tables above stay unchanged.
RQ41_individual_concat_out_multi_index = RQ41_individual_concat_out.copy()
RQ42_individual_concat_out_multi_index = RQ42_individual_concat_out.copy()
# The second header previously read "RQ2.2", which did not match its "RQ4.1" sibling.
# NOTE(review): the summary table uses "RQ3.x" headers — confirm the numbering used in the paper.
RQ41_individual_concat_out_multi_index.columns = pd.MultiIndex.from_product([["RQ4.1 (Perfect Accuracy)"], RQ41_individual_concat_out.columns])
RQ42_individual_concat_out_multi_index.columns = pd.MultiIndex.from_product([["RQ4.2 (Acceptable Accuracy)"], RQ42_individual_concat_out.columns])
RQ4_individual_concat_out = pd.concat([RQ41_individual_concat_out_multi_index, RQ42_individual_concat_out_multi_index], axis=1)
display(RQ4_individual_concat_out)

write_table_to_file(RQ41_individual_concat_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-individual-perfect.tex", col_fix=False, space_slash=False)
write_table_to_file(RQ42_individual_concat_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-individual-acceptable.tex", col_fix=False, space_slash=False)
# Write the merged *individual* table here; this call used to export
# RQ4_summary_out, so the "-RQ4-individual.tex" file held the summary table.
write_table_to_file(RQ4_individual_concat_out, paper_tables_dir / f"{DATA_PREFIX}-RQ4-individual.tex", col_fix=True, space_slash=False)