## Results Section 6.4: Clifford+T gate set
In this notebook, we outline the post-processing procedure for Section 6.4. Specifically, we reproduce the results from Table 6 and Figure 4.

### Imports and setup

In [None]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from analyzer import main_analysis
import os
import warnings

In [None]:
# All paths are relative, so they resolve against the directory the notebook is run from.
# Timing summary that is read into `df` in the next cell.
source_file = "../../data/times.csv"
# Root of the raw result folders that are listed and passed to main_analysis below.
source_output = '../../data/output'
# Destination for the generated table, .qasm files and figures.
output_folder = "../../data/paper"
# Raw output files of the Mosca baseline.
mosca_data_path = '../../data/baselines/mosca'
# Raw output files of the Gheorghiu baseline.
# NOTE(review): the folder in this path is spelled 'gheorgiu' while the variable is
# 'gheorghiu' - confirm the path matches the directory name on disk.
gheorghiu_data_path = '../../data/baselines/gheorgiu'

In [None]:
# The file is read as tab-separated without a header row, so the column names are assigned by hand.
df = pd.read_csv(source_file, sep='\t', header=None)
# 'filename' is compared against result-folder paths (with a trailing '/') further below;
# 'optimal_runs' is used there to adjust 'time' when fewer than 100 runs were optimal.
df.columns = ['filename', 'time', 'std', 'n_runs', 'successful_runs', 'optimal_runs']

### Defining Helper functions
We need to load the data from Mosca and Gheorghiu from the raw output files. As discussed in the README, we could not include their code.


In [None]:
def parse_mosca(filename):
    """
    Parses the output file generated by MOSCA and extracts the execution time and number of gates.

    The time is the text between the first 'Execution time = ' marker and the end of that line.
    The number of gates is the number of ':'-separated entries between the first 'Path = '
    marker and the end of that line.

    Parameters:
    filename (str): The path to the MOSCA output file.

    Returns:
    tuple: A tuple containing the execution time (float) and the number of gates (int).
           If the output file does not contain both markers, returns (None, None).

    Raises:
    ValueError: If the text following 'Execution time = ' is not a valid float.
    """
    with open(filename) as f:
        raw_output = f.read()

    # Both markers are required; a file with a path but no timing line used to raise IndexError.
    if 'Path = ' not in raw_output or 'Execution time = ' not in raw_output:
        return None, None

    time = float(raw_output.split('Execution time = ')[1].split('\n')[0])
    path_line = raw_output.split('Path = ')[1].split('\n')[0]
    num_gates = len(path_line.split(':'))

    return time, num_gates

def parse_gheorgiu(filename):
    """
    Parses the output file generated by Gheorgiu.

    The time is read from the last non-empty line, which must contain 'Took: <value> ms';
    the value is divided by 1000. The T-depth is the integer following the first
    'T-depth: ' marker.

    Args:
        filename (str): The path to the output file.

    Returns:
        tuple: A tuple containing the execution time (in seconds) and the T-depth (int).
               If the output file does not contain the expected information
               ('OUT:', 'T-depth: ' and a final 'Took: ' line), returns (None, None).

    Raises:
        ValueError: If the time or the T-depth cannot be converted to a number.
    """
    with open(filename) as f:
        raw_output = f.read()

    if 'OUT:' not in raw_output or 'T-depth: ' not in raw_output:
        return None, None

    # Strip trailing whitespace first: a file ending in a newline would otherwise
    # yield an empty "last line" and fail with IndexError.
    last_line = raw_output.rstrip().split('\n')[-1]
    if 'Took: ' not in last_line:
        return None, None

    time = float(last_line.split('Took: ')[1].split(' ms')[0]) / 1000
    t_depth = int(raw_output.split('T-depth: ')[1].split('\n')[0])

    return time, t_depth

### Table 6

In [None]:
def _lookup_time(full_folder, folder):
    """
    Looks up the row of ``df`` belonging to one result folder and returns its adjusted time.

    Args:
        full_folder (str): Path of the result folder below ``source_output``.
        folder (str): Benchmark name, only used in the warning message.

    Returns:
        float or None: The time rounded to two decimals, or None (after emitting a
        warning) when ``df`` has no row for the folder or the row has zero optimal runs.
    """
    # The first 6 characters are the '../../' prefix of source_output; the 'filename'
    # column is matched without that prefix and with a trailing '/'.
    matches = df[df['filename'] == full_folder[6:] + '/'].to_dict('records')
    # A row with zero optimal runs is treated like a missing row; it would otherwise
    # divide by zero in the adjustment below.
    if not matches or matches[0]['optimal_runs'] == 0:
        warnings.warn(f"Could not find an optimal implementation for {folder} in the dataframe. This is caused due to variation between the runs. Running the command for this again should fix this.")
        return None
    element = matches[0]
    # With fewer than 100 optimal runs the recorded time is replaced by
    # 12 hours divided by the number of optimal runs.
    if element['optimal_runs'] < 100:
        element['time'] = 12 * 3600 / element['optimal_runs']
    return round(element['time'], 2)


results = []
comparison_output = os.path.join(output_folder, '64', 'comparison')
os.makedirs(comparison_output, exist_ok=True)
tcount_root = os.path.join(source_output, '64', 'comparison', 'tcount')
tdepth_root = os.path.join(source_output, '64', 'comparison', 'tdepth')

# Sorted so that the row order of the table does not depend on the file system.
for folder in sorted(os.listdir(tcount_root)):
    print(folder)
    output = dict()

    # --- T-count: our result versus the Mosca baseline ---
    full_folder = os.path.join(tcount_root, folder)
    tcount_time = _lookup_time(full_folder, folder)
    if tcount_time is None:
        continue

    output['name'] = folder
    output['tcount_time'] = tcount_time
    _, _, _, best_t_count_circ, _, _ = main_analysis(full_folder)
    output['tcount'] = best_t_count_circ.t_count
    best_t_count_circ.circuit.qasm(formatted=True, filename=os.path.join(comparison_output, f'{folder}_tcount.qasm'))
    time_baseline, gates_baseline = parse_mosca(os.path.join(mosca_data_path, 'comparison', f'{folder}.txt'))
    output['baseline_time_count'] = time_baseline
    output['baseline_gates_count'] = gates_baseline
    # No speedup entry is written when the baseline produced no result.
    if time_baseline is not None:
        output['tcount_speedup'] = time_baseline / tcount_time

    # --- T-depth: our result versus the Gheorghiu baseline ---
    full_folder = os.path.join(tdepth_root, folder)
    tdepth_time = _lookup_time(full_folder, folder)
    if tdepth_time is None:
        # As before, a benchmark without a T-depth timing is left out of the table entirely.
        continue

    output['tdepth_time'] = tdepth_time
    _, _, _, _, best_t_depth_circ, _ = main_analysis(full_folder)
    output['tdepth'] = best_t_depth_circ.t_depth

    time_baseline, gates_baseline = parse_gheorgiu(os.path.join(gheorghiu_data_path, 'comparison', f'{folder}.txt'))
    output['baseline_time_depth'] = time_baseline
    output['baseline_gates_depth'] = gates_baseline
    if time_baseline is not None:
        output['tdepth_speedup'] = time_baseline / tdepth_time

    best_t_depth_circ.circuit.qasm(formatted=True, filename=os.path.join(comparison_output, f'{folder}_tdepth.qasm'))

    results.append(output)

results = pd.DataFrame(results)
results.to_csv(os.path.join(comparison_output, 'table6.csv'), index=False)
results

### Figure 4

We first load all necessary data and preprocess it.

In [None]:
# Work on a renamed copy of the timing table so that `df` itself is left untouched.
csv = df.copy()
csv.columns = ["folder", "time", "std", "runs", "succ_runs", "real_success"]

# Keep only the permutation benchmarks and discard the statistics that are not needed here.
is_permutation = csv["folder"].str.startswith("data/output/64/permutations/")
csv = csv[is_permutation].drop(columns=["std", "runs", "succ_runs"])

# Split each folder path once; the pieces at positions -4, -3 and -2 are used below
# (the folder strings are matched with a trailing '/', so position -1 is empty).
path_parts = csv["folder"].apply(lambda folder: folder.split("/"))
csv["kind_theirs"] = path_parts.apply(lambda parts: parts[-4])
csv["kind"] = path_parts.apply(lambda parts: parts[-3])
# Rows whose piece at position -4 is 'tcount' or 'tdepth' are flagged as "theirs";
# for those rows that piece is also the kind.
csv["theirs"] = csv["kind_theirs"].isin(["tcount", "tdepth"])
csv["kind"] = csv["kind_theirs"].where(csv["theirs"], csv["kind"])
csv["number"] = path_parts.apply(lambda parts: int(parts[-2]))
csv = csv.drop(columns=["folder"])

# Where a "theirs" row exists for the same (number, kind), its time replaces the row's own
# time; the row's own time is kept in "time_ours".
reference_times = csv.loc[csv["theirs"], ["time", "number", "kind"]]
csv = csv.merge(reference_times, how="left", on=["number", "kind"])
csv["time_ours"] = csv["time_x"]
csv["time"] = csv["time_y"].fillna(csv["time_x"])
csv = csv.drop(columns=["time_x", "time_y"])

# The "theirs" rows have served their purpose and are removed.
csv = csv[~csv["theirs"]]

# Permutation numbers that share the same values in `normal_data`, keyed by group id.
groups_same = {
    0: [0],
    1: [1, 2, 5, 6, 13, 22, 29],
    2: [3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28],
    3: [9, 10, 14, 17, 18, 21, 25, 26],
}

# Inverted lookup: permutation number -> group id.
groups = {}
for group_id, members in groups_same.items():
    for member in members:
        groups[member] = group_id

# Per group: (T-count, T-depth).
normal_data = {
    0: (0, 0),
    1: (7, 3),
    2: (8, 3),
    3: (15, 7),
}

# Attach the group's T-count and T-depth to every row; an unknown number raises KeyError.
csv["tcount"] = csv["number"].map(lambda number: normal_data[groups[number]][0])
csv["tdepth"] = csv["number"].map(lambda number: normal_data[groups[number]][1])
csv = csv.drop(columns=["real_success"])

# Read the Mosca baseline output for permutations 0..29, one file per permutation.
mosca_data = []
for number in range(30):
    result_path = os.path.join(mosca_data_path, 'permutations', f"{number}.txt")
    # Defaults used when the file has no "Path" / "Execution time" line.
    tcount, time = None, 0
    with open(result_path) as f:
        for line in f:
            if line.startswith("Path :  "):
                # The T-count is taken as the number of comma-separated entries on the line.
                tcount = len(line.split(","))
            if line.startswith("Execution time :  "):
                # 18 == len("Execution time :  "); the remainder of the line is the time.
                time = float(line.strip()[18:])
    mosca_data.append(dict(number=number, time=time, tcount=tcount, kind="tcount"))

mosca = pd.DataFrame(mosca_data)

# Permutation 0 gets an empty placeholder entry
# (presumably permutations.txt holds no record for it - TODO confirm).
gheorgiou_data = [dict(number=0, time=None, tdepth=None, kind="tdepth")]

file = os.path.join(gheorghiu_data_path, 'permutations.txt')
with open(file) as f:
    current_number = None
    # The file is read as consecutive records of four lines each.
    for line_index, line in enumerate(f):
        position = line_index % 4
        if position == 0:
            # First line of a record: the permutation number.
            current_number = int(line.strip())
        elif position == 2:
            # Third line: the T-depth if one is reported, otherwise no result.
            tdepth = int(line.split(" ")[1]) if "T-depth" in line else None
        elif position == 3:
            # Fourth line: the time, divided by 1000 (presumably milliseconds -> seconds).
            time = float(line.split(" ")[1]) / 1000
            gheorgiou_data.append(dict(number=current_number, time=time, tdepth=tdepth, kind="tdepth"))

gheorgiou = pd.DataFrame(gheorgiou_data)

# Tag every source and stack the three tables.
csv["origin"] = "ours"
gheorgiou["origin"] = "gheorgiou"
mosca["origin"] = "mosca"
all_data = pd.concat([csv, gheorgiou, mosca])

# Renumber the permutations by their rank when our T-count rows are sorted by
# (tcount, time). New frames are built instead of sorting a filtered slice of `csv`
# in place, which would emit a SettingWithCopyWarning.
csv_tcount = csv[csv["kind"] == "tcount"].sort_values(["tcount", "time"]).reset_index()
other_numbering = {
    number: position for position, number in enumerate(csv_tcount["number"].tolist())
}

# Same renumbering for T-depth, ranked by (tdepth, time).
csv_tdepth = csv[csv["kind"] == "tdepth"].sort_values(["tdepth", "time"]).reset_index()
other_numbering_tdepth = {
    number: position for position, number in enumerate(csv_tdepth["number"].tolist())
}

tcount_data = all_data[all_data["kind"] == "tcount"].copy()
tdepth_data = all_data[all_data["kind"] == "tdepth"].copy()

# Apply the new numbering to all sources; a number without a row of ours raises KeyError.
tcount_data["number"] = tcount_data["number"].apply(lambda x: other_numbering[x])
tdepth_data["number"] = tdepth_data["number"].apply(lambda x: other_numbering_tdepth[x])

We then plot the preprocessed data: Figure 4a (T-count) and Figure 4b (T-depth).

In [None]:
# ---- Figure 4a: T-count of Synthetiq versus Mosca et al., plus the speedup distribution ----
mosca = tcount_data[tcount_data["origin"] == "mosca"].copy()
synthetiq = tcount_data[tcount_data["origin"] == "ours"].copy()

# Order Mosca's rows by their T-count (missing counts sort last), ties broken by number.
mosca = mosca.sort_values(["tcount", "number"])
# Give our rows exactly the same permutation order: each number is replaced by its position
# in Mosca's order and the rows are sorted by it. The previous mapping returned the number
# itself, so our rows stayed in number order and could be paired with a different permutation.
position_in_mosca = {number: position for position, number in enumerate(mosca["number"].tolist())}
synthetiq["number"] = synthetiq["number"].apply(lambda number: position_in_mosca[number])
synthetiq = synthetiq.sort_values(["number"])

fig, ax = plt.subplots(figsize=(14, 3), width_ratios=[12, 2], ncols=2)
if any(synthetiq['time'] == 0):
    warnings.warn(f"There are {np.count_nonzero(synthetiq['time'] == 0)} permutations for which no optimal implementation was found within the time limit. This is caused due to the variation between runs. Running the command again should fix this.")
# Element-wise ratio; both frames are in the same permutation order at this point.
speedups = np.array(mosca["time"]) / np.array(synthetiq["time"])
current_palette = sns.color_palette("Dark2", 2)
# Only finite, positive ratios are plotted: a ratio of 0 comes from a Mosca time of 0,
# and inf/nan comes from a time of 0 on our side.
sns.boxplot([speedup for speedup in speedups if np.isfinite(speedup) and speedup > 0], width=0.8, color=current_palette[0], ax=ax[1])
ax[1].set_yscale('log')
ax[1].set_xticklabels([])
ax[1].set_xticks([])
ax[1].set_title("Speedup")
# make the lines surrounding the plot disappear
sns.despine(left=True, bottom=True)
# make the background grey
ax[1].set_facecolor((0.95, 0.95, 0.95))
# move the title to the left
ax[1].title.set(x=0.05)

# Missing Mosca counts are drawn as 0; pd.isna catches both None and NaN.
mosca_counts = [0 if pd.isna(count) else count for count in mosca["tcount"]]
synthetiq_counts = list(synthetiq["tcount"])

data = pd.DataFrame({'Source': ['Synthetiq' for i in range(len(synthetiq_counts))] + ['Mosca et al.' for i in range(len(mosca_counts))],
                    'count': synthetiq_counts + mosca_counts,
                    'label': list(range(len(synthetiq_counts))) + list(range(len(mosca_counts)))}
)

ax[0].set_title("T-Count")
ax[0].title.set(x=0)

ax[0].set_facecolor((0.95, 0.95, 0.95))

sns.set_palette(current_palette)
sns.barplot(data=data, y="count", x="label", hue="Source", edgecolor=(0.95, 0.95, 0.95), linewidth=0, ax=ax[0])

ax[0].set_xticks([])
ax[0].set_ylabel("")
ax[0].set_xlabel("")
ax[0].set_xlim(-0.5, 29.5)
ax[0].xaxis.labelpad = 20
plt.tight_layout()
# "T/O" labels and the "0" label sit at hard-coded bar positions.
for i in range(22, 30):
    ax[0].text(i, 7, "T/O", rotation=90, fontsize=10, color=current_palette[1], weight="bold")

ax[0].text(-0.3, 0.2, "0", rotation=0, fontsize=10, color=current_palette[0], weight="bold")

# set title font size
ax[0].title.set_fontsize(16)
ax[0].tick_params(axis='y', labelsize=16)
ax[0].tick_params(axis='x', labelsize=16)
# set the legend font size
ax[0].legend(fontsize=16)
ax[1].title.set_fontsize(16)

# set y tick font size of the speedup panel
ax[1].tick_params(axis='y', labelsize=16)

fig.tight_layout()

ax[1].set_ylim(10 ** -1.2, 1.5 * 10 ** 5)

# Write next to the other paper artefacts; create the folder if this cell is run on its own.
os.makedirs(output_folder, exist_ok=True)
fig.savefig(os.path.join(output_folder, 'fig_4a.pdf'), bbox_inches='tight')

# ---- Figure 4b: T-depth of Synthetiq versus Gheorghiu et al., plus the speedup distribution ----
synthetiq = tdepth_data[tdepth_data["origin"] == "ours"].copy()
gheorgiou = tdepth_data[tdepth_data["origin"] == "gheorgiou"].copy()

# Bring both sources into the same permutation order.
gheorgiou = gheorgiou.sort_values(["number"])
synthetiq = synthetiq.sort_values(["number"])

if any(synthetiq['time'] == 0):
    warnings.warn(f"There are {np.count_nonzero(synthetiq['time'] == 0)} permutations for which no optimal implementation was found within the time limit. This is caused due to the variation between runs. Running the command again should fix this.")

# Element-wise ratio; relies on the shared number order established above.
speedups = np.array(gheorgiou["time"]) / np.array(synthetiq["time"])

# Carry our T-depth along (positionally) so that both bar series are reordered together.
gheorgiou["ours_tdepth"] = synthetiq["tdepth"].tolist()
gheorgiou = gheorgiou.sort_values(["ours_tdepth", "tdepth"])

fig, ax = plt.subplots(figsize=(14, 3), ncols=2, width_ratios=[12, 2])
current_palette = sns.color_palette("Dark2", 2)
# Only finite, positive ratios are plotted; a missing baseline time gives nan and a
# time of 0 on our side gives inf/nan.
sns.boxplot([speedup for speedup in speedups if np.isfinite(speedup) and speedup > 0], width=0.8, color=current_palette[0], ax=ax[1])
ax[1].set_yscale('log')
ax[1].set_xticklabels([])
ax[1].set_xticks([])
ax[1].set_title("Speedup")
# make the lines surrounding the plot disappear
sns.despine(left=True, bottom=True)
# make the background grey
ax[1].set_facecolor((0.95, 0.95, 0.95))
# move the title to the left
ax[1].title.set(x=0.05)

ax[0].set_facecolor((0.95, 0.95, 0.95))
ax[0].title.set(x=0)
ax[0].set_title("T-Depth")

# Missing baseline depths are drawn as 0; pd.isna catches both None and NaN.
gheorgiou_counts = [0 if pd.isna(count) else count for count in gheorgiou["tdepth"]]
synthetiq_counts = list(gheorgiou["ours_tdepth"])

data = pd.DataFrame({'Source': ['Synthetiq' for i in range(len(synthetiq_counts))] + ['Gheorgiou et al.' for i in range(len(gheorgiou_counts))],
                    'count': synthetiq_counts + gheorgiou_counts,
                    'label': list(range(len(synthetiq_counts))) + list(range(len(gheorgiou_counts)))}
)

sns.set_palette(current_palette)
sns.barplot(data=data, y="count", x="label", hue="Source", edgecolor=(0.95, 0.95, 0.95), linewidth=0, ax=ax[0])

ax[0].set_xticks([])
ax[0].set_ylabel("")
ax[0].set_xlabel("")
ax[0].set_xlim(-0.5, 29.5)
ax[0].xaxis.labelpad = 20
plt.tight_layout()
# "T/O" labels and the "0" label sit at hard-coded bar positions.
for i in range(23, 30):
    ax[0].text(i, 3.3, "T/O", rotation=90, fontsize=10, color=current_palette[1], weight="bold")

for i in range(17, 22):
    ax[0].text(i, 1.3, "T/O", rotation=90, fontsize=10, color=current_palette[1], weight="bold")

ax[0].text(0.1, 0.2, "T/O", rotation=90, fontsize=10, color=current_palette[1], weight="bold")
ax[0].text(-0.2, 0.3, "0", rotation=0, fontsize=10, color=current_palette[0], weight="bold")

# set title font size
ax[0].title.set_fontsize(16)
ax[0].tick_params(axis='y', labelsize=16)
ax[0].tick_params(axis='x', labelsize=16)
# set the legend font size
ax[0].legend(fontsize=14)
ax[1].title.set_fontsize(16)

# set y tick font size of the speedup panel
ax[1].tick_params(axis='y', labelsize=16)
ax[1].set_ylim(10 ** -1.2, 1.5 * 10 ** 5)

fig.tight_layout()

# Write next to the other paper artefacts; create the folder if this cell is run on its own.
os.makedirs(output_folder, exist_ok=True)
fig.savefig(os.path.join(output_folder, 'fig_4b.pdf'), bbox_inches='tight')