# Reproducing MCCD's Experiments

In [1]:
import json
from functools import partial
from io import StringIO
from operator import itemgetter

from surface_sim.setups.setup import SetupDict

from mccd.random_clifford_circuit import *
from surface_sim.setups import CircuitNoiseSetup
from surface_sim.models import CircuitNoiseModel, BiasedCircuitNoiseModel
from surface_sim import Detectors, Setup
from surface_sim.experiments import schedule_from_circuit, experiment_from_schedule
import time
import stim

from pathlib import Path
import stim
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
from joblib import Parallel, delayed
import itertools
import shelve
from surface_sim.layouts import rot_surface_codes

from pymatching import Matching as MWPM
from mle_decoder import MLEDecoder as MLE
from stimbposd import BPOSD
from sklearn.metrics import accuracy_score

import os

# Default location of the Gurobi licence file. `setdefault` keeps a value that
# is already configured in the environment instead of overwriting it with this
# machine-specific path.
os.environ.setdefault('GRB_LICENSE_FILE', '/Users/fengcong/.gurobi/gurobi.lic')

In [2]:
!gurobi_cl --license

Set parameter Username
Set parameter LicenseID to value 2739212
Set parameter LogFile to value "gurobi.log"
Using license file /Users/fengcong/.gurobi/gurobi.lic
Academic license - for non-commercial use only - expires 2026-11-15


## Baseline Decoders

- MWPM. We use the open-source library PyMatching with the noise model used for data generation as detailed in the ‘Experimentally motivated noise model’ subsection.

- BP-OSD. We use the open-source library stimbposd. We use the exact noise model used for data generation and set the maximal belief propagation iterations to 20.

- MLE. We use the algorithm developed and implemented as in ref. 14.

### Notes

All baselines have PyPI packages.

```
pymatching
mle-decoder
stimbposd
```

In [3]:
# Maps a decoder name to a factory that is called with a detector error model
# (see `run_decoder`).
DECODER_BASELINES = {
    'BPOSD': partial(BPOSD, max_bp_iters=20),
    'MLE': MLE, # TODO: model too large. academic license.
    'MWPM': MWPM,
}

## Basic Gates & Surface Code

MCCD uses I, X, Y, Z, H (single qubit gates) and CX (two qubit gates).

MCCD uses Rotated Surface Code.

Of these single-qubit gates, surface-sim supports I, X, Z on the rotated surface code and I, H, X, Z on the unrotated surface code.

### Notes

We use the gates which `surface-sim` supports.

In [4]:
# Print the logical gates surface-sim can compile for each code family.
from surface_sim.circuit_blocks.rot_surface_code_css import gate_to_iterator
print('Rotated', gate_to_iterator.keys())
# NOTE: this second import rebinds `gate_to_iterator` to the unrotated-code table.
from surface_sim.circuit_blocks.unrot_surface_code_css import gate_to_iterator
print('Unrotated', gate_to_iterator.keys())
# Single-qubit gate pools handed to the random circuit generator.
ROT_GATES = list('IXZ')  # rotated surface code
UNR_GATES = list('IHXZ')  # unrotated surface code
MCCD_GATES = ['I', 'X', 'Y', 'Z', 'H']  # single-qubit gate set listed for MCCD

Rotated dict_keys(['TICK', 'I', 'S', 'X', 'Z', 'CX', 'CNOT', 'R', 'RZ', 'RX', 'M', 'MZ', 'MX'])
Unrotated dict_keys(['TICK', 'I', 'S', 'H', 'X', 'Z', 'CX', 'CNOT', 'R', 'RZ', 'RX', 'M', 'MZ', 'MX'])


In [5]:
from gensim.utils import grouper
from stim import CircuitInstruction, Circuit
from surface_sim.setups import SQ_GATES, TQ_GATES, SQ_MEASUREMENTS, SQ_RESETS
from typing import override, Collection
from surface_sim import Model


class ExperimentalCircuitNoiseModel(Model):
    """Circuit-level noise model with hard-coded, experimentally motivated rates.

    The probabilities are the ones listed in this notebook's "Details of
    experimentally motivated noise model" section. They are written inline, so
    the setup's parameters are not consulted by the methods defined here.

    Gate methods are synthesised on attribute access: any callable attribute
    whose name is a key of ``SQ_GATES``, ``TQ_GATES``, ``SQ_MEASUREMENTS`` or
    ``SQ_RESETS`` is replaced by a closure that emits the stim instruction
    together with its noise channel.

    Idling noise is not implemented yet (see ``idle_noise``).
    """

    @override
    def __getattribute__(self, name: str) -> object:
        """Return the attribute, wrapping known gate names with noisy builders.

        Args:
            name: attribute name being looked up.

        Returns:
            The plain attribute when it is not callable or not a known gate
            name; otherwise a function ``f(qubits) -> stim.Circuit``.
        """
        attr = super().__getattribute__(name)

        if not callable(attr):
            return attr

        if name in SQ_GATES:

            def sq_gate(qubits: Collection[str]) -> Circuit:
                inds = self.get_inds(qubits)
                circ = Circuit()
                circ.append(CircuitInstruction(SQ_GATES[name], inds))
                # Single-qubit gate noise: (p_x, p_y, p_z).
                circ.append(CircuitInstruction("PAULI_CHANNEL_1", inds, [0.0001, 0.0001, 0.0001]))
                return circ

            return sq_gate

        elif name in TQ_GATES:

            def tq_gate(qubits: Collection[str]) -> Circuit:
                if len(qubits) % 2 != 0:
                    raise ValueError("Expected an even number of qubits.")

                inds = self.get_inds(qubits)
                circ = Circuit()
                circ.append(CircuitInstruction(TQ_GATES[name], inds))
                # Two-qubit gate noise: the 15 probabilities of PAULI_CHANNEL_2.
                probs = [0.0005, 0.00175, 0.000625, 0.0005, 0, 0, 0, 0.00175, 0, 0, 0, 0.000625, 0, 0, 0.00125]
                circ.append(CircuitInstruction("PAULI_CHANNEL_2", inds, probs))
                return circ

            return tq_gate

        elif name in SQ_MEASUREMENTS:

            def sq_meas(qubits: Collection[str]) -> Circuit:
                inds = self.get_inds(qubits)
                # Flip error applied before the measurement: X_ERROR unless the
                # method name contains "_x", in which case Z_ERROR.
                noise_name = "X_ERROR" if "_x" not in name else "Z_ERROR"
                circ = Circuit()

                # Separate noise and measurement lines for clearer
                # stim.Circuits and diagrams.
                prob: float = 0.002
                circ.append(CircuitInstruction(noise_name, inds, [prob]))
                for qubit in qubits:
                    self.add_meas(qubit)
                # Emit the measurement itself. Without it the measurements
                # registered through add_meas have no matching instruction in
                # the circuit.
                circ.append(CircuitInstruction(SQ_MEASUREMENTS[name], inds))

                return circ

            return sq_meas

        elif name in SQ_RESETS:

            def sq_reset(qubits: Collection[str]) -> Circuit:
                inds = self.get_inds(qubits)
                # Flip error applied after the reset; same selection rule as
                # for measurements.
                noise_name = "X_ERROR" if "_x" not in name else "Z_ERROR"
                circ = Circuit()
                circ.append(CircuitInstruction(SQ_RESETS[name], inds))
                prob: float = 0.002
                circ.append(CircuitInstruction(noise_name, inds, [prob]))
                return circ

            return sq_reset

        return attr

    @override
    def idle(self, qubits: Collection[str]) -> Circuit:
        """Return an ``I`` instruction on ``qubits`` followed by the idle noise."""
        inds = self.get_inds(qubits)
        circ = Circuit()

        circ.append(CircuitInstruction("I", inds))
        circ += self.idle_noise(qubits)

        return circ

    @override
    def idle_noise(
        self, qubits: Collection[str], param_name: str = "idle_error_prob"
    ) -> Circuit:
        """Return the idle noise for ``qubits``; currently always an empty circuit.

        ``param_name`` is accepted but unused.
        """
        # inds = self.get_inds(qubits)
        circ = Circuit()
        # TODO: idling noise is not implemented yet.
        return circ

    @override
    def incoming_noise(self, qubits: Collection[str]) -> Circuit:
        """Return an empty circuit: no incoming noise is applied."""
        return Circuit()


In [6]:
def to_stim_circuit(mccd_circuit):
    """Convert an MCCD logical circuit into a ``stim.Circuit``.

    The result resets every logical qubit, applies the gates grouped by
    timestep with a ``TICK`` between consecutive timesteps, and finally
    measures every logical qubit.

    Args:
        mccd_circuit: MCCD circuit. It must expose ``n_logical_qubits`` and
            iterate as ``(name, timestep, qubits)`` tuples.

    Returns:
        The logical ``stim.Circuit``.
    """
    # TICKs still have to be inserted manually: otherwise only the final-round
    # syndrome is produced and the number of detectors does not change with
    # depth. Each TICK inserted here corresponds to one block of detector
    # instructions in the physical circuit.

    res = stim.Circuit()
    # R and M are mandatory; per the original author's note, omitting them
    # leads to an "inactive layout" error.
    for n in range(mccd_circuit.n_logical_qubits):
        res.append('R', [n])
    res.append('TICK')  # After resetting all qubits.

    current = None
    for name, timestep, qubits in mccd_circuit:
        # Close the previous layer BEFORE appending the first gate of a new
        # timestep, so that every gate ends up in the layer of its own
        # timestep. No TICK is needed before the very first gate because the
        # reset layer is already terminated above.
        if current is not None and timestep != current:
            res.append('TICK')
        current = timestep
        res.append(name, qubits)

    if res[-1].name != 'TICK':
        res.append('TICK')  # Before measuring all qubits.
    for n in range(mccd_circuit.n_logical_qubits):
        res.append('M', [n])

    return res

def print_random_circuit(c: RandomCliffordCircuit):
    """Return the items produced by iterating ``c`` as a list (nothing is printed)."""
    return [*c]

def dict_product(input_dict):
    """Yield one dict per element of the Cartesian product of the values.

    Args:
        input_dict: mapping from key to an iterable of candidate values.

    Yields:
        Dicts with the same keys as ``input_dict``, each holding one
        combination of values.
    """
    names = input_dict.keys()
    # itertools.product enumerates every combination of the value lists;
    # each combination is then paired back with the keys.
    for chosen in itertools.product(*input_dict.values()):
        yield dict(zip(names, chosen))

def run_decoder(name: str, circuit: stim.Circuit, shots: int):
    """Run one baseline decoder on freshly sampled shots of ``circuit``.

    Args:
        name: key into ``DECODER_BASELINES`` selecting the decoder.
        circuit: noisy circuit; its detector error model configures the decoder.
        shots: number of shots to sample and decode.

    Returns:
        A dict with the keys ``decoder``, ``logical_accuracy`` and
        ``walltime_seconds``. Only the ``decode_batch`` call is timed; decoder
        construction and sampling are excluded.
    """
    method = DECODER_BASELINES[name](circuit.detector_error_model())
    sampler = circuit.compile_detector_sampler()
    syndrome, labels = sampler.sample(shots=shots, separate_observables=True)
    # perf_counter_ns is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time_ns can.
    begin = time.perf_counter_ns()
    predictions = method.decode_batch(syndrome)
    end = time.perf_counter_ns()
    logical_accuracy = accuracy_score(labels, predictions)
    walltime_seconds = (end - begin) / 1e9
    return dict(
        decoder=name,
        logical_accuracy=logical_accuracy,
        walltime_seconds=walltime_seconds,
    )

def experimental_noise_model(*layouts):
    """Build an ``ExperimentalCircuitNoiseModel`` for ``layouts`` with a fresh ``CircuitNoiseSetup``."""
    setup = CircuitNoiseSetup()
    return ExperimentalCircuitNoiseModel.from_layouts(setup, *layouts)

def average_depolarizing_noise(*layouts, noise_prob=1e-3):
    """Build a ``CircuitNoiseModel`` whose ``prob`` parameter is ``noise_prob``.

    Args:
        *layouts: code layouts the model is attached to.
        noise_prob: value assigned to the setup's ``"prob"`` parameter.

    Returns:
        The configured ``CircuitNoiseModel``.
    """
    noise_setup = CircuitNoiseSetup()
    noise_setup.set_var_param("prob", noise_prob)
    return CircuitNoiseModel.from_layouts(noise_setup, *layouts)

def double_depolarizing_noise(*layouts, noise_prob=1e-3):
    """Build the depolarizing model with twice the given noise probability.

    Args:
        *layouts: code layouts the model is attached to.
        noise_prob: base probability; the model is built with ``2 * noise_prob``.

    Returns:
        The model returned by ``average_depolarizing_noise``.
    """
    # The doubled probability must be passed by keyword: passed positionally it
    # would be swallowed by ``*layouts`` and the default probability used.
    return average_depolarizing_noise(*layouts, noise_prob=noise_prob * 2)

def compile_to_physical(log_cir: stim.Circuit, distance: int, noise_model,
                        rotated=True) -> stim.Circuit:
    """Compile a logical circuit into a noisy physical surface-code circuit.

    Args:
        log_cir: logical circuit; one layout is created per logical qubit.
        distance: code distance passed to the layout factory.
        noise_model: callable taking the layouts (``noise_model(*layouts)``)
            and returning the noise model to apply.
        rotated: use the rotated surface code when true, otherwise the
            unrotated one.

    Returns:
        The physical circuit built by ``experiment_from_schedule``.
    """
    # Choose the gate table and the layout factory of the requested code family.
    if rotated:
        from surface_sim.circuit_blocks.rot_surface_code_css import gate_to_iterator
        from surface_sim.layouts import rot_surface_codes as make_layouts
    else:
        from surface_sim.circuit_blocks.unrot_surface_code_css import gate_to_iterator
        from surface_sim.layouts import unrot_surface_codes as make_layouts

    layouts = make_layouts(log_cir.num_qubits, distance=distance)

    model = noise_model(*layouts)
    detectors = Detectors.from_layouts("pre-gate", *layouts)
    schedule = schedule_from_circuit(log_cir, layouts, gate_to_iterator)
    return experiment_from_schedule(schedule, model, detectors, anc_reset=True)


def random_mirror_symmetric_clifford(circuit_index: str, depth: int,
                                     rotated=True) -> stim.Circuit:
    """Generate a random mirror-symmetric Clifford circuit (Type I or Type II).

    Args:
        circuit_index: ``'3'`` selects ``TypeICircuit`` on one logical qubit,
            ``'4'`` selects ``TypeIICircuit`` on two logical qubits.
        depth: circuit depth before mirroring.
        rotated: draw single-qubit gates from ``ROT_GATES`` when true,
            otherwise from ``UNR_GATES``.

    Returns:
        The sampled circuit converted with ``to_stim_circuit``.
    """
    assert circuit_index in ['3', '4']
    logical_class = TypeICircuit if circuit_index == '3' else TypeIICircuit
    n_logical_qubits = 2 if circuit_index == '4' else 1

    random_circuit = logical_class(
        n_logical_qubits=n_logical_qubits,
        circuit_index=circuit_index,
        depth=depth,
        # The unrotated code has its own gate pool; both branches previously
        # used ROT_GATES.
        single_qubit_gate_list=ROT_GATES if rotated else UNR_GATES,
    )
    random_circuit.sample_circuit()
    return to_stim_circuit(random_circuit)


def generate_circuit(distance: int, depth: int, circuit_index: str, noise_model):
    """Sample a random logical circuit and compile it to a physical circuit.

    Args:
        distance: code distance used for compilation.
        depth: logical circuit depth before mirroring.
        circuit_index: ``'3'`` or ``'4'`` (see ``random_mirror_symmetric_clifford``).
        noise_model: noise-model factory; its ``__name__`` is stored in the config.

    Returns:
        ``(physical_circuit, logical_circuit, config)`` where ``config``
        records the parameters and the physical circuit's detector count.
    """
    logical = random_mirror_symmetric_clifford(circuit_index, depth)
    physical: stim.Circuit = compile_to_physical(logical, distance, noise_model)
    config = {
        'distance': distance,
        'depth': depth,
        'circuit_type_index': circuit_index,
        'num_detectors': physical.num_detectors,
        'noise_model': noise_model.__name__,
    }
    return physical, logical, config


## Circuit Depths

From released source_data.zip, Type I circuits have depths:

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18])

Type II circuits have depths:

array([ 4,  8, 12, 16, 20, 24, 28, 32, 36])


## Noise Model

1. The experimentally motivated noise model, which is the one used in all numerical results in the main text. See Methods section for details.
2. A simplified noise model that replaces all Pauli noise channels, both single-qubit and two-qubit, in the experiment-motivated noise model with a depolarizing noise model using an average noise strength. The probabilities of idling error, reset error, and measurement error remain the same. We refer to this as the average depolarizing noise model.
3. A stronger depolarizing noise model in which the strengths of all noise channels, including single- and two-qubit depolarization, idling error, reset error, and measurement error probability, are doubled compared to the average depolarizing noise model. We refer to this as the double depolarizing noise model.

### Note

1 can only be implemented with `stim`'s builtin instructions like `PAULI_CHANNEL_2` to fill all the parameters.
Or as a custom `Model` of `surface-sim`.

This noise model is used in Results (Fig. 4 & Fig. 5).

2 & 3 can be implemented with `surface-sim`'s `CircuitNoiseModel` with a single `prob` parameter.

The paper does not release the value of the average noise probability. We set it to 1e-3.

### Details of experimentally motivated noise model.

For the numerical studies presented in the ‘Results’ section, we use the stim package for simulation. We consider a circuit-level noise model motivated by the current experimental capability of neutral atom array-based quantum computers. Specifically, we use a circuit-level noise model that includes the following physical noises:

• Each two-qubit physical gate is followed by a two-qubit Pauli noise channel with probability [0.0005, 0.00175, 0.000625, 0.0005, 0, 0, 0, 0.00175, 0, 0, 0, 0.000625, 0, 0, 0.00125]

• Each single-qubit physical gate is followed by a single-qubit depolarizing model with probability [0.0001, 0.0001, 0.0001]

• On a physical level, the atoms are moved to achieve flexible connectivity between different physical qubits. This comes at the cost of having idling error due to the extra time taken during the physical qubit movement, which is captured as a Pauli noise channel with probability [4 × 10−7, 4 × 10−7, 1.6 × 10−6]. This error channel is applied when physical qubit movement happens.

• Resetting a physical qubit has a bit flip error probability of P = 0.002.

• Measuring a physical qubit has a bit flip error probability of P = 0.002.



### Shots & Repeat
We evaluate each decoder over 20 independent runs. In each run, we randomly sample 1,000 syndrome trajectories from Type I/II circuits and average them to obtain a run-level performance estimate. We report the mean across the 20 runs, with error bars showing s.e.m.


In [7]:
num_shots = 1000  # shots sampled per decoder run
trial = 20  # independent runs per (circuit, decoder) pair
noise_model = average_depolarizing_noise  # noise-model factory used throughout the notebook

def get_figure4_settings():
    """Return the parameter grid for the Type I (Fig. 4) benchmark.

    Returns:
        A list of keyword dicts for ``generate_circuit``, one per combination
        of distance and depth.
    """
    distance = [3, 5]
    depth = [2,  4,  6,  8, 10, 12, 14, 16, 18]
    # Type I one qubit. Type II two qubits.
    circuit_index = ['3']

    # Materialise the grid: ``dict_product`` is a generator, which would be
    # exhausted after a single pass. ``get_figure5_settings`` returns a list too.
    return list(dict_product(dict(
        distance=distance,
        depth=depth,
        circuit_index=circuit_index,
        noise_model=[noise_model],
    )))

def get_figure5_settings():
    """Return the parameter grid for the Type II (Fig. 5) benchmark as a list of dicts."""
    grid = {
        'distance': [3, 5],
        'depth': list(range(4, 37, 4)),  # 4, 8, ..., 36
        # Type I one qubit. Type II two qubits.
        'circuit_index': ['4'],
        'noise_model': [noise_model],
    }
    return [*dict_product(grid)]

# Parameter grids for the Type I (Fig. 4) and Type II (Fig. 5) benchmarks.
figure4_settings = get_figure4_settings()
figure5_settings = get_figure5_settings()

In [8]:
def get_syndrome_labels(circuit: stim.Circuit, shots: int, depth: int, n_logical_qubits: int):
    """Sample detector and observable data and reshape the detector data.

    Args:
        circuit: circuit to sample from.
        shots: number of shots.
        depth: circuit depth; the third axis of the result has size ``depth * 2``.
        n_logical_qubits: size of the second axis of the result.

    Returns:
        ``(syndrome, labels)``: ``syndrome`` reshaped to
        ``(shots, n_logical_qubits, depth * 2, -1)`` and the observable flips
        as returned by the sampler.
    """
    detections, observables = circuit.compile_detector_sampler().sample(
        shots=shots, separate_observables=True
    )
    print(detections.shape, observables.shape)
    target_shape = (shots, n_logical_qubits, depth * 2, -1)
    return detections.reshape(target_shape), observables

In [9]:
def generate_bench_circuits(baseline_settings):
    """Generate all benchmark circuits in parallel.

    Args:
        baseline_settings: iterable of keyword dicts for ``generate_circuit``.

    Returns:
        A list of ``(physical_circuit_text, logical_circuit_text, config)``
        tuples, one per setting.
    """

    def as_text(**params):
        # Workers hand the circuits back as text rather than as stim objects.
        physical, logical, config = generate_circuit(**params)
        return str(physical), str(logical), config

    jobs = (delayed(as_text)(**params) for params in baseline_settings)
    generated = Parallel(n_jobs=-1, verbose=1)(jobs)
    print('done')
    return generated


In [10]:
# Show the active noise model's name; it is used as a directory name for saved data.
noise_model.__name__

'average_depolarizing_noise'

Generate circuit

In [11]:
# fig4_circuits = generate_bench_circuits(figure4_settings)
# fig5_circuits = generate_bench_circuits(figure5_settings)

In [12]:
# The circuits must be saved for a fair comparison with MCCD.
# Filename format: d3_c3_D1 means distance=3, circuit_index=3, depth=1.

def save_circuits(subdir, bench_circuits):
    """Write each benchmark circuit and its config under ``./data/bench``.

    Files go to ``./data/bench/<noise model name>/circuits/<subdir>/`` as
    ``<stem>_phy.stim``, ``<stem>_log.stim`` and ``<stem>_config.json``.

    Args:
        subdir: sub-directory name, e.g. ``'fig4'``.
        bench_circuits: iterable of ``(physical_text, logical_text, config)``.
    """
    save_dir = Path('./data/bench') / noise_model.__name__ / 'circuits' / subdir
    save_dir.mkdir(parents=True, exist_ok=True)

    for phy_cir, log_cir, config in bench_circuits:
        stem = f"d{config['distance']}_c{config['circuit_type_index']}_D{config['depth']}"

        def target(tag, suffix):
            return (save_dir / (stem + tag)).with_suffix(suffix)

        target('_phy', '.stim').write_text(phy_cir)
        target('_log', '.stim').write_text(log_cir)
        target('_config', '.json').write_text(json.dumps(config, indent=4))


Save circuits.

In [13]:
# save_circuits('fig4', fig4_circuits)
# save_circuits('fig5', fig5_circuits)

Load circuits.

In [14]:
def load_circuits(root_dir: Path):
    """Load every saved benchmark circuit found in ``root_dir``.

    Args:
        root_dir: directory containing ``*_phy.stim`` files with matching
            ``*_log.stim`` and ``*_config.json`` siblings.

    Returns:
        A list of ``(physical_text, logical_text, config)`` tuples ordered by
        the sorted physical-circuit paths.
    """

    def sibling(phy_path, tag, suffix):
        # Swap the '_phy' marker in the stem to locate the companion file.
        return phy_path.with_name(phy_path.stem.replace('_phy', tag) + suffix)

    loaded = []
    for phy_path in sorted(root_dir.glob('*_phy.stim')):
        physical = stim.Circuit.from_file(phy_path)
        logical = stim.Circuit.from_file(sibling(phy_path, '_log', '.stim'))
        config = json.loads(sibling(phy_path, '_config', '.json').read_text())
        loaded.append((str(physical), str(logical), config))
    return loaded

In [15]:
# Load the previously saved circuits from the directories that save_circuits writes to.
fig4_circuits = load_circuits(Path('./data/bench') / noise_model.__name__ / 'circuits' / 'fig4')
fig5_circuits = load_circuits(Path('./data/bench') / noise_model.__name__ / 'circuits' / 'fig5')

In [16]:
def run_decoder_tasks(bench_circuits, bench_decoders, df_name):
    """Run the given decoders on every benchmark circuit and save the results.

    Each (circuit, decoder) pair is run ``trial`` times with ``num_shots``
    shots. The results are written in long format to
    ``./data/bench/<noise model name>/<df_name>.csv``.

    Args:
        bench_circuits: iterable of ``(physical_text, logical_text, config)``.
        bench_decoders: iterable of decoder names (keys of ``DECODER_BASELINES``).
        df_name: file name (without extension) of the CSV to write.

    Returns:
        The long-format result dataframe with the columns ``decoder``,
        ``distance``, ``depth``, ``circuit_type_index``, ``metric`` and ``value``.
    """
    def run_decoder_plus(config, cir_str, **kwargs):
        res = config.copy()
        res.update(kwargs)
        # Pre-fill the decoder label and NaN metrics so that a failed run still
        # yields a labelled row, and the metric columns exist even when every
        # run fails (otherwise ``pd.melt`` below raises a KeyError).
        res.update(
            decoder=kwargs['name'],
            walltime_seconds=float('nan'),
            logical_accuracy=float('nan'),
        )

        kwargs['circuit'] = stim.Circuit.from_file(StringIO(cir_str))
        try:
            res.update(run_decoder(**kwargs))
        except Exception as e:
            # Best effort: report the failure and keep the NaN placeholders so
            # one failing decoder does not abort the whole benchmark.
            print(e)
        else:
            print(res['decoder'], 'depth', res['depth'], 'distance', res['distance'])

        return res

    def tasks():
        for phy_cir, _, config in bench_circuits:
            for decoder in bench_decoders:
                for _ in range(trial):
                    yield delayed(run_decoder_plus)(config, phy_cir,
                                                    name=decoder, shots=num_shots)

    bench_result = Parallel(n_jobs=-1, verbose=1)(tasks())

    df = pd.DataFrame.from_records(bench_result)
    df = pd.melt(df, id_vars=['decoder', 'distance', 'depth', 'circuit_type_index'],
             value_vars=['walltime_seconds', 'logical_accuracy'],
             var_name='metric',
             value_name='value')

    filename = Path('./data/bench/') / noise_model.__name__ / f'{df_name}.csv'
    filename.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(filename, index=False)
    print('done')
    return df


In [None]:
# Benchmark every baseline decoder on the fig4 circuits; results are also written to fig4.csv.
df4 = run_decoder_tasks(fig4_circuits, DECODER_BASELINES.keys(), 'fig4')


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
Set parameter Username
Set parameter LicenseID to value 2739212
Academic license - for non-commercial use only - expires 2026-11-15
BPOSD depth 10 distance 3
BPOSD depth 10 distance 3
Set parameter Username
Set parameter LicenseID to value 2739212
Academic license - for non-commercial use only - expires 2026-11-15
Set parameter Username
Set parameter LicenseID to value 2739212
Academic license - for non-commercial use only - expires 2026-11-15
BPOSD depth 10 distance 3
Set parameter Username
Set parameter LicenseID to value 2739212
Academic license - for non-commercial use only - expires 2026-11-15
BPOSD depth 10 distance 3
Set parameter Username
Set parameter LicenseID to value 2739212
Academic license -

[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:  1.2min


MLE depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MLE depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MWPM depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
MLE depth 10 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3
BPOSD depth 12 distance 3


[Parallel(n_jobs=-1)]: Done 180 tasks      | elapsed:  9.7min


BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3
BPOSD depth 16 distance 3


In [None]:
# Benchmark every baseline decoder on the fig5 circuits; results are also written to fig5.csv.
df5 = run_decoder_tasks(fig5_circuits, DECODER_BASELINES.keys(), 'fig5')


## Visualization

In [None]:
# Fixed decoder order so that every plot uses the same hue assignment.
hue_order = 'MLE BPOSD MWPM'.split()

### Results on Type I Circuits

In [None]:
# Reload the saved fig4 results from CSV.
df4 = pd.read_csv(f'./data/bench/{noise_model.__name__}/fig4.csv')

In [None]:
# Logical accuracy versus depth for Type I circuits, one panel per code distance.
plt.figure(figsize=(12, 5))
for panel, dist in enumerate((3, 5), start=1):
    ax = plt.subplot(1, 2, panel)
    rows = df4[(df4.metric == 'logical_accuracy') & (df4.distance == dist)]
    sns.lineplot(rows, x='depth', y='value', hue='decoder', hue_order=hue_order,
                 errorbar=None, style='decoder', markers=True, dashes=False,
                 markersize=10, linewidth=1, ax=ax)
    plt.ylabel('logical_accuracy')
    plt.title(f'd = {dist}, Type I circuits, Logical Accuracy')
plt.suptitle(f'Noise Model: {noise_model.__name__}')


Compared with Fig. 4, the range of decrease is much smaller for Type I Circuits.
However, our decrease ranges on Type I Circuits are still smaller than on Type II Circuits, which is similar to Fig. 4 and Fig. 5.

The performances of the different decoders are also closer to each other than in Fig. 4.

The relative order is not well-preserved because they are nearly the same for each depth.

In [None]:
# Decoding wall time versus depth for Type I circuits (log scale), one panel per code distance.
plt.figure(figsize=(12, 5))
for panel, dist in enumerate((3, 5), start=1):
    ax = plt.subplot(1, 2, panel)
    rows = df4[(df4.metric == 'walltime_seconds') & (df4.distance == dist)]
    sns.lineplot(rows, x='depth', y='value', hue='decoder', hue_order=hue_order,
                 errorbar=None, style='decoder', markers=True, dashes=False,
                 markersize=10, linewidth=1, ax=ax)
    plt.ylabel('Wall time (s)')
    plt.yscale('log')
    plt.title(f'd = {dist}, Type I circuits, Wall time')
plt.suptitle(f'Noise Model: {noise_model.__name__}')


Compared with Fig. 4, our wall times are shorter for Type I circuits but longer for Type II circuits.

The trend of longer wall times for deeper circuits is less clear than in Fig. 4.

The relative order of baseline methods is preserved: MLE > BPOSD > MWPM.

### Results on Type II Circuits

In [None]:
# Reload the saved fig5 results from CSV.
df5 = pd.read_csv(f'./data/bench/{noise_model.__name__}/fig5.csv')


In [None]:
# Logical accuracy versus depth for Type II circuits, one panel per code distance.
plt.figure(figsize=(12, 5))
for panel, dist in enumerate((3, 5), start=1):
    ax = plt.subplot(1, 2, panel)
    rows = df5[(df5.metric == 'logical_accuracy') & (df5.distance == dist)]
    sns.lineplot(rows, x='depth', y='value', hue='decoder', hue_order=hue_order,
                 errorbar=None, style='decoder', markers=True, dashes=False,
                 markersize=10, linewidth=1, ax=ax)
    plt.ylabel('logical_accuracy')
    plt.title(f'd = {dist}, Type II circuits, Logical Accuracy')
plt.suptitle(f'Noise Model: {noise_model.__name__}')

Compared to Fig. 5, the relative order of decoders is preserved: MLE >= BPOSD >= MWPM.

The range of logical accuracy as the depth increases is also very close to that of Fig. 5.

The lower bounds of logical accuracy in Fig. 5 are about 10% better than ours for both d = 3 and d = 5.

The upper bounds of both ours and Fig. 5 are nearly the same: close to 100%.


In [None]:
# Decoding wall time versus depth for Type II circuits (log scale), one panel per code distance.
plt.figure(figsize=(12, 5))
for panel, dist in enumerate((3, 5), start=1):
    ax = plt.subplot(1, 2, panel)
    rows = df5[(df5.metric == 'walltime_seconds') & (df5.distance == dist)]
    sns.lineplot(rows, x='depth', y='value', hue='decoder', hue_order=hue_order,
                 errorbar=None, style='decoder', markers=True, dashes=False,
                 markersize=10, linewidth=1, ax=ax)
    plt.ylabel('Wall time (s)')
    plt.yscale('log')
    # plt.ylim(1e-3, 10)
    plt.title(f'd = {dist}, Type II circuits, Wall time')
plt.suptitle(f'Noise Model: {noise_model.__name__}')

Again, the relative order is preserved: MLE > BPOSD > MWPM.

The trend of increasing wall time in seconds is also nearly the same.

The absolute ranges of wall time for each decoder are quite different.

## Conclusion

### Pros
1. Overall trends are reproduced.
    - Increasing wall time as the depth increases.
    - Decreasing logical accuracy as the depth increases.
2. Overall relative orders of different decoders are reproduced.
    - For logical accuracy, MLE > BPOSD > MWPM.
    - For wall time, MLE > BPOSD > MWPM.

### Cons
1. Absolute value ranges are not reproduced.
2. MCCD is not reproduced.
3. The experimental noise model is not reproduced.