In [3]:
# reformat_experiments.py -- paste into a notebook cell and run
import re
import csv
from pathlib import Path
from datetime import datetime

# --- CONFIG ---
# Point to your text file with the copied experiment output, or set `input_text` to a multiline string.
input_path = Path("output.txt")  # create this from your copied output, or leave missing and use input_text
input_text = None  # if you prefer to paste the content directly below as a multiline string, set it here.

out_csv = Path("experiment_parsed.csv")

# --- HELPER PARSING ---
# Primary regex to match lines like:
# Experiment 1/26240: graphstate_nativegates_ibm_opt0_4q (4q), global, noise=0.01, comm=1.0x, cat, parts=2
LINE_RE = re.compile(
    r"""Experiment\s+(?P<idx>\d+)\s*/\s*(?P<total>\d+)\s*:\s*
        (?P<fname>\S+)       # filename (no whitespace)
        \s*\(\s*(?P<qbits>\d+)q\s*\)\s*,\s*
        (?P<strategy>[^,]+)\s*,\s*
        noise\s*=\s*(?P<noise>[\d.]+)\s*,\s*
        comm\s*=\s*(?P<comm>[\d.]+)x\s*,\s*
        (?P<primitive>[^,]+)\s*,\s*
        parts\s*=\s*(?P<parts>\d+)
    """,
    re.IGNORECASE | re.VERBOSE,
)

# Fallback looser regex for lines that slightly differ
LOOSE_RE = re.compile(
    r"Experiment\s*(?P<idx>\d+)/(?P<total>\d+):\s*(?P<fname>\S+).*\(?(?P<qbits>\d+)q\)?.*noise\s*=\s*(?P<noise>[\d.]+).*comm\s*=\s*(?P<comm>[\d.]+)x.*parts\s*=\s*(?P<parts>\d+)",
    re.IGNORECASE,
)

def parse_circuit_filename(fname: str):
    """
    Try to extract family, gate_set (if present), opt_level, qubits from filename
    Example:
      qpeinexact_nativegates_ibm_opt0_18q  -> family=qpeinexact, gate_set=nativegates_ibm, opt=opt0, qubits=18
    """
    parts = fname.split("_")
    family = parts[0] if parts else ""
    gate_set = None
    opt = None
    qubits = None
    # find opt token and qubits token
    for p in parts[1:]:
        if p.startswith("opt"):
            opt = p
        if p.endswith("q") and p[:-1].isdigit():
            try:
                qubits = int(p[:-1])
            except:
                qubits = None
    # guess gate_set as middle piece if long enough
    if len(parts) >= 3:
        gate_set = "_".join(parts[1:-1]) if parts[-1].endswith("q") else "_".join(parts[1:])
    elif len(parts) == 2:
        gate_set = parts[1] if not parts[1].endswith("q") else None
    return {"family": family, "gate_set": gate_set, "opt": opt, "qubits_parsed": qubits}

# --- READ INPUT ---
if input_text is None:
    if not input_path.exists():
        raise SystemExit(f"Input file {input_path} not found. Create it with the cell output you copied, or set input_text.")
    input_text = input_path.read_text(errors="ignore")

lines = [l.strip() for l in input_text.splitlines() if l.strip()]
lines = [l for l in lines if "Error: division by zero" not in l]

if not lines:
    raise SystemExit("No non-empty lines found in input text. Paste the lines into output.txt or set input_text.")

# --- PARSE LINES ---
rows = []
for line in lines:
    m = LINE_RE.search(line)
    if not m:
        m = LOOSE_RE.search(line)
    if not m:
        # if totally unrecognized, store as raw
        rows.append({
            "idx": None, "total": None, "fname": None, "qbits": None,
            "strategy": None, "noise": None, "comm_multiplier": None, "primitive": None, "parts": None,
            "orig_line": line,
        })
        continue
    d = m.groupdict()
    try:
        idx = int(d.get("idx")) if d.get("idx") else None
    except:
        idx = None
    try:
        total = int(d.get("total")) if d.get("total") else None
    except:
        total = None
    fname = d.get("fname")
    try:
        qbits = int(d.get("qbits")) if d.get("qbits") else None
    except:
        qbits = None
    strategy = d.get("strategy").strip() if d.get("strategy") else None

    # robust numeric parsing
    try:
        noise = float(d.get("noise")) if d.get("noise") else None
    except:
        noise = None
    try:
        comm = float(d.get("comm")) if d.get("comm") else None
    except:
        comm = None

    primitive = d.get("primitive").strip() if d.get("primitive") else None
    try:
        parts = int(d.get("parts")) if d.get("parts") else None
    except:
        parts = None

    parsed_fname = parse_circuit_filename(fname) if fname else {}
    row = {
        "idx": idx,
        "total": total,
        "fname": fname,
        "family": parsed_fname.get("family"),
        "gate_set": parsed_fname.get("gate_set"),
        "opt": parsed_fname.get("opt"),
        "qbits": qbits if qbits is not None else parsed_fname.get("qubits_parsed"),
        "strategy": strategy,
        "noise": noise,
        "comm_multiplier": comm,
        "primitive": primitive,
        "parts": parts,
        "orig_line": line,
    }
    rows.append(row)

# --- WRITE CSV ---
fieldnames = [
    "idx", "total", "fname", "family", "gate_set", "opt", "qbits",
    "strategy", "noise", "comm_multiplier", "primitive", "parts", "orig_line"
]

with out_csv.open("w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    for r in rows:
        # ensure we only write keys present in fieldnames (prevents ValueError)
        cleaned = {k: r.get(k, None) for k in fieldnames}
        writer.writerow(cleaned)

print(f"Wrote {len(rows)} parsed rows to {out_csv.resolve()}")
print("Sample rows:")
import pandas as pd
df = pd.read_csv(out_csv)
# display first 20 rows in notebook; otherwise print
try:
    from IPython.display import display
    display(df.head(20))
except Exception:
    print(df.head(20))


Wrote 520 parsed rows to /Users/mariagragera/Desktop/ZNE-DQC/experiment_parsed.csv
Sample rows:


Unnamed: 0,idx,total,fname,family,gate_set,opt,qbits,strategy,noise,comm_multiplier,primitive,parts,orig_line
0,1,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.0,cat,2,Experiment 1/26240: graphstate_nativegates_ibm...
1,2,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.0,cat,4,Experiment 2/26240: graphstate_nativegates_ibm...
2,3,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.0,teleportation,2,Experiment 3/26240: graphstate_nativegates_ibm...
3,4,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.0,teleportation,4,Experiment 4/26240: graphstate_nativegates_ibm...
4,5,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.05,cat,2,Experiment 5/26240: graphstate_nativegates_ibm...
5,6,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.05,cat,4,Experiment 6/26240: graphstate_nativegates_ibm...
6,7,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.05,teleportation,2,Experiment 7/26240: graphstate_nativegates_ibm...
7,8,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.05,teleportation,4,Experiment 8/26240: graphstate_nativegates_ibm...
8,9,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.1,cat,2,Experiment 9/26240: graphstate_nativegates_ibm...
9,10,26240,graphstate_nativegates_ibm_opt0_4q,graphstate,nativegates_ibm_opt0,opt0,4,global,0.01,1.1,cat,4,Experiment 10/26240: graphstate_nativegates_ib...
