In [None]:
import subprocess
from pathlib import Path
import pandas as pd
import csv
import itertools
import re
import sys

# Base directory: executables are looked up here, it is passed as the
# subprocess working directory, and all produced files are read from it.
CWD = Path(r"")

# Parameter settings: value lists swept by the benchmark loops (empty here).
N_list: list[int] = []
L_list: list[int] = []
R_list: list[int] = []
D_list: list[int] = []

# Methods to run, in execution order.
METHODS: list[str] = ["linear", "cfmindex", "2fmindex"]

# Executable file name for every pipeline step, keyed by step name.
EXE = dict([
    ("ref", "reference_create.exe"),
    ("reads", "read_create.exe"),
    ("linear", "linear_assemble.exe"),
    ("fmindex", "fmindex_assemble.exe"),
    ("cfmindex", "cfmindex_assemble.exe"),
    ("2fmindex", "2fmindex_assemble.exe"),
])

# Output file names for every step except the two generators ("ref", "reads").
TIMING_FILE = {m: m + "_timing.txt" for m in EXE if m not in {"ref", "reads"}}
ASSEMBLED_FILE = {m: m + "_assembled.txt" for m in EXE if m not in {"ref", "reads"}}

# Fixed file names shared by the pipeline steps.
REF_MUT = "reference_mutated.txt"
REF_ORI = "reference.txt"
READS = "reads.txt"

In [None]:
def run_exe(name: str, *inputs: int) -> None:
    """Run one pipeline executable, feeding ``inputs`` to it on stdin.

    Args:
        name: Key into ``EXE`` selecting the executable file name.
        *inputs: Values sent on stdin, one per line, with a trailing newline.

    Raises:
        KeyError: ``name`` is not a key of ``EXE``.
        RuntimeError: The process exited with a non-zero return code. The
            message carries the captured stdout and stderr.
    """
    exe_path = CWD / EXE[name]
    payload = "\n".join(map(str, inputs)) + "\n"
    proc = subprocess.run(
        [str(exe_path)],
        input=payload,
        text=True,
        cwd=CWD,
        capture_output=True,
    )
    if proc.returncode:
        # Raise an ordinary exception rather than calling sys.exit():
        # SystemExit derives from BaseException, so the callers'
        # ``except Exception`` handlers (which are meant to skip a failed
        # step and keep going) would never see it.
        raise RuntimeError(
            f"[{exe_path.name}] failed\nstdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
        )

def parse_total_ms(timing_path: Path) -> int:
    """Return the integer found on the "Total pipeline time" line of a file.

    The first line containing the text ``Total pipeline time`` is used, and
    the first run of decimal digits on that line is returned as an ``int``.

    Args:
        timing_path: Path of the timing report to scan.

    Raises:
        RuntimeError: No line mentions the total time, or the line that does
            contains no digits.
    """
    with timing_path.open() as f:
        for line in f:
            if "Total pipeline time" not in line:
                continue
            match = re.search(r"(\d+)", line)
            if match is None:
                # Report a malformed line explicitly instead of failing with
                # an AttributeError on ``None.group``.
                raise RuntimeError(
                    f"No numeric total time in {timing_path}: {line.strip()!r}"
                )
            return int(match.group(1))
    raise RuntimeError(f"Total time not found in {timing_path}")

def accuracy(mut_path: Path, asm_path: Path) -> float:
    """Return the percentage of positions at which two text files agree.

    Both files are read in full and stripped of surrounding whitespace, then
    compared character by character.

    Args:
        mut_path: Path of the file used as the comparison baseline.
        asm_path: Path of the file compared against the baseline.

    Returns:
        ``100 * matches / length`` rounded to two decimal places.

    Raises:
        ValueError: The stripped contents differ in length, or both are empty.
    """
    mut = mut_path.read_text().strip()
    asm = asm_path.read_text().strip()
    if len(mut) != len(asm):
        raise ValueError("Length mismatch for accuracy check")
    if not mut:
        # Two empty files pass the length check; fail clearly here instead of
        # dividing by zero below.
        raise ValueError("Empty sequences for accuracy check")
    matches = sum(m == a for m, a in zip(mut, asm))
    return round(100 * matches / len(mut), 2)

# CSV columns: the four swept parameters, then one time column and one
# accuracy column per method.
fieldnames = ["N", "L", "R", "D"]
fieldnames += [f"{m}_time" for m in METHODS]
fieldnames += [f"{m}_acc" for m in METHODS]

rows = []

# Sweep every combination of the four parameter lists.
for N, L, R, D in itertools.product(N_list, L_list, R_list, D_list):

    try:
        # Generation step: run the reference and read generators.
        run_exe("ref", N)
        run_exe("reads", L, R, D)

        times = {}
        accs = {}

        for meth in METHODS:
            try:
                run_exe(meth, D)
                elapsed = parse_total_ms(CWD / TIMING_FILE[meth])
                score = accuracy(CWD / REF_MUT, CWD / ASSEMBLED_FILE[meth])
            except Exception as e:
                # A failing method leaves blanks in its columns; the other
                # methods still run.
                print(f"  ⚠ {meth} 실패 (무시하고 계속 진행): {e}")
                elapsed = None
                score = None
            times[meth] = elapsed
            accs[meth] = score

        rows.append({
            "N": N,
            "L": L,
            "R": R,
            "D": D,
            **{f"{m}_time": times.get(m) for m in METHODS},
            **{f"{m}_acc": accs.get(m) for m in METHODS},
        })

    except Exception as e:
        # The generation step failed: report it and move on to the next
        # combination without recording a row.
        print(f"전체 조합 실패: N={N}, L={L}, R={R}, D={D} → {e}")

# Write all collected rows in one go.
csv_path = CWD / "benchmark_results.csv"
with csv_path.open("w", newline="") as cf:
    writer = csv.DictWriter(cf, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

# Remove the intermediate text files; ones that do not exist are skipped.
txt_to_remove = [CWD / name for name in (REF_MUT, REF_ORI, READS)]
txt_to_remove += [CWD / TIMING_FILE[m] for m in METHODS]
txt_to_remove += [CWD / ASSEMBLED_FILE[m] for m in METHODS]

for p in txt_to_remove:
    p.unlink(missing_ok=True)

# Load the results back and display them as the cell output.
df = pd.read_csv(csv_path)
df

In [None]:
def run_exe(name: str, *inputs: int) -> None:
    """Run one pipeline executable, feeding ``inputs`` to it on stdin.

    Args:
        name: Key into ``EXE`` selecting the executable file name.
        *inputs: Values sent on stdin, one per line, with a trailing newline.

    Raises:
        KeyError: ``name`` is not a key of ``EXE``.
        RuntimeError: The process exited with a non-zero return code. The
            message carries the captured stdout and stderr.
    """
    exe_path = CWD / EXE[name]
    payload = "\n".join(map(str, inputs)) + "\n"
    proc = subprocess.run(
        [str(exe_path)],
        input=payload,
        text=True,
        cwd=CWD,
        capture_output=True,
    )
    if proc.returncode:
        # Raise an ordinary exception rather than calling sys.exit():
        # SystemExit derives from BaseException, so the callers'
        # ``except Exception`` handlers (which are meant to skip a failed
        # step and keep going) would never see it.
        raise RuntimeError(
            f"[{exe_path.name}] failed\nstdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"
        )

def parse_total_ms(timing_path: Path) -> int:
    """Return the integer found on the "Total pipeline time" line of a file.

    The first line containing the text ``Total pipeline time`` is used, and
    the first run of decimal digits on that line is returned as an ``int``.

    Args:
        timing_path: Path of the timing report to scan.

    Raises:
        RuntimeError: No line mentions the total time, or the line that does
            contains no digits.
    """
    with timing_path.open() as f:
        for line in f:
            if "Total pipeline time" not in line:
                continue
            match = re.search(r"(\d+)", line)
            if match is None:
                # Report a malformed line explicitly instead of failing with
                # an AttributeError on ``None.group``.
                raise RuntimeError(
                    f"No numeric total time in {timing_path}: {line.strip()!r}"
                )
            return int(match.group(1))
    raise RuntimeError(f"Total time not found in {timing_path}")

def accuracy(mut_path: Path, asm_path: Path) -> float:
    """Return the percentage of positions at which two text files agree.

    Both files are read in full and stripped of surrounding whitespace, then
    compared character by character.

    Args:
        mut_path: Path of the file used as the comparison baseline.
        asm_path: Path of the file compared against the baseline.

    Returns:
        ``100 * matches / length`` rounded to two decimal places.

    Raises:
        ValueError: The stripped contents differ in length, or both are empty.
    """
    mut = mut_path.read_text().strip()
    asm = asm_path.read_text().strip()
    if len(mut) != len(asm):
        raise ValueError("Length mismatch for accuracy check")
    if not mut:
        # Two empty files pass the length check; fail clearly here instead of
        # dividing by zero below.
        raise ValueError("Empty sequences for accuracy check")
    matches = sum(m == a for m, a in zip(mut, asm))
    return round(100 * matches / len(mut), 2)

# CSV columns: the three swept parameters, then one time column and one
# accuracy column per method.
# NOTE(review): the derived R value is not written to the CSV - confirm
# whether it should be recorded alongside N, L and D.
fieldnames = (
    ["N", "L", "D"] +
    [f"{m}_time" for m in METHODS] +
    [f"{m}_acc"  for m in METHODS]
)

rows = []

# Sweep every combination of N, L and D; R is derived from N and L.
for N, L, D in itertools.product(N_list, L_list, D_list):

    try:
        # R = floor(18750 / (N / L)), computed as floor(18750 * L / N) in
        # integer arithmetic. The float form is off by one whenever N / L is
        # not exactly representable (18750 // (10 / 3) gives 5624.0, while
        # the exact result is 5625). Computing it inside the try means
        # N == 0 is reported as a failed combination instead of aborting the
        # whole sweep.
        # NOTE(review): 18750 is an unexplained constant - document its origin.
        R = int(18750 * L // N)

        # Generation step: run the reference and read generators.
        run_exe("ref", N)
        run_exe("reads", L, R, D)

        times = {}
        accs  = {}

        for meth in METHODS:
            try:
                run_exe(meth, D)
                tfile = CWD / TIMING_FILE[meth]
                afile = CWD / ASSEMBLED_FILE[meth]

                times[meth] = parse_total_ms(tfile)
                accs[meth]  = accuracy(CWD / REF_MUT, afile)
            except Exception as e:
                # A failing method leaves blanks in its columns; the other
                # methods still run.
                print(f"  ⚠ {meth} 실패 (무시하고 계속 진행): {e}")
                times[meth] = None
                accs[meth]  = None

        row = {"N": N, "L": L, "D": D}
        row.update({f"{m}_time": times.get(m) for m in METHODS})
        row.update({f"{m}_acc" : accs.get(m)  for m in METHODS})
        rows.append(row)

    except Exception as e:
        # Deriving R or the generation step failed: report it and move on to
        # the next combination without recording a row.
        print(f"전체 조합 실패: N={N}, L={L}, D={D} → {e}")

# NOTE(review): this is the same path the N/L/R/D sweep cell writes to, so
# running both cells overwrites the earlier results - confirm this is intended.
csv_path = CWD / "benchmark_results.csv"
with csv_path.open("w", newline="") as cf:
    writer = csv.DictWriter(cf, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

# Remove the intermediate text files; ones that do not exist are skipped.
txt_to_remove = [
    CWD / REF_MUT,
    CWD / REF_ORI,
    CWD / READS,
    *[CWD / TIMING_FILE[m]   for m in METHODS],
    *[CWD / ASSEMBLED_FILE[m] for m in METHODS],
]

for p in txt_to_remove:
    p.unlink(missing_ok=True)

# Load the results back and display them as the cell output.
df = pd.read_csv(csv_path)
df