In [3]:
import os
import re
from openpyxl import Workbook

# Collect the timing records from every benchmark log under LOG_DIR into a
# single Excel sheet, one row per matching log line.
LOG_DIR = "logs"
OUT_FILE = "benchmark_results.xlsx"

# Log files are named benchmark_bs<batch_size>_nc<num_candidate>.log; the two
# captured groups become the batch_size / num_candidate columns.
fname_pattern = re.compile(r"benchmark_bs(\d+)_nc(\d+)\.log")

# A record line carries three labelled values: "KV Caches", "Candidate
# Embeddings" and "Total time(ms)". Lines that do not match are ignored.
line_pattern = re.compile(
    r"KV Caches:\s*(\d+),\s*Candidate Embeddings:\s*(\d+),\s*Total time\(ms\):\s*([0-9.]+)"
)

wb = Workbook()
ws = wb.active
ws.title = "results"

headers = [
    "file",
    "batch_size",
    "num_candidate",
    "kv_caches",
    "candidate_embeddings",
    "total_time_ms",
]
ws.append(headers)

# Number of record rows written (the header row is not counted).
count = 0

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of the workbook reproducible across runs and machines.
for fname in sorted(os.listdir(LOG_DIR)):
    # fullmatch() rather than match(): match() only anchors at the start, so a
    # leftover such as "benchmark_bs1_nc128.log.bak" would otherwise be
    # ingested and its records counted a second time.
    m = fname_pattern.fullmatch(fname)
    if not m:
        continue

    batch_size = int(m.group(1))
    num_candidate = int(m.group(2))
    path = os.path.join(LOG_DIR, fname)

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            m2 = line_pattern.search(line)
            if not m2:
                continue

            kv_caches = int(m2.group(1))
            cand_emb = int(m2.group(2))
            total_time = float(m2.group(3))

            # Column order must stay in sync with `headers` above.
            ws.append([
                fname,
                batch_size,
                num_candidate,
                kv_caches,
                cand_emb,
                total_time,
            ])
            count += 1

wb.save(OUT_FILE)

print(f"Done. Parsed {count} records.")
print(f"Saved to: {OUT_FILE}")

Done. Parsed 1024 records.
Saved to: benchmark_results.xlsx


In [4]:
import pandas as pd

# Split the combined results workbook into one file per batch size.
IN_FILE = "benchmark_results.xlsx"

# The input does not change between iterations, so read it once instead of
# re-parsing the same workbook for every batch size.
df = pd.read_excel(IN_FILE)

for batch_size in [1, 2, 4, 8]:
    # NOTE: this rebinds the notebook-level OUT_FILE on every iteration; after
    # the loop it names the last per-batch-size file written.
    OUT_FILE = f"benchmark_results_bs{batch_size}.xlsx"

    # Despite the "bs1" in its name, this holds the rows for the *current*
    # batch_size. The name is kept unchanged in case other cells read it.
    df_bs1 = df[df["batch_size"] == batch_size]

    df_bs1.to_excel(OUT_FILE, index=False)

    print(f"Done. {len(df_bs1)} rows saved to {OUT_FILE}")

Done. 256 rows saved to benchmark_results_bs1.xlsx
Done. 256 rows saved to benchmark_results_bs2.xlsx
Done. 256 rows saved to benchmark_results_bs4.xlsx
Done. 256 rows saved to benchmark_results_bs8.xlsx


In [6]:
# Split the combined results workbook into one file per num_candidate value.
IN_FILE = "benchmark_results.xlsx"

# The input does not change between iterations, so read it once instead of
# re-parsing the same workbook for every num_candidate value.
df = pd.read_excel(IN_FILE)

for num_candidate in [128, 256, 512, 1024]:
    # NOTE: this rebinds the notebook-level OUT_FILE on every iteration; after
    # the loop it names the last per-num_candidate file written.
    OUT_FILE = f"benchmark_results_nc{num_candidate}.xlsx"

    # Despite its name, df_bs1 here holds the rows for the current
    # num_candidate (not a batch size). The name is kept unchanged in case
    # other cells read it.
    df_bs1 = df[df["num_candidate"] == num_candidate]

    df_bs1.to_excel(OUT_FILE, index=False)

    print(f"Done. {len(df_bs1)} rows saved to {OUT_FILE}")

Done. 256 rows saved to benchmark_results_nc128.xlsx
Done. 256 rows saved to benchmark_results_nc256.xlsx
Done. 256 rows saved to benchmark_results_nc512.xlsx
Done. 256 rows saved to benchmark_results_nc1024.xlsx
