In [1]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import re
import pandas as pd

In [3]:
# Directory that holds one sub-directory per experiment run.
ROOT = "artifact_checkgpt/CheckGPT/exp/"

# Run directories are named
#   <domain>_<arch>_Train_Task<n>_Prompt<n>_Test_Task<n>_Prompt<n>
# Each fragment below contributes its named groups; the fragments are joined
# verbatim, so the compiled expression is the plain concatenation of them.
_RUN_DIR_FRAGMENTS = (
    r"(?P<domain>[A-Za-z0-9]+)_",
    r"(?P<arch>[A-Za-z0-9]+)_Train_Task(?P<task>\d+)",
    r"_Prompt(?P<prompt>\d+)_Test_Task(?P<test_task>\d+)",
    r"_Prompt(?P<test_prompt>\d+)",
)
pattern = re.compile("".join(_RUN_DIR_FRAGMENTS))

In [4]:
# Walk ROOT, and for every run directory whose name matches `pattern` read its
# train.log and record the first reported test-metrics line as one row.
# Produces:
#   rows -- list of dicts, one per run directory with a parsable log
#   df   -- DataFrame of those rows sorted by domain/architecture/task/prompt

# Column order of the summary table. Passing it explicitly to the DataFrame
# constructor keeps the columns present even when no run was found, so the
# sort below cannot fail with a KeyError on an empty result.
RESULT_COLUMNS = [
    "domain",
    "architecture",
    "task",
    "prompt",
    "test_accuracy",
    "acc_gpt",
    "acc_human",
    "f1",
]

# Compiled once here rather than being handed to re.search for every log line.
metrics_re = re.compile(
    r"Test accuracy:\s*([\d\.]+)%.*?Acc_GPT:\s*([\d\.]+)%.*?Acc_Human:\s*([\d\.]+)%.*?F1:\s*([\d\.]+)"
)

rows = []
for dirpath, dirnames, _ in os.walk(ROOT):
    for d in dirnames:
        # Cheap substring filter before running the full regex.
        if "Test" not in d:
            continue

        m = pattern.match(d)
        if not m:
            continue

        # NOTE(review): `pattern` also captures test_task / test_prompt, but
        # they are not recorded. If runs with differing train and test settings
        # exist, they would share the same key columns -- confirm.
        info = m.groupdict()
        domain = info["domain"]
        arch = info["arch"]
        task = int(info["task"])
        prompt = int(info["prompt"])

        log_path = os.path.join(dirpath, d, "train.log")
        if not os.path.isfile(log_path):
            continue

        # Only the first line carrying all four metrics is used.
        # errors="replace" keeps a single undecodable byte in one log from
        # aborting the whole scan; the metrics line itself is plain ASCII.
        mm = None
        with open(log_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                mm = metrics_re.search(line)
                if mm:
                    break

        # Log exists but never reported test metrics: skip the run.
        if mm is None:
            continue

        rows.append({
            "domain": domain,
            "architecture": arch,
            "task": task,
            "prompt": prompt,
            "test_accuracy": float(mm.group(1)),
            "acc_gpt": float(mm.group(2)),
            "acc_human": float(mm.group(3)),
            "f1": float(mm.group(4)),
        })

if not rows:
    # os.walk yields nothing for a missing directory, so a wrong ROOT would
    # otherwise go unnoticed until later cells.
    print(f"No parsable train.log found under {ROOT!r}; the summary is empty.")

df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
df = df.sort_values(by=["domain", "architecture", "task", "prompt"])

In [5]:
# Persist the sorted summary (without the index column) for downstream use,
# then echo the table as plain text in the cell output.
df.to_csv("results_summary.csv", index=False)
print(df)

    domain       architecture  task  prompt  test_accuracy  acc_gpt  \
186     CS  BiLSTMwoAttention     1       1          99.90     99.9   
106     CS  BiLSTMwoAttention     1       2          99.85     99.7   
28      CS  BiLSTMwoAttention     1       3          99.85     99.8   
182     CS  BiLSTMwoAttention     1       4          99.95     99.9   
210     CS  BiLSTMwoAttention     2       1          99.40     99.2   
..     ...                ...   ...     ...            ...      ...   
67     PHX                RCH     2       4          98.80     98.3   
35     PHX                RCH     3       1          95.30     97.4   
58     PHX                RCH     3       2          94.80     95.4   
175    PHX                RCH     3       3          95.20     96.4   
88     PHX                RCH     3       4          96.50     96.8   

     acc_human      f1  
186       99.9  0.9990  
106      100.0  0.9985  
28        99.9  0.9985  
182      100.0  0.9995  
210       99.6  0.9940