In [3]:
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import re
import pandas as pd

In [5]:
# Root directory that is walked recursively for experiment result folders.
LOG_DIRECTORY = "artifact_checkgpt/CheckGPT/exp/"

# Parses a result folder name of the form
#   Train_<DOMAINS>_<arch>_Task<n>_Prompt<n>_Test_<DOMAINS>_Task<n>_Prompt<n>
# into named groups describing the train and test configuration.
TEST_DIRS_REGEX = re.compile(
    r"Train_(?P<train_domains>[A-Z]+)_"
    r"(?P<arch>[A-Za-z0-9]+)_"
    r"Task(?P<train_tasks>[0-9]+)_"
    r"Prompt(?P<train_prompts>[0-9]+)_"
    r"Test_(?P<test_domains>[A-Z]+)_"
    r"Task(?P<test_tasks>[0-9]+)_"
    r"Prompt(?P<test_prompts>[0-9]+)"
)

# One well-formed decimal number: "12", "12.", "12.5" or ".5".
# A plain character class such as [0-9.]+ would also swallow a trailing
# sentence period ("0.98.") or a lone ".", which float() cannot parse.
_NUMBER = r"(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)"

# Extracts the four reported metrics from a log. The three accuracies are
# followed by a literal "%"; F1 is not. DOTALL lets the lazy ".*?" gaps span
# line breaks, so the metrics may sit on one line or on consecutive lines.
METRIC_REGEX = re.compile(
    rf"Test accuracy:\s*(?P<acc>{_NUMBER})%.*?"
    rf"Acc_GPT:\s*(?P<tpr>{_NUMBER})%.*?"
    rf"Acc_Human:\s*(?P<tnr>{_NUMBER})%.*?"
    rf"F1:\s*(?P<f1>{_NUMBER})",
    re.DOTALL
)

In [6]:
# Column order of the summary table; the same list is used as the sort key,
# so rows are ordered by configuration first and by metrics as tie-breakers.
RESULT_COLUMNS = [
    "train_domains",
    "architecture",
    "train_tasks",
    "train_prompts",
    "test_domains",
    "test_tasks",
    "test_prompts",
    "accuracy",
    "tpr",
    "tnr",
    "f1",
]

# Walk the experiment tree and collect one record per result folder that
# (a) has a name matching TEST_DIRS_REGEX, (b) contains a train.log, and
# (c) whose log contains a block matching METRIC_REGEX. Anything else is skipped.
rows = []
for root, _dirs, _files in os.walk(LOG_DIRECTORY):
    folder_name = os.path.basename(root)

    # The pattern itself requires the literal "Test_", so no separate
    # substring pre-check is needed before matching.
    match = TEST_DIRS_REGEX.search(folder_name)
    if match is None:
        continue

    log_path = os.path.join(root, "train.log")
    if not os.path.isfile(log_path):
        continue

    # errors="ignore" drops undecodable bytes instead of aborting the scan.
    with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
        log_text = f.read()

    # search() returns the first metrics block found in the log.
    metric_match = METRIC_REGEX.search(log_text)
    if metric_match is None:
        continue

    rows.append(
        {
            "train_domains": match.group("train_domains"),
            "architecture": match.group("arch"),
            "train_tasks": match.group("train_tasks"),
            "train_prompts": match.group("train_prompts"),
            "test_domains": match.group("test_domains"),
            "test_tasks": match.group("test_tasks"),
            "test_prompts": match.group("test_prompts"),
            "accuracy": float(metric_match.group("acc")),
            "tpr": float(metric_match.group("tpr")),
            "tnr": float(metric_match.group("tnr")),
            "f1": float(metric_match.group("f1")),
        }
    )

# Passing the columns explicitly keeps the frame well-formed when no log was
# found: without them an empty frame has no columns and sort_values raises
# KeyError on the first sort key.
df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
df = df.sort_values(by=RESULT_COLUMNS)

In [7]:
# Echo the sorted table as plain text, then persist it without the index column.
summary_csv_path = "results_summary.csv"
print(df)
df.to_csv(summary_csv_path, index=False)

    train_domains       architecture train_tasks train_prompts test_domains  \
195            CS  BiLSTMwoAttention           1             1           CS   
158            CS  BiLSTMwoAttention           1             2           CS   
98             CS  BiLSTMwoAttention           1             3           CS   
67             CS  BiLSTMwoAttention           1             4           CS   
57             CS  BiLSTMwoAttention           2             1           CS   
..            ...                ...         ...           ...          ...   
14            PHX                RCH           2             4          PHX   
228           PHX                RCH           3             1          PHX   
19            PHX                RCH           3             2          PHX   
44            PHX                RCH           3             3          PHX   
189           PHX                RCH           3             4          PHX   

    test_tasks test_prompts  accuracy   tpr    tnr 