In [None]:
import pandas as pd

def calculate_metrics_per_file(csv_file):
    df = pd.read_csv(csv_file)

    # 補齊空值
    df["S_human_review"] = df["S_human_review"].fillna(df["S_review"])
    df["O_human_review"] = df["O_human_review"].fillna(df["O_review"])
    df[["S_review", "S_human_review", "O_review", "O_human_review"]] = df[["S_review", "S_human_review", "O_review", "O_human_review"]].astype(int)

    # 計算主體 TP / FP / FN
    df["TP_S"] = ((df["S_review"] == 0) & (df["S_human_review"] == 0)).astype(int)
    df["FP_S"] = ((df["S_review"] == 0) & (df["S_human_review"] == 1)).astype(int)
    df["FN_S"] = ((df["S_review"] == 1) & (df["S_human_review"] == 0)).astype(int)

    # 計算客體 TP / FP / FN
    df["TP_O"] = ((df["O_review"] == 0) & (df["O_human_review"] == 0)).astype(int)
    df["FP_O"] = ((df["O_review"] == 0) & (df["O_human_review"] == 1)).astype(int)
    df["FN_O"] = ((df["O_review"] == 1) & (df["O_human_review"] == 0)).astype(int)

    # 分檔案計算
    grouped = df.groupby("檔名")[["TP_S", "FP_S", "FN_S", "TP_O", "FP_O", "FN_O"]].sum()

    # 加上 precision/recall/f1
    grouped["Precision_S"] = grouped["TP_S"] / (grouped["TP_S"] + grouped["FP_S"])
    grouped["Recall_S"] = grouped["TP_S"] / (grouped["TP_S"] + grouped["FN_S"])
    grouped["F1_S"] = 2 * grouped["Precision_S"] * grouped["Recall_S"] / (grouped["Precision_S"] + grouped["Recall_S"])

    grouped["Precision_O"] = grouped["TP_O"] / (grouped["TP_O"] + grouped["FP_O"])
    grouped["Recall_O"] = grouped["TP_O"] / (grouped["TP_O"] + grouped["FN_O"])
    grouped["F1_O"] = 2 * grouped["Precision_O"] * grouped["Recall_O"] / (grouped["Precision_O"] + grouped["Recall_O"])

    grouped.fillna(0, inplace=True)
    return grouped


def calculate_overall_metrics(csv_file):
    df = pd.read_csv(csv_file)

    # 補齊空值
    df["S_human_review"] = df["S_human_review"].fillna(df["S_review"])
    df["O_human_review"] = df["O_human_review"].fillna(df["O_review"])
    df[["S_review", "S_human_review", "O_review", "O_human_review"]] = df[["S_review", "S_human_review", "O_review", "O_human_review"]].astype(int)

    # 計算主體 TP / FP / FN
    TP_S = ((df["S_review"] == 0) & (df["S_human_review"] == 0)).sum()
    FP_S = ((df["S_review"] == 0) & (df["S_human_review"] == 1)).sum()
    FN_S = ((df["S_review"] == 1) & (df["S_human_review"] == 0)).sum()

    # 計算客體 TP / FP / FN
    TP_O = ((df["O_review"] == 0) & (df["O_human_review"] == 0)).sum()
    FP_O = ((df["O_review"] == 0) & (df["O_human_review"] == 1)).sum()
    FN_O = ((df["O_review"] == 1) & (df["O_human_review"] == 0)).sum()

    # 計算 precision / recall / F1
    Precision_S = TP_S / (TP_S + FP_S) if (TP_S + FP_S) > 0 else 0
    Recall_S = TP_S / (TP_S + FN_S) if (TP_S + FN_S) > 0 else 0
    F1_S = 2 * Precision_S * Recall_S / (Precision_S + Recall_S) if (Precision_S + Recall_S) > 0 else 0

    Precision_O = TP_O / (TP_O + FP_O) if (TP_O + FP_O) > 0 else 0
    Recall_O = TP_O / (TP_O + FN_O) if (TP_O + FN_O) > 0 else 0
    F1_O = 2 * Precision_O * Recall_O / (Precision_O + Recall_O) if (Precision_O + Recall_O) > 0 else 0

    # 打包成 DataFrame
    result = pd.DataFrame([{
        "TP_S": TP_S, "FP_S": FP_S, "FN_S": FN_S,
        "Precision_S": Precision_S, "Recall_S": Recall_S, "F1_S": F1_S,
        "TP_O": TP_O, "FP_O": FP_O, "FN_O": FN_O,
        "Precision_O": Precision_O, "Recall_O": Recall_O, "F1_O": F1_O
    }], index=["總計"])

    return result



In [8]:

# 跑個別檔案
df_filewise = calculate_metrics_per_file("./docs/output/4_llm_resolution/review_v7_done.csv")
print(df_filewise)

# 跑總體
df_total = calculate_overall_metrics("./docs/output/4_llm_resolution/review_v7_done.csv")
print(df_total)


# 合併展示
final = pd.concat([df_filewise, df_total])
print(final)

                                           TP_S  FP_S  FN_S  TP_O  FP_O  FN_O  \
檔名                                                                              
228事件(20).json                               40     4     0    46     0     0   
「友仔」是什麼？光復初期臺北地區非法組織調查報告告訴您(37).json         30     1     0    30     0     0   
「回首向來蕭瑟處，歸去，也無風雨也無晴」—民國38年國軍遷臺紀事(30).json   152     0     0   152     0     0   
「威海衛」租借地的收回(42).json                          3    15     0    18     0     0   
「快速」發展的年代：麥克阿瑟公路通車一甲子(203).json              26     0     0    26     0     0   
...                                         ...   ...   ...   ...   ...   ...   
香蕉輸日與臺灣經濟(119).json                          46     0     0    46     0     0   
高雄市升格：行政區劃與人事組織的調整(145).json                  8     0     0     8     0     0   
鬼斧神工的中橫公路(105).json                          88     0     0     6     0     0   
黃金歲月：臺灣鳳梨罐頭行銷全球(175).json                    41     0     0    41     0     0   
黑與白的精彩對弈—圍棋(74).json        

In [9]:
import pandas as pd
from tabulate import tabulate

def calculate_metrics_per_file(csv_file):
    df = pd.read_csv(csv_file)
    df["S_human_review"] = df["S_human_review"].fillna(df["S_review"])
    df["O_human_review"] = df["O_human_review"].fillna(df["O_review"])
    df[["S_review", "S_human_review", "O_review", "O_human_review"]] = df[
        ["S_review", "S_human_review", "O_review", "O_human_review"]
    ].astype(int)

    df["TP_S"] = ((df["S_review"] == 0) & (df["S_human_review"] == 0)).astype(int)
    df["FP_S"] = ((df["S_review"] == 0) & (df["S_human_review"] == 1)).astype(int)
    df["FN_S"] = ((df["S_review"] == 1) & (df["S_human_review"] == 0)).astype(int)

    df["TP_O"] = ((df["O_review"] == 0) & (df["O_human_review"] == 0)).astype(int)
    df["FP_O"] = ((df["O_review"] == 0) & (df["O_human_review"] == 1)).astype(int)
    df["FN_O"] = ((df["O_review"] == 1) & (df["O_human_review"] == 0)).astype(int)

    grouped = df.groupby("檔名")[["TP_S", "FP_S", "FN_S", "TP_O", "FP_O", "FN_O"]].sum()

    grouped["Precision_S"] = grouped["TP_S"] / (grouped["TP_S"] + grouped["FP_S"])
    grouped["Recall_S"] = grouped["TP_S"] / (grouped["TP_S"] + grouped["FN_S"])
    grouped["F1_S"] = 2 * grouped["Precision_S"] * grouped["Recall_S"] / (grouped["Precision_S"] + grouped["Recall_S"])

    grouped["Precision_O"] = grouped["TP_O"] / (grouped["TP_O"] + grouped["FP_O"])
    grouped["Recall_O"] = grouped["TP_O"] / (grouped["TP_O"] + grouped["FN_O"])
    grouped["F1_O"] = 2 * grouped["Precision_O"] * grouped["Recall_O"] / (grouped["Precision_O"] + grouped["Recall_O"])

    grouped.fillna(0, inplace=True)
    return grouped

def calculate_overall_metrics(csv_file):
    df = pd.read_csv(csv_file)
    df["S_human_review"] = df["S_human_review"].fillna(df["S_review"])
    df["O_human_review"] = df["O_human_review"].fillna(df["O_review"])
    df[["S_review", "S_human_review", "O_review", "O_human_review"]] = df[
        ["S_review", "S_human_review", "O_review", "O_human_review"]
    ].astype(int)

    TP_S = ((df["S_review"] == 0) & (df["S_human_review"] == 0)).sum()
    FP_S = ((df["S_review"] == 0) & (df["S_human_review"] == 1)).sum()
    FN_S = ((df["S_review"] == 1) & (df["S_human_review"] == 0)).sum()

    TP_O = ((df["O_review"] == 0) & (df["O_human_review"] == 0)).sum()
    FP_O = ((df["O_review"] == 0) & (df["O_human_review"] == 1)).sum()
    FN_O = ((df["O_review"] == 1) & (df["O_human_review"] == 0)).sum()

    Precision_S = TP_S / (TP_S + FP_S) if (TP_S + FP_S) > 0 else 0
    Recall_S = TP_S / (TP_S + FN_S) if (TP_S + FN_S) > 0 else 0
    F1_S = 2 * Precision_S * Recall_S / (Precision_S + Recall_S) if (Precision_S + Recall_S) > 0 else 0

    Precision_O = TP_O / (TP_O + FP_O) if (TP_O + FP_O) > 0 else 0
    Recall_O = TP_O / (TP_O + FN_O) if (TP_O + FN_O) > 0 else 0
    F1_O = 2 * Precision_O * Recall_O / (Precision_O + Recall_O) if (Precision_O + Recall_O) > 0 else 0

    result = pd.DataFrame([{
        "TP_S": TP_S, "FP_S": FP_S, "FN_S": FN_S,
        "Precision_S": Precision_S, "Recall_S": Recall_S, "F1_S": F1_S,
        "TP_O": TP_O, "FP_O": FP_O, "FN_O": FN_O,
        "Precision_O": Precision_O, "Recall_O": Recall_O, "F1_O": F1_O
    }], index=["總計"])

    return result

# ✅ 執行並整齊列印
def run_and_print_metrics(csv_file):
    filewise = calculate_metrics_per_file(csv_file)
    overall = calculate_overall_metrics(csv_file)
    combined = pd.concat([filewise, overall])

    # 格式化數字（百分比與整數）
    formatted = combined.copy()
    for col in formatted.columns:
        if "Precision" in col or "Recall" in col or "F1" in col:
            formatted[col] = formatted[col].apply(lambda x: f"{x:.2%}")
        else:
            formatted[col] = formatted[col].astype(int)

    print(tabulate(formatted.reset_index(), headers="keys", tablefmt="grid"))

In [10]:
run_and_print_metrics("./docs/output/4_llm_resolution/review_v7_done.csv")

+-----+-----------------------------------------------------------------------------+--------+--------+--------+--------+--------+--------+---------------+------------+---------+---------------+------------+---------+
|     | index                                                                       |   TP_S |   FP_S |   FN_S |   TP_O |   FP_O |   FN_O | Precision_S   | Recall_S   | F1_S    | Precision_O   | Recall_O   | F1_O    |
|   0 | 228事件(20).json                                                            |     40 |      4 |      0 |     46 |      0 |      0 | 90.91%        | 100.00%    | 95.24%  | 100.00%       | 100.00%    | 100.00% |
+-----+-----------------------------------------------------------------------------+--------+--------+--------+--------+--------+--------+---------------+------------+---------+---------------+------------+---------+
|   1 | 「友仔」是什麼？光復初期臺北地區非法組織調查報告告訴您(37).json             |     30 |      1 |      0 |     30 |      0 |      0 | 96.77%        | 1