In [1]:
import pandas as pd

def _read_headerless_tsv(path: str) -> pd.DataFrame:
    """Read a tab-separated file that has no header row.

    Args:
        path: File location, passed unchanged to ``pd.read_csv``.

    Returns:
        The file contents with pandas' default integer column names.
    """
    return pd.read_csv(path, sep="\t", header=None)


def _load_summary_diff(path: str) -> pd.DataFrame:
    """Load a summary/diff score file and give its columns readable names.

    The first column becomes ``"Label"``; the remaining columns become
    ``"Score_1"``, ``"Score_2"``, ... in file order.

    Args:
        path: File location, passed unchanged to ``pd.read_csv``.

    Returns:
        The renamed DataFrame, one row per line of the file.
    """
    scores = _read_headerless_tsv(path)
    scores.columns = ["Label"] + [f"Score_{i}" for i in range(1, scores.shape[1])]
    return scores


def _rows_with_labels(scores: pd.DataFrame, labels: set) -> pd.DataFrame:
    """Keep only the rows whose ``"Label"`` is one of ``labels``.

    Args:
        scores: Frame with a ``"Label"`` column.
        labels: Label values to keep.

    Returns:
        The matching rows in their original order, re-indexed from 0.
    """
    return scores[scores["Label"].isin(labels)].reset_index(drop=True)


# NOTE(review): "r1" / "r2" are presumably the two raters (or rating rounds)
# whose scores are compared later -- confirm against the data source.
df_mapping_r1 = _read_headerless_tsv("data/r1_mapping.txt")
df_mapping_r2 = _read_headerless_tsv("data/r2_mapping.txt")


# Row labels that select the summary criteria in the summary/diff files.
SUMMARY_LABELS = {
    "Summary Accuracy",
    "Summary Clarity",
    "Segmentation Granularity",
}

# Row labels that select the diff criteria in the summary/diff files.
DIFF_LABELS = {
    "Diff Faithfulness",
    "Diff Completeness",
    "Diff Salience",
}

# Both files go through the same helpers so the r1 and r2 frames are built
# identically; rows carrying any other label are left out of both subsets.
df_summary_diff_r1 = _load_summary_diff("data/r1_summary_diff.txt")
df_summary_r1 = _rows_with_labels(df_summary_diff_r1, SUMMARY_LABELS)
df_diff_r1 = _rows_with_labels(df_summary_diff_r1, DIFF_LABELS)

df_summary_diff_r2 = _load_summary_diff("data/r2_summary_diff.txt")
df_summary_r2 = _rows_with_labels(df_summary_diff_r2, SUMMARY_LABELS)
df_diff_r2 = _rows_with_labels(df_summary_diff_r2, DIFF_LABELS)

In [2]:
import numpy as np


def _percent_agreement(scores_a, scores_b, tolerance=1):
    """Compute strict and relaxed percent agreement between two score vectors.

    Args:
        scores_a: Numeric scores from the first source; position ``i`` must
            refer to the same rated item as position ``i`` of ``scores_b``.
        scores_b: Numeric scores from the second source.
        tolerance: Largest absolute difference that the relaxed measure still
            counts as agreement.

    Returns:
        ``(strict, relaxed)``: the fraction of compared positions with equal
        scores, and the fraction whose scores differ by at most ``tolerance``.

    Raises:
        ValueError: If the inputs differ in size or no position holds a score.
    """
    a = np.asarray(scores_a, dtype=float).ravel()
    b = np.asarray(scores_b, dtype=float).ravel()
    if a.shape != b.shape:
        raise ValueError(f"cannot compare {a.size} scores with {b.size} scores")

    # A cell missing on both sides is padding, not a disagreement, so it is
    # left out of the denominator. A cell missing on one side only stays in
    # and counts as a disagreement (every comparison against NaN is False).
    compared = ~(np.isnan(a) & np.isnan(b))
    if not compared.any():
        raise ValueError("no scores to compare")
    a, b = a[compared], b[compared]

    strict = np.mean(a == b)
    relaxed = np.mean(np.abs(a - b) <= tolerance)
    return strict, relaxed


def _require_same_labels(frame_r1, frame_r2, what):
    """Raise ValueError unless both frames list the same labels in the same order.

    The agreement scores are compared position by position, so rows that are
    ordered differently in the two frames would pair unrelated criteria.
    """
    if list(frame_r1["Label"]) != list(frame_r2["Label"]):
        raise ValueError(f"{what}: r1 and r2 rows are not labelled identically")


# === Overall Mapping ===
# Flattened row by row with to_numpy() instead of stack(): stack() can drop NaN
# cells (its default in pandas 2.x), which would shorten or shift one vector
# against the other. Missing cells are handled inside _percent_agreement.
vec_mapping_r1 = df_mapping_r1.to_numpy().flatten()
vec_mapping_r2 = df_mapping_r2.to_numpy().flatten()
mapping_strict, mapping_relaxed = _percent_agreement(vec_mapping_r1, vec_mapping_r2)

# === Summary Only ===
_require_same_labels(df_summary_r1, df_summary_r2, "Summary")
# Column 0 is the label; every remaining column is a score.
vec_summary_r1 = df_summary_r1.iloc[:, 1:].to_numpy().flatten()
vec_summary_r2 = df_summary_r2.iloc[:, 1:].to_numpy().flatten()
summary_strict, summary_relaxed = _percent_agreement(vec_summary_r1, vec_summary_r2)

# === Diff Only ===
_require_same_labels(df_diff_r1, df_diff_r2, "Diff")
vec_diff_r1 = df_diff_r1.iloc[:, 1:].to_numpy().flatten()
vec_diff_r2 = df_diff_r2.iloc[:, 1:].to_numpy().flatten()
diff_strict, diff_relaxed = _percent_agreement(vec_diff_r1, vec_diff_r2)

# === Print concise results ===
print("=== Percent Agreement ===")
print(f"Overall Mapping : strict={mapping_strict:.3f}, relaxed(±1)={mapping_relaxed:.3f}")
print(f"Summary Only    : strict={summary_strict:.3f}, relaxed(±1)={summary_relaxed:.3f}")
print(f"Diff Only       : strict={diff_strict:.3f}, relaxed(±1)={diff_relaxed:.3f}")

=== Percent Agreement ===
Overall Mapping : strict=0.943, relaxed(±1)=0.980
Summary Only    : strict=0.704, relaxed(±1)=0.963
Diff Only       : strict=0.741, relaxed(±1)=0.991
