# Get intercoder agreement stats

In [153]:
import os
import json
import pandas as pd
import numpy as np
import krippendorff
from itertools import combinations
from sklearn.metrics import cohen_kappa_score
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

# === COMMON SETTINGS ===
# Frame columns evaluated for reliability. Each entry is used verbatim as a
# column name of the annotation DataFrame (see prepare_matrix) and as the
# "frame" value in the per-frame results of compute_human_reliability.
FRAME_LABELS = [
    "Foreign influence threat_present",
    "Systemic institutional corruption_present",
    "Elite collusion_present",
    "Politicized investigations_present",
    "Authoritarian reformism_present",
    "Judicial and institutional accountability failures_present",
    "Mobilizing anti-corruption_present"
]

def encode_label(val):
    """Translate an annotation string into a binary code.

    Returns 1 for "Present", 0 for "Not Present", and None for any other
    value (including missing entries).
    """
    if val == "Present":
        return 1
    if val == "Not Present":
        return 0
    return None

def load_all_annotations(session_folder, allowed_annotators, min_articles=15):
    """
    Load all session JSON files and return a DataFrame of annotations.

    Every file in `session_folder` named `<user>_session.json` is read; each
    entry of its "annotations" list becomes one row, tagged with a `user_id`
    column derived from the file name.

    Only rows from `allowed_annotators` are kept, and of those only annotators
    with at least `min_articles` unique URIs are retained.

    Returns an empty DataFrame (with `user_id` and `uri` columns) when the
    folder contains no annotations at all, instead of raising a KeyError.
    """
    suffix = "_session.json"
    rows = []
    # Sorted so the row order does not depend on the filesystem's listing order.
    for filename in sorted(os.listdir(session_folder)):
        if not filename.endswith(suffix):
            continue
        user_id = filename.replace(suffix, "")
        path = os.path.join(session_folder, filename)
        with open(path, "r", encoding="utf-8") as f:
            session_data = json.load(f)
        # Build new dicts rather than mutating the parsed JSON in place.
        rows.extend(
            {**ann, "user_id": user_id}
            for ann in session_data.get("annotations", [])
        )

    if not rows:
        # An empty DataFrame has no "user_id" column, so the filters below
        # would fail; report the situation and hand back an empty frame.
        print(f"⚠️ No annotations found in {session_folder}")
        return pd.DataFrame(columns=["user_id", "uri"])

    df = pd.DataFrame(rows)
    df = df[df["user_id"].isin(allowed_annotators)]
    annotator_counts = df.groupby("user_id")["uri"].nunique()
    complete_annotators = annotator_counts[annotator_counts >= min_articles].index.tolist()
    df = df[df["user_id"].isin(complete_annotators)]
    print(f"✅ Annotators with ≥{min_articles} articles: {complete_annotators}")
    return df

def prepare_matrix(df, frame_label):
    """Build a wide table of binary codes for one frame.

    The `frame_label` column is encoded with `encode_label` and the result is
    pivoted so that rows are URIs, columns are coders (`user_id`), and cells
    hold the encoded value for that coder/article combination.
    """
    wanted_columns = ["user_id", "uri", frame_label]
    encoded_codes = df[frame_label].map(encode_label)
    long_form = df[wanted_columns].assign(**{frame_label: encoded_codes})
    return long_form.pivot(index="uri", columns="user_id", values=frame_label)

def safe_krippendorff_alpha(data, level='nominal'):
    """Compute Krippendorff’s α with guards for degenerate input.

    Parameters:
        data: numeric reliability matrix (array-like, NaN marks a missing
            rating); it is passed to `krippendorff.alpha` as
            `reliability_data`.
        level: level of measurement forwarded to `krippendorff.alpha`.

    Returns:
        NaN when there are no observed ratings at all (agreement is
        undefined), 1.0 when every observed rating is the same value, the
        computed α otherwise, and NaN if the computation raises or returns a
        non-scalar.
    """
    values = np.asarray(data, dtype=float)
    observed = values[~np.isnan(values)]
    if observed.size == 0:
        # Nothing was rated: reporting perfect agreement would be misleading.
        return np.nan
    if np.unique(observed).size < 2:
        # Only one category in use; the α formula is undefined here, and the
        # coders trivially agree.
        return 1.0
    try:
        alpha = krippendorff.alpha(reliability_data=values, level_of_measurement=level)
    except Exception as e:
        # Deliberately broad: this wrapper must never abort a reliability run.
        print(f"⚠️ Error computing alpha: {e}")
        return np.nan
    return alpha if not isinstance(alpha, (np.ndarray, list)) else np.nan

def pairwise_krippendorff(matrix, coders):
    """Score every unordered pair of coders with Krippendorff’s α.

    For each pair, only the rows of `matrix` where both coders have a value
    are used. Returns a dict mapping `(coder1, coder2)` to a tuple
    `(alpha, n_rows)`; alpha is NaN when the pair shares no rows.
    """
    pair_scores = {}
    for pair in combinations(coders, 2):
        shared = matrix[list(pair)].dropna()
        n_shared = shared.shape[0]
        if n_shared > 0:
            score = safe_krippendorff_alpha(shared.to_numpy().T)
        else:
            score = np.nan
        pair_scores[pair] = (score, n_shared)
    return pair_scores

def compute_human_reliability(
    session_folder,
    output_excel,
    allowed_annotators=None,
    min_articles=15,
    label="ICR"
):
    """
    Compute per-frame reliability among the human coders of one session folder.

    For every entry in FRAME_LABELS this prints the pairwise α of each coder
    pair, then records the overall α over articles coded by every coder, the
    mean of the non-NaN pairwise αs, and counts of articles with and without
    full agreement. The per-frame table is written to `output_excel` and
    returned as a DataFrame. `label` only appears in the printed messages.
    """
    coder_names = allowed_annotators
    if coder_names is None:
        coder_names = ["Assia", "Alexander", "Elisa", "Luigia", "Yara"]
    annotations = load_all_annotations(session_folder, coder_names, min_articles=min_articles)

    per_frame_rows = []
    for frame_label in FRAME_LABELS:
        print(f"\n=== Frame: {frame_label} ({label}) ===")
        matrix = prepare_matrix(annotations, frame_label)

        # Ignore coders whose column is entirely missing for this frame.
        active_coders = [name for name in matrix.columns if matrix[name].notna().any()]
        pair_stats = pairwise_krippendorff(matrix, active_coders)
        for pair in sorted(pair_stats):
            first, second = pair
            pair_alpha, n_shared = pair_stats[pair]
            if np.isnan(pair_alpha):
                print(f"  {first} vs {second}: α = N/A (n={n_shared})")
            else:
                print(f"  {first} vs {second}: α = {pair_alpha:.3f} (n={n_shared})")

        # Overall statistics use only articles that every coder rated.
        fully_coded = matrix.dropna(how='any')
        if fully_coded.empty:
            overall = np.nan
        else:
            overall = safe_krippendorff_alpha(fully_coded.to_numpy().T)

        # Number of distinct codes per article: 1 means unanimous.
        distinct_codes = fully_coded.apply(lambda row: len(set(row)), axis=1)
        usable_pair_alphas = [
            stats[0] for stats in pair_stats.values() if not np.isnan(stats[0])
        ]
        per_frame_rows.append({
            "frame": frame_label,
            "overall_alpha": overall,
            "avg_pairwise_alpha": np.mean(usable_pair_alphas),
            "num_disagreements": (distinct_codes > 1).sum(),
            "num_agreements": (distinct_codes == 1).sum(),
            "num_articles_coded_by_all": fully_coded.shape[0]
        })

    results_df = pd.DataFrame(per_frame_rows)
    results_df.to_excel(output_excel, index=False)
    print(f"\n✅ {label} results saved to {output_excel}")
    return results_df

# === LOW-LEVEL α FOR FINAL SUMMARY ===
def krippendorff_alpha_nominal(data):
    """Compute Krippendorff’s α for nominal data.

    Parameters:
        data: 2-D array with one row per coder and one column per unit;
            missing ratings are NaN/None (anything `pd.isnull` flags).

    Returns:
        1.0 if at most one distinct value is observed, NaN if no unit has at
        least two ratings, otherwise α rounded to three decimals.

    The coincidence matrix weights every ordered value pair within a unit by
    1 / (m_u - 1), where m_u is the number of ratings in that unit, so units
    rated by many coders do not dominate and the result matches the standard
    definition for any number of coders and with missing data.
    """
    ratings = np.asarray(data)
    values = pd.unique(ratings[~pd.isnull(ratings)])
    if len(values) <= 1:
        return 1.0

    # coincidences[(c, k)] = weighted count of c-k pairs across all units.
    coincidences = {}
    for unit in ratings.T:
        unit = unit[~pd.isnull(unit)]
        n_ratings = len(unit)
        if n_ratings < 2:
            continue  # a single rating cannot be paired with anything
        weight = 1.0 / (n_ratings - 1)
        for first, second in combinations(unit, 2):
            # Both orderings are counted; for first == second this adds
            # 2 * weight to the diagonal cell.
            coincidences[(first, second)] = coincidences.get((first, second), 0.0) + weight
            coincidences[(second, first)] = coincidences.get((second, first), 0.0) + weight

    total = sum(coincidences.values())
    if total == 0:
        return np.nan

    # Marginal totals are keyed by the category value itself, never by position.
    marginals = {}
    for (first, _), count in coincidences.items():
        marginals[first] = marginals.get(first, 0.0) + count

    Do = sum(count for (first, second), count in coincidences.items() if first != second)
    # sum over c != k of n_c * n_k  ==  n^2 - sum of n_c^2
    De = (total ** 2 - sum(n_c ** 2 for n_c in marginals.values())) / (total - 1)
    if De == 0:
        return 1.0
    return round(float(1 - Do / De), 3)

def main_intercoder_reliability(
    main_file,
    extra_file=None,
    extra_frames=None,
    output_file="final_intercoder_reliability_summary.xlsx"
):
    """
    Compute average κ vs Yara and Krippendorff’s α per frame.

    Parameters:
        main_file: Excel workbook with one sheet per frame. Each sheet is
            expected to have a "URI" column, a "Yara" column and one column
            per other coder.
        extra_file: optional second workbook with the same layout.
        extra_frames: sheet names for which the extra workbook overrides the
            main workbook in the coder-vs-Yara comparison. A name only takes
            effect when that sheet actually exists in the extra workbook.
        output_file: path of the Excel file to write ("summary" and
            "coder_vs_yara" sheets).

    Returns:
        (summary, combined_df): the per-frame summary (mean % agreement, mean
        κ, number of unique URIs, α) and the detailed coder-vs-Yara rows.

    Notes:
        * For overridden frames, the URI count and α use the rows of both
          workbooks together, while κ and % agreement use the extra workbook
          only.
        * α is computed for the sheets of the main workbook and excludes the
          columns in `exclude_cols` (including "Yara").
        * Each workbook is read once and closed immediately; column names are
          stripped of surrounding whitespace before any further use.
    """
    if extra_frames is None:
        extra_frames = []
    exclude_cols = ["URI", "Translated Text", "Yara", "Yara.1", "LLM", "LLM_Updated"]
    record_columns = ["frame", "annotator", "% agreement", "cohen_kappa", "n_items"]

    def load_sheets(path):
        # sheet_name=None reads every sheet into a dict (sheet order preserved).
        sheets = pd.read_excel(path, sheet_name=None)
        for sheet_df in sheets.values():
            sheet_df.columns = sheet_df.columns.str.strip()
        return sheets

    def process_sheet(df, frame):
        # One record per coder column, comparing that coder with "Yara" on
        # the rows where both have a value.
        if "Yara" not in df.columns:
            return []
        records = []
        for coder in (c for c in df.columns if c not in exclude_cols):
            valid = df[[coder, "Yara"]].dropna()
            if valid.empty:
                continue
            kappa = cohen_kappa_score(valid[coder], valid["Yara"])
            agree = (valid[coder] == valid["Yara"]).mean()
            records.append({
                "frame": frame,
                "annotator": coder,
                "% agreement": round(agree * 100, 2),
                "cohen_kappa": round(kappa, 3),
                "n_items": valid.shape[0]
            })
        return records

    main_sheets = load_sheets(main_file)
    extra_sheets = load_sheets(extra_file) if extra_file else {}

    # Frames that the extra workbook can really override.
    override_frames = {frame for frame in extra_frames if frame in extra_sheets}

    # Build coder-vs-Yara data: main frames first, then the overriding frames.
    records = []
    for frame, df in main_sheets.items():
        if frame not in override_frames:
            records.extend(process_sheet(df, frame))
    for frame, df in extra_sheets.items():
        if frame in override_frames:
            records.extend(process_sheet(df, frame))
    # Explicit columns keep the frame usable even when there are no records.
    combined_df = pd.DataFrame(records, columns=record_columns)

    # Count unique URIs per frame
    uri_counts = {}
    for frame, df in main_sheets.items():
        if frame not in override_frames:
            uri_counts[frame] = df["URI"].nunique()
    for frame in override_frames:
        uri_parts = [extra_sheets[frame]["URI"]]
        if frame in main_sheets:
            uri_parts.append(main_sheets[frame]["URI"])
        uri_counts[frame] = pd.concat(uri_parts).nunique()

    # Attach correct n_items (unique URIs per frame replaces the per-coder count)
    combined_df["n_items"] = combined_df["frame"].map(uri_counts)

    # Aggregate % agreement and κ per frame
    summary = (
        combined_df.groupby("frame")
        .agg({
            "% agreement": "mean",
            "cohen_kappa": "mean",
            "n_items": "first"
        })
        .reset_index()
    )
    summary["annotator"] = "Average"

    # Compute Krippendorff’s α per frame (merge main and extra if needed)
    kripp_dict = {}
    for frame, df in main_sheets.items():
        if frame in override_frames:
            # Both frames already have stripped headers, so columns align.
            df = pd.concat([df, extra_sheets[frame]], ignore_index=True)
        coders = [c for c in df.columns if c not in exclude_cols]
        matrix = df[coders].T.to_numpy()
        kripp_dict[frame] = krippendorff_alpha_nominal(matrix)

    kripp_df = pd.DataFrame(
        list(kripp_dict.items()),
        columns=["frame", "krippendorff_alpha"]
    )
    summary = summary.merge(kripp_df, on="frame", how="left")

    # Export to Excel
    with pd.ExcelWriter(output_file) as writer:
        summary.to_excel(writer, sheet_name="summary", index=False)
        combined_df.to_excel(writer, sheet_name="coder_vs_yara", index=False)

    print(f"\n✅ Final summary saved to {output_file}")
    return summary, combined_df

def plot_results(results_df, title):
    """Show a horizontal bar chart of the overall α per frame.

    Frames are ordered from highest to lowest `overall_alpha`; each bar with
    a non-NaN value is annotated with the value rounded to two decimals.
    """
    sns.set(style="whitegrid")
    _, ax = plt.subplots(figsize=(12, 6))
    ordered = results_df.sort_values("overall_alpha", ascending=False)
    sns.barplot(data=ordered, x="overall_alpha", y="frame", palette="viridis", ax=ax)

    ax.set_title(title, fontsize=16)
    ax.set_xlabel("Overall α", fontsize=12)
    ax.set_ylabel("Frame", fontsize=12)
    ax.set_xlim(0, 1)

    # Bars are drawn in row order, so the row position is the bar's y coordinate.
    for bar_position, alpha_value in enumerate(ordered["overall_alpha"]):
        if np.isnan(alpha_value):
            continue
        ax.text(alpha_value + 0.02, bar_position, f"{alpha_value:.2f}", va='center')

    plt.tight_layout()
    plt.show()

# === DRIVER ===
if __name__ == "__main__":
    # Every input and output lives under one WebDAV project folder. All paths
    # are derived from the current user's home directory so the script does
    # not depend on a particular account name.
    project_root = os.path.expanduser(
        "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)"
    )
    icr_root = os.path.join(project_root, "annotations", "coding_frames", "ICR")
    icr1_folder = os.path.join(icr_root, "ICR_test1")
    icr2_folder = os.path.join(icr_root, "ICR_test2")

    # ICR-1 session folder (human-only)
    icr1_session_folder = os.path.join(project_root, "sessions")
    icr1_output = os.path.join(icr1_folder, "reliability_results_icr1_human_only.xlsx")

    # ICR-2 session folder (human-only)
    icr2_session_folder = os.path.join(icr2_folder, "sessions")
    icr2_output = os.path.join(icr2_folder, "reliability_results_icr2_human_only.xlsx")

    # Final summary file paths
    main_file = os.path.join(icr2_folder, "agreement_with_yara_and_text_icr2.xlsx")
    extra_file = os.path.join(icr1_folder, "agreement_with_yara_and_text.xlsx")
    # Sheet names to take from the ICR-1 workbook. NOTE(review): these look
    # like frame labels cut to Excel's 31-character sheet-name limit — confirm
    # against the workbooks.
    extra_frames = [
        "Mobilizing anti-corruption_pres",
        "Systemic institutional corrupti",
        "Politicized investigations_pres"
    ]
    final_output = os.path.join(icr_root, "final_intercoder_reliability_summary.xlsx")

    # 1. Compute ICR-1 reliability (human coders only) and save to ICR_test1 folder
    print("===== ICR-1 Reliability (human coders only) =====")
    icr1_results = compute_human_reliability(
        session_folder=icr1_session_folder,
        output_excel=icr1_output,
        label="ICR-1"
    )

    # 2. Compute ICR-2 reliability (human coders only) and save to ICR_test2 folder
    print("\n===== ICR-2 Reliability (human coders only) =====")
    icr2_results = compute_human_reliability(
        session_folder=icr2_session_folder,
        output_excel=icr2_output,
        label="ICR-2"
    )

    # Optionally plot the results for each round
    #plot_results(icr1_results, "ICR-1: Overall Krippendorff's Alpha per Frame (human coders only)")
    #plot_results(icr2_results, "ICR-2: Overall Krippendorff's Alpha per Frame (human coders only)")

    # 3. Compute the final combined summary (κ vs Yara and α)
    print("\n===== Final Intercoder Reliability Summary =====")
    summary_df, coder_vs_yara_df = main_intercoder_reliability(
        main_file=main_file,
        extra_file=extra_file,
        extra_frames=extra_frames,
        output_file=final_output
    )

    # Inspect the final summary
    print("\n=== Final Summary ===")
    print(summary_df)


===== ICR-1 Reliability (human coders only) =====
✅ Annotators with ≥15 articles: ['Alexander', 'Assia', 'Elisa', 'Luigia', 'Yara']

=== Frame: Foreign influence threat_present (ICR-1) ===
  Alexander vs Assia: α = 0.000 (n=15)
  Alexander vs Elisa: α = -0.036 (n=15)
  Alexander vs Luigia: α = 1.000 (n=15)
  Alexander vs Yara: α = 0.000 (n=15)
  Assia vs Elisa: α = 0.000 (n=15)
  Assia vs Luigia: α = 0.000 (n=15)
  Assia vs Yara: α = 1.000 (n=15)
  Elisa vs Luigia: α = -0.036 (n=15)
  Elisa vs Yara: α = 0.000 (n=15)
  Luigia vs Yara: α = 0.000 (n=15)

=== Frame: Systemic institutional corruption_present (ICR-1) ===
  Alexander vs Assia: α = 0.584 (n=15)
  Alexander vs Elisa: α = 0.341 (n=15)
  Alexander vs Luigia: α = 0.847 (n=15)
  Alexander vs Yara: α = 0.670 (n=15)
  Assia vs Elisa: α = 0.233 (n=15)
  Assia vs Luigia: α = 0.420 (n=15)
  Assia vs Yara: α = 0.540 (n=15)
  Elisa vs Luigia: α = 0.460 (n=15)
  Elisa vs Yara: α = 0.597 (n=15)
  Luigia vs Yara: α = 0.820 (n=15)

=== Frame: