In [None]:
import pandas as pd
import numpy as np

# ---- 1) Load the Scenario B raw file ----
# Local run: PATH is relative, so the CSV is resolved against the current
# working directory.
PATH = "raw_crowd_annotations_scenarioB_disagreement_only_v3.csv"

# Chat-sandbox run: uncomment the next line to read the sandbox copy instead.
# PATH = "/mnt/data/synthetic_raw_crowd_annotations_scenarioB_mixed_realistic.csv"

df = pd.read_csv(filepath_or_buffer=PATH)


# ---- 2) Krippendorff's alpha (nominal) for 0/1 with missing values ----
def krippendorff_alpha_nominal(matrix: np.ndarray) -> float:
    """Return Krippendorff's alpha for nominal data with missing ratings.

    Parameters
    ----------
    matrix:
        Array-like of shape (items, raters). Each entry is a category code
        (typically 0/1) or ``np.nan`` for a missing rating.

    Returns
    -------
    float
        ``1 - Do / De`` where ``Do`` is the observed and ``De`` the expected
        disagreement. ``nan`` when no item has at least two ratings; ``1.0``
        when the pairable ratings show no variation at all (``De == 0``).

    Notes
    -----
    Follows Krippendorff's coincidence-matrix definition:

    * Only *pairable* values (ratings in items with >= 2 ratings) take part
      in either ``Do`` or ``De``; a lone rating carries no agreement
      information and must not influence the expected disagreement.
    * Within an item with ``m_u`` ratings every ordered pair of ratings is
      weighted by ``1 / (m_u - 1)``, so each rating contributes equally
      regardless of how many raters saw the item.

    For complete data with a constant number of raters per item this is
    identical to pooling all pairs across items.
    """
    data = np.asarray(matrix, dtype=float)

    weighted_disagreement = 0.0  # sum_u (disagreeing ordered pairs) / (m_u - 1)
    n_pairable = 0               # total number of pairable ratings
    category_totals = {}         # category -> count among pairable ratings

    for row in data:
        ratings = row[~np.isnan(row)]
        m_u = len(ratings)
        if m_u < 2:
            # A single rating cannot be paired with anything; skip the item.
            continue

        cats, counts = np.unique(ratings, return_counts=True)
        # Ordered pairs with differing categories: m_u^2 - sum_c n_uc^2.
        weighted_disagreement += (m_u ** 2 - np.sum(counts ** 2)) / (m_u - 1)
        n_pairable += m_u
        for cat, cnt in zip(cats.tolist(), counts.tolist()):
            category_totals[cat] = category_totals.get(cat, 0) + cnt

    if n_pairable == 0:
        return np.nan

    Do = weighted_disagreement / n_pairable

    # n_pairable >= 2 here, so the denominator below is never zero.
    totals = np.array(list(category_totals.values()), dtype=float)
    De = (n_pairable ** 2 - np.sum(totals ** 2)) / (n_pairable * (n_pairable - 1))

    # No expected disagreement (a single category overall): report 1.0.
    if De == 0:
        return 1.0

    return float(1.0 - Do / De)


# ---- 3) Compute per-trait IRR table ----
def _fmt3(x):
    """Format a float with three decimals for the printed table."""
    return f"{x:.3f}"


rows = []
for trait, sub in df.groupby("trait", sort=True):
    # Items x raters: one row per comment_id, one column per annotator.
    ratings = sub.pivot_table(
        index="comment_id",
        values=["annotator_1", "annotator_2", "annotator_3"],
        aggfunc="first",
    )
    mat = ratings.to_numpy(dtype=float)

    # Agreement across annotators for this trait.
    alpha = krippendorff_alpha_nominal(mat)

    # Number of positive majority votes (assumes majority_vote is 0/1).
    freq = int(sub["majority_vote"].sum())

    # Share of this trait's rows where a third annotator gave a rating, in percent.
    has_third = sub["annotator_3"].notna()
    pct3 = float(has_third.mean()) * 100.0

    rows.append((trait, freq, alpha, pct3))

irr = pd.DataFrame(
    rows,
    columns=["Trait", "Frequency", "Krippendorff_alpha", "Pct_with_3rd_annotator"],
)
irr = irr.sort_values("Trait")

# Summary row: unweighted means across traits; Frequency is not averaged.
avg_alpha = irr["Krippendorff_alpha"].mean()
avg_pct3 = irr["Pct_with_3rd_annotator"].mean()
summary = pd.DataFrame([{
    "Trait": "Average across traits",
    "Frequency": "--",
    "Krippendorff_alpha": avg_alpha,
    "Pct_with_3rd_annotator": avg_pct3,
}])
irr = pd.concat([irr, summary], ignore_index=True)

# ---- 4) Output ----
pd.set_option("display.max_colwidth", 80)
print(irr.to_string(index=False, float_format=_fmt3))


                                   Trait Frequency  Krippendorff_alpha  Pct_with_3rd_annotator
         Discouragement without guidance        66               0.575                   9.032
                     Dismissive attitude        32               0.400                   8.065
Disregard for others' time or boundaries        14               0.505                   2.903
                       Excessive control        17               0.283                   6.129
                   Lack of specification       100               0.721                   7.097
                                 Mockery        43               0.624                   5.484
                        Personal attacks        28               0.557                   4.516
                 Threats or intimidation         4               1.000                   0.000
                        Unconscious bias        17               0.630                   2.258
                   Average across traits        --