# Compute inter-annotator agreement for manually annotated data

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm



# === 1. Load the Annotation Files ===
def load_annotations(file_annotator1, file_annotator2):
    """
    Reads both annotators' CSV files and joins them on 'question_id_individual'.

    Each file is parsed with ';' as the field separator. The two label columns
    ("Conceptual Question Type" and "Functional Question Type") receive an
    "_A1" / "_A2" suffix so they stay distinguishable after the join.

    Returns the inner-joined DataFrame, i.e. only question ids present in
    both files are kept.
    """
    per_annotator = []
    for tag, csv_path in (("A1", file_annotator1), ("A2", file_annotator2)):
        frame = pd.read_csv(csv_path, delimiter=";")
        # Tag the label columns with the annotator they belong to
        frame = frame.rename(columns={
            "Conceptual Question Type": f"Conceptual_{tag}",
            "Functional Question Type": f"Functional_{tag}",
        })
        per_annotator.append(frame)

    first, second = per_annotator

    # Inner join on the unique question identifier
    merged_df = first.merge(second, on="question_id_individual", how="inner")

    print(f"Data merged. {len(merged_df)} items matched between annotators.")

    return merged_df

# === 2. Compute Agreement Metrics ===
def compute_agreement(merged_df, column_A1, column_A2):
    """
    Computes Cohen's Kappa and percentage agreement for a given annotation type.

    Rows in which either annotator left the label empty are excluded before
    scoring. The input DataFrame is not modified.

    Args:
        merged_df: DataFrame holding both annotators' labels.
        column_A1: Name of the column with annotator 1's labels.
        column_A2: Name of the column with annotator 2's labels.

    Returns:
        Tuple ``(percentage_agreement, kappa)``. When no row has a label from
        both annotators the result is ``(0, nan)``.
    """
    # Drop missing annotations BEFORE casting to string: astype(str) turns a
    # missing value into a literal string, which dropna() no longer recognises
    # and which would then be scored as if it were a real label.
    valid_entries = merged_df.dropna(subset=[column_A1, column_A2])

    # Compare labels as strings to avoid type errors with mixed dtypes.
    # The cast is done on local Series so the caller's frame stays untouched.
    labels_a1 = valid_entries[column_A1].astype(str)
    labels_a2 = valid_entries[column_A2].astype(str)

    total = len(valid_entries)
    if total > 0:
        agreement = int((labels_a1 == labels_a2).sum())
        percentage_agreement = agreement / total * 100
        kappa = cohen_kappa_score(labels_a1, labels_a2)
    else:
        # Nothing to compare: kappa is undefined, so do not call into sklearn
        percentage_agreement = 0
        kappa = float("nan")

    print(f"\n **{column_A1} vs {column_A2}**")
    print(f"Percentage Agreement: {percentage_agreement:.2f}%")
    print(f"Cohen’s Kappa: {kappa:.3f}")

    return percentage_agreement, kappa

# === 3. Generate a Confusion Matrix ===
def plot_confusion_matrix(merged_df, column_A1, column_A2, title, filename):
    """
    Plots a confusion matrix while ensuring that each individual cell has a fixed size.
    Saves the plot as a PDF file.

    Rows in which either annotator left the label empty are excluded. The
    input DataFrame is not modified.

    Args:
        merged_df: DataFrame holding both annotators' labels.
        column_A1: Column with annotator 1's labels (matrix rows).
        column_A2: Column with annotator 2's labels (matrix columns).
        title: Accepted for interface compatibility; it is not drawn on the
            figure.
        filename: Path the PDF is written to.

    Raises:
        ValueError: If no row has a label from both annotators.
    """
    # Drop missing annotations BEFORE casting to string: astype(str) turns a
    # missing value into a literal string, which dropna() can no longer remove
    # and which would then show up as its own row/column in the matrix.
    valid_entries = merged_df.dropna(subset=[column_A1, column_A2])
    if valid_entries.empty:
        raise ValueError(
            f"No rows with labels in both '{column_A1}' and '{column_A2}'; "
            "cannot build a confusion matrix."
        )

    # Cast on local Series (avoids float issues without mutating the caller's frame)
    labels_a1 = valid_entries[column_A1].astype(str)
    labels_a2 = valid_entries[column_A2].astype(str)

    # Get all labels and compute the confusion matrix
    all_labels = sorted(set(labels_a1) | set(labels_a2))
    cm = confusion_matrix(labels_a1, labels_a2, labels=all_labels)

    # Specify the font file path and set the font family globally.
    # The font file is looked up relative to the current working directory.
    font_path = 'lmroman10-regular.otf'
    fm.fontManager.addfont(font_path)
    plt.rcParams['font.family'] = 'Latin Modern Roman'

    # --- Compute figure size based on desired cell dimensions ---
    # Desired dimensions per cell in inches
    cell_width = 0.8
    cell_height = 0.8
    # Additional space for margins, labels, etc.
    margin_width = 2
    margin_height = 2
    num_labels = len(all_labels)
    fig_width = cell_width * num_labels + margin_width
    fig_height = cell_height * num_labels + margin_height

    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                annot_kws={'fontsize': 16},
                xticklabels=all_labels, yticklabels=all_labels,
                square=True, linewidths=0.5, ax=ax)
    plt.xlabel("Annotator 2", fontsize=20, labelpad=20)
    plt.ylabel("Annotator 1", fontsize=20, labelpad=20)
    plt.xticks(rotation=45, ha="right", fontsize=16)
    plt.yticks(rotation=0, fontsize=16)

    # Adjust the colorbar (legend) font size:
    cbar = ax.collections[0].colorbar
    cbar.ax.tick_params(labelsize=16)

    # Save the figure as a PDF file with the given filename
    fig.savefig(filename, format="pdf", bbox_inches="tight")
    plt.show()


# === 4. Run Full Analysis ===
def analyze_inter_annotator_agreement(file_annotator1, file_annotator2):
    """
    Runs the complete inter-annotator workflow for two annotation files.

    The files are loaded and merged, agreement is computed for the conceptual
    and the functional label pair, and one confusion matrix per pair is
    plotted and written to a PDF.

    Returns a tuple of the merged DataFrame, the conceptual result and the
    functional result, each result being whatever compute_agreement returns.
    """
    merged_df = load_annotations(file_annotator1, file_annotator2)

    conceptual_cols = ("Conceptual_A1", "Conceptual_A2")
    functional_cols = ("Functional_A1", "Functional_A2")

    # Agreement metrics, conceptual first and functional second
    print("\n**Agreement Results**")
    conceptual_agreement = compute_agreement(merged_df, *conceptual_cols)
    functional_agreement = compute_agreement(merged_df, *functional_cols)

    # One confusion matrix per label pair, in the same order
    print("\n**Visualizing Disagreements**")
    plot_jobs = (
        (conceptual_cols, "Conceptual Question Type", "confusion_matrix_conceptual.pdf"),
        (functional_cols, "Functional Question Type", "confusion_matrix_functional.pdf"),
    )
    for columns, plot_title, pdf_name in plot_jobs:
        plot_confusion_matrix(merged_df, *columns, plot_title, pdf_name)

    return merged_df, conceptual_agreement, functional_agreement


# === 5. Example Usage ===
# File paths
file_annotator1 = "extracted_questions_for_IAA_annotator1_after.csv"
file_annotator2 = "extracted_questions_for_IAA_annotator2_after.csv"

# Run the full pipeline on both files and keep the merged frame plus both results
merged_data, conceptual_results, functional_results = analyze_inter_annotator_agreement(
    file_annotator1=file_annotator1,
    file_annotator2=file_annotator2,
)

---
* Extract misaligned cases to get a better understanding of tagging behaviour

---

In [None]:
# Let pandas print frames at full width and without truncating cell contents
pd.options.display.width = None
pd.options.display.max_colwidth = None

def find_misalignments(merged_df, column_A1, column_A2, label_A1, label_A2):
    """
    Collects the rows where column_A1 equals label_A1 while column_A2 equals label_A2.

    The selection is printed (or a short notice if it is empty) and returned.
    It contains the question id, the question text and the two label columns.
    """
    a1_hits = merged_df[column_A1] == label_A1
    a2_hits = merged_df[column_A2] == label_A2
    shown_columns = ["question_id_individual", "question_individual", column_A1, column_A2]

    # Keep only rows matching both conditions, restricted to the columns of interest
    mismatched_cases = merged_df.loc[a1_hits & a2_hits, shown_columns]

    print(f"\n Cases where A1 labeled '{label_A1}' but A2 labeled '{label_A2}':")
    print("No such mismatches found" if mismatched_cases.empty else mismatched_cases)

    return mismatched_cases

# Example: rows that A1 tagged "Empowerment" while A2 tagged "Advice"
empowerment_vs_advice = find_misalignments(
    merged_df=merged_data,
    column_A1="Conceptual_A1",
    column_A2="Conceptual_A2",
    label_A1="Empowerment",
    label_A2="Advice",
)

# Put the two display options changed at the top of this cell back to their defaults
pd.reset_option("display.width")
pd.reset_option("display.max_colwidth")