In [None]:
%load_ext autoreload
%autoreload 2

get krippendorff alpha

In [None]:
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.special import rel_entr
from scipy.stats import entropy

from bench_lib.evaluation import load_ai_labels, load_human_labels, krippendorf_alpha

# Load the human annotations. `questions` holds the label column names indexed below;
# `comment_cols` is handed to `load_ai_labels` further down.
human_labels, questions, comment_cols = load_human_labels()

# One Krippendorff's alpha per question, in the order of `questions`.
post_ids = human_labels["post_id"]
alphas = [krippendorf_alpha(post_ids, human_labels[question]) for question in questions]
alphas

get disagreement subset

In [None]:
def has_disagreement(x):
    """Tell whether the rows in `x` disagree on at least one question.

    `x` is a frame with a "post_id" column and one column per entry of the
    module-level `questions` (it is used as a `groupby("post_id").filter`
    predicate). A question counts as a disagreement when its non-missing
    answers take more than one distinct value AND Krippendorff's alpha for
    that question is below 0.60.

    Returns:
        True as soon as one such question is found, otherwise False.
    """
    for question in questions:
        answers = x[question]
        # Zero or one distinct non-missing answer: nothing to disagree about.
        if answers.dropna().nunique() <= 1:
            continue
        try:
            if krippendorf_alpha(x["post_id"], answers) < 0.60:  # krippendorf's alpha lower bound for tentative results
                return True
        except ValueError:
            # A ValueError for this question is ignored; move on to the next one.
            continue
    return False

# Keep only the identifying columns and the question columns, then retain every
# post (all of its rows) for which `has_disagreement` returns True.
label_columns = ["post_id", "classification_by"] + questions
disagreement_subset = (
    human_labels[label_columns]
    .groupby("post_id")
    .filter(has_disagreement)
)
disagreement_subset.head(3)

In [None]:
# Wide -> long: one row per original row and question, with the question name
# in "variable" and the answer in "value".
disagreement_subset_long = disagreement_subset.melt(
    id_vars=["post_id", "classification_by"],
    value_vars=questions,
)
disagreement_subset_long.head(3)

calculate entropy per group (question, post)

In [None]:
def compute_entropy(labels):
    """Shannon entropy, in bits, of the empirical distribution of `labels`.

    Args:
        labels: object exposing `value_counts(normalize=True)`, e.g. a pandas Series.

    Returns:
        The entropy of the relative label frequencies, using log base 2.
    """
    probabilities = labels.value_counts(normalize=True)  # relative frequency of each distinct label
    bits = entropy(probabilities, base=2)
    return bits

# For each question, compute the entropy of the non-missing answers of every post
# in the disagreement subset; each frame has columns "post_id" and "<question>_entropy".
answers_by_post = disagreement_subset.groupby("post_id")
entropies = [
    answers_by_post[q]
    .apply(lambda answers: compute_entropy(answers.dropna()))
    .reset_index(name=f"{q}_entropy")
    for q in questions
]

# Join the per-question frames on "post_id" into one wide frame.
entropy_df = reduce(lambda acc, frame: pd.merge(acc, frame, on="post_id"), entropies)
entropy_df.head(3)

Stratify (Low, Moderate, High Entropies)
Due to the number of annotators who participated, the variance is low; the observed entropy values are array([0.91829583, 1. , 0.81127812, 0. ]). We still map 1 to high entropy, 0 to low entropy, and
anything between 0 and 1 to moderate entropy.

In [None]:
def stratify_entropy(entropy, tol=1e-9):
    """Map an entropy value (in bits) to the bin "Low", "Moderate" or "High".

    The comparison uses a tolerance instead of exact float equality, so an
    entropy that is 0 or 1 only up to rounding error still lands in the
    intended bin.

    Args:
        entropy: entropy value to classify.
        tol: absolute tolerance used around the 0 and 1 boundaries.

    Returns:
        "Low" when the entropy is 0 (within `tol`), "High" when it is at least
        1 (within `tol`), otherwise "Moderate".
    """
    if abs(entropy) <= tol:
        return "Low"
    # `>=` rather than `==`: with more than two categories the entropy can
    # exceed 1 bit, and such values must not be ranked below an entropy of 1.
    if entropy >= 1 - tol:
        return "High"
    # NOTE(review): NaN fails both comparisons and therefore ends up here,
    # as it did before -- confirm whether undefined entropies should be binned.
    return "Moderate"

# Add one "<question>_entropy_bin" column next to every "<question>_entropy" column.
entropy_cols = [name for name in entropy_df.columns if name.endswith("_entropy")]
bin_cols = [name.replace("_entropy", "_entropy_bin") for name in entropy_cols]

for source_col, target_col in zip(entropy_cols, bin_cols):
    entropy_df[target_col] = entropy_df[source_col].map(stratify_entropy)

entropy_df.head(3)

get ai labels

In [None]:
# Models whose labels are evaluated; each name is also the sub-folder holding that model's results.
models = [
    "google/gemma-3-4b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-27b-it",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "Qwen/Qwen2.5-VL-7B-Instruct",
    "Qwen/Qwen2.5-VL-72B-Instruct"
]

# One result folder per run (runs 1 to 5).
run_folders = [f"toxicainment_videos_log_Temp_0_7_{run}" for run in range(1, 6)]

# Root of the benchmark results. Set TOXICAINMENT_RESULTS_DIR to run the notebook on
# another machine; the default keeps the original location.
base_path = os.environ.get(
    "TOXICAINMENT_RESULTS_DIR",
    "/home/tanalp/toxicainment/simple_inference_benchmark_results",
)

# model name -> labels of all runs of that model, stacked into one frame.
model_results = {}

for model in models:
    folders = [
        f"{base_path}/{run_folder}/{model}"
        for run_folder in run_folders
    ]
    # Each run folder is loaded on its own and the frames are concatenated row-wise.
    ai_labels_long = pd.concat(
        [load_ai_labels([folder], questions, comment_cols) for folder in folders],
        ignore_index=True
    )
    model_results[model] = ai_labels_long

In [None]:
# Reload the human annotations with long=True. Only the first returned value is kept;
# later cells group it by "post_id" and "variable" and aggregate its "value" column.
human_labels_long, _, _ = load_human_labels(long=True)

In [None]:
# Human soft label: mean of all human values recorded for each (post_id, variable) pair.
human_soft = (
    human_labels_long
    .groupby(['post_id', 'variable'])['value']
    .mean()
    .rename('human_soft_label')
    .reset_index()
)

In [None]:
# Compare every model's soft labels with the human soft labels.
soft_label_keys = ['post_id', 'variable']
results = []

for model, ai_labels_long in model_results.items():
    # get mean of 5 stochastic runs for each model
    model_soft = (
        ai_labels_long
        .groupby(soft_label_keys)['value']
        .mean()
        .rename('model_soft_label')
        .reset_index()
    )
    # Inner join: only (post, question) pairs present for both humans and the model are scored.
    scored = human_soft.merge(model_soft, on=soft_label_keys).assign(
        # more like a cost function, mean squared error https://en.wikipedia.org/wiki/Brier_score
        brier_score=lambda frame: (frame['human_soft_label'] - frame['model_soft_label']) ** 2,
        model=model,
    )
    results.append(scored)

all_results = pd.concat(results, ignore_index=True)
all_results.head(3)

In [None]:
# Reshape the wide entropy table into long form: one row per (question, post) with
# that question's entropy and entropy bin. The frames are built column-wise per
# question instead of row by row with `iterrows`, which is faster and still yields
# the expected columns when `entropy_df` has no rows.
new_entropy = pd.concat(
    [
        pd.DataFrame({
            "post_id": entropy_df["post_id"],
            "question": q,
            "entropy": entropy_df[f"{q}_entropy"],
            "entropy_bin": entropy_df[f"{q}_entropy_bin"],
        })
        for q in questions
    ],
    ignore_index=True,
)
new_entropy = new_entropy.sort_values(["entropy_bin", "question", "post_id"])
new_entropy.head(3)

In [None]:
# Attach the entropy bin of each (post, question) pair to the Brier scores.
# Left join: rows of `all_results` without an entropy entry get a missing `entropy_bin`.
entropy_bins = new_entropy[["post_id", "question", "entropy_bin"]]
merged = pd.merge(
    all_results,
    entropy_bins,
    how="left",
    left_on=["post_id", "variable"],
    right_on=["post_id", "question"],
)

# Mean Brier score per model, question and entropy bin.
summary_keys = ["model", "variable", "entropy_bin"]
summary = (
    merged
    .groupby(summary_keys)["brier_score"]
    .mean()
    .reset_index()
    .sort_values(summary_keys)
)

summary.head(3)

In [None]:
def plot_brier_scores(summary):
    """Draw a grouped bar chart of Brier scores per entropy bin, one hue per model.

    Args:
        summary: frame with the columns "entropy_bin", "brier_score" and "model".
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    # NOTE(review): `ci` is deprecated in newer seaborn releases in favour of
    # `errorbar=None` -- switch once the seaborn version is pinned.
    sns.barplot(
        data=summary,
        x="entropy_bin",
        y="brier_score",
        hue="model",
        ci=None,
        palette="rocket",
        order=["Low", "Moderate", "High"],
        ax=ax,
    )
    ax.set_title("Brier Scores by Model and Entropy Bin")
    ax.set_xlabel("Entropy Bin")
    ax.set_ylabel("Brier Score")
    # Legend is placed outside the axes, to the right of the plot.
    ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    plt.show()

plot_brier_scores(summary)

get brier plot with variables instead

In [None]:
# Mean Brier score per model and question over all scored (post, question) pairs.
# This rebinds `summary`, replacing the per-entropy-bin table computed earlier.
variable_keys = ["model", "variable"]
summary = (
    all_results
    .groupby(variable_keys)["brier_score"]
    .mean()
    .reset_index()
    .sort_values(variable_keys)
)

# Not the last expression of the cell, so this preview is not rendered.
summary.head(3)


def plot_brier_scores(summary):
    """Draw a grouped bar chart of Brier scores per variable, one hue per model.

    This definition replaces the earlier function of the same name, which
    plots by entropy bin.

    Args:
        summary: frame with the columns "variable", "brier_score" and "model".
    """
    plt.figure(figsize=(12, 8))
    ax = sns.barplot(
        data=summary,
        x="variable",
        y="brier_score",
        hue="model",
        ci=None,
        palette="rocket"
    )
    ax.set(
        title="Brier Scores by Model and Variable",
        xlabel="Variable",
        ylabel="Brier Score",
    )
    # Legend is placed outside the axes, to the right of the plot.
    ax.legend(title="Model", bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.figure.tight_layout()
    plt.show()

plot_brier_scores(summary)

get kl divergence 

In [None]:
# Collect, for every (post, question) pair of the disagreement subset, the list of
# human values (missing values included, since they are not dropped here).
human_values = disagreement_subset_long.groupby(['post_id', 'variable'])['value']
grouped_humans = human_values.apply(list).reset_index()
grouped_humans

In [None]:
# (post_id, variable) pairs that belong to the disagreement subset.
disagreement_pairs = set(
    zip(disagreement_subset_long["post_id"], disagreement_subset_long["variable"])
)
# model name -> frame with one row per (post_id, variable) and the list of that model's labels.
model_label_lists = {}

for model, ai_labels_long in model_results.items():
    # Vectorised membership test on the (post_id, variable) pairs; this replaces a
    # row-by-row `apply(axis=1)` and selects the same rows.
    if disagreement_pairs:
        pair_index = pd.MultiIndex.from_frame(ai_labels_long[["post_id", "variable"]])
        in_disagreement = pair_index.isin(list(disagreement_pairs))
    else:
        # No pairs to match: select nothing.
        in_disagreement = np.zeros(len(ai_labels_long), dtype=bool)
    filtered = ai_labels_long[in_disagreement]

    grouped = (
        filtered.groupby(["post_id", "variable"])["value"]
        .apply(list)
        .reset_index()
    )
    model_label_lists[model] = grouped

In [None]:
# Flatten the per-model frames into (model, post_id, variable, labels) tuples.
labels_tuples = [
    (model, post_id, variable, value)
    for model, grouped in model_label_lists.items()
    for post_id, variable, value in grouped.itertuples(index=False)
]

In [None]:
# Display the per-(post_id, variable) lists of human values again before they are merged with the model labels.
grouped_humans

In [None]:
# One row per (model, post, question) with the model's label list in "labels" ...
label_columns_long = ["model", "post_id", "variable", "labels"]
model_labels_df = pd.DataFrame(labels_tuples, columns=label_columns_long)

# ... joined (inner) with the human label list in "value" for the same post and question.
merged = model_labels_df.merge(grouped_humans, on=["post_id", "variable"])


In [None]:
def get_distribution(labels):
    """Return the relative frequencies of the labels 0 and 1 as a length-2 array.

    Missing values are ignored by `value_counts`. A label that does not occur
    gets frequency 0; if no label is usable at all, the result is all zeros.
    Labels other than 0 and 1 count towards the total but are not reported.
    """
    counts = pd.Series(labels).value_counts(normalize=True).reindex([0, 1], fill_value=0)
    return counts.values

def kl_divergence(row):
    """KL divergence (natural log) from the model's to the humans' label distribution.

    Args:
        row: mapping with "value" (human labels) and "labels" (model labels).

    Returns:
        sum(p * log(p / q)) with p the human and q the model distribution, or
        NaN when either side has no usable 0/1 label. Without that guard the
        clipping below would turn an all-zero vector into a fake distribution
        and report a meaningless value close to 0.
    """
    p = get_distribution(row["value"]) # human
    q = get_distribution(row["labels"]) # model
    if p.sum() == 0 or q.sum() == 0:
        return np.nan
    # Clip away exact zeros so a label unseen by the model gives a large but finite term.
    p = np.clip(p, 1e-10, 1)
    q = np.clip(q, 1e-10, 1)
    return np.sum(rel_entr(p, q))

# Evaluate `kl_divergence` on every row and store the result as a new column of `merged`.
kl_values = merged.apply(kl_divergence, axis=1)
merged["kl_divergence"] = kl_values
merged

In [None]:
def plot_kl_divergence(merged):
    """Draw one box plot of the KL divergence values per model.

    Args:
        merged: frame with the columns "model" and "kl_divergence".
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=merged, x="model", y="kl_divergence", ax=ax)
    ax.set_title("KL Divergence by Model")
    ax.set_xlabel("Model")
    ax.set_ylabel("KL Divergence")
    # Tilt the model names on the x axis by 45 degrees.
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    plt.show()

plot_kl_divergence(merged)

get brier skill

get ece