# Analyzing synthetic annotations

## Loading the synthetic conversations

In [None]:
import pandas as pd
import seaborn as sns
import scipy.stats
import scikit_posthocs
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm

import json
import os
import re


# code adapted from https://www.geeksforgeeks.org/python-list-all-files-in-directory-and-subdirectories/
def files_from_dir_recursive(start_path="."):
    """Return the paths of all files found under ``start_path``, recursively.

    :param start_path: directory at which the walk starts (default: current dir)
    :return: list of file paths, each joined onto the directory it was found in
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(start_path)
        for filename in filenames
    ]

In [None]:
def import_conversations(conv_dir: str) -> pd.DataFrame:
    """Load every conversation JSON file under ``conv_dir`` into one DataFrame.

    Each file must provide the fields ``id``, ``user_prompts`` and ``logs``.
    Every entry of ``logs`` becomes one row: its first element is stored in the
    ``user`` column and its second in the ``message`` column. The name of the
    file's parent directory is recorded as ``conv_variant``.

    :param conv_dir: root directory that is searched recursively for files
    :return: DataFrame indexed by ``id`` with the columns ``user_prompts``,
        ``conv_variant``, ``user`` and ``message``
    :raises ValueError: if no files are found under ``conv_dir``
    """
    file_paths = files_from_dir_recursive(conv_dir)
    if not file_paths:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" raised by pd.concat([]) below.
        raise ValueError(f"No conversation files found under {conv_dir!r}")

    rows = []
    for file_path in file_paths:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, "r", encoding="utf-8") as fin:
            conv = json.load(fin)

        conv = pd.json_normalize(conv)
        conv = conv[["id", "user_prompts", "logs"]]
        # one row per log entry
        conv = conv.explode("logs")
        # get name, not path of parent directory
        conv["conv_variant"] = os.path.basename(os.path.dirname(file_path))
        conv["user"] = conv.logs.apply(lambda x: x[0])
        conv["message"] = conv.logs.apply(lambda x: x[1])
        del conv["logs"]
        rows.append(conv)

    full_df = pd.concat(rows)
    full_df = full_df.set_index("id")
    return full_df


# Load all synthetic conversations and keep only rows with a non-empty message
conv_df = import_conversations("../output/conversations")
has_text = conv_df["message"].str.len() > 0
conv_df = conv_df[has_text]
conv_df

In [None]:
# code adapted from ChatGPT
def extract_attributes(text):
    """Extract the annotator description from an annotator prompt.

    :param text: prompt text expected to contain "You are <...> expert annotator"
    :return: the text between "You are " and " expert annotator", or None if
        the prompt does not contain that phrase
    """
    found = re.search(r"You are (.+?) expert annotator", text)
    if found is None:
        return None
    return found.group(1)


def extract_toxicity_value(text):
    """Extract the numeric score from the first "Toxicity=<number>" in ``text``.

    :param text: raw annotation output
    :return: the number as a string (e.g. "3" or "2.5"), or None if no
        "Toxicity=<number>" is present
    """
    # The fractional part is only captured when digits follow the dot, so a
    # sentence-ending period ("Toxicity=3.") is not swallowed into the value.
    pattern = r"Toxicity=(\d+(?:\.\d+)?)"
    match = re.search(pattern, text)
    if match:
        return match.group(1)
    return None

In [None]:
def import_annotations(annot_dir: str) -> pd.DataFrame:
    """Load every annotation JSON file under ``annot_dir`` into one DataFrame.

    Each file must provide the fields ``conv_id``, ``annotator_prompt`` and
    ``logs``. Every entry of ``logs`` becomes one row: its first element is
    stored in ``message``, and its second element is parsed with
    ``extract_toxicity_value`` and stored in ``toxicity``. ``annotator_prompt``
    is reduced to the description returned by ``extract_attributes``.

    :param annot_dir: root directory that is searched recursively for files
    :return: DataFrame indexed by ``conv_id`` with the columns
        ``annotator_prompt``, ``message`` and ``toxicity`` (``toxicity`` is a
        string, or None where no score could be parsed)
    :raises ValueError: if no files are found under ``annot_dir``
    """
    file_paths = files_from_dir_recursive(annot_dir)
    if not file_paths:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" raised by pd.concat([]) below.
        raise ValueError(f"No annotation files found under {annot_dir!r}")

    rows = []
    for file_path in file_paths:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(file_path, "r", encoding="utf-8") as fin:
            conv = json.load(fin)

        conv = pd.json_normalize(conv)
        conv = conv[["conv_id", "annotator_prompt", "logs"]]
        # one row per log entry
        conv = conv.explode("logs")
        conv["annotator_prompt"] = conv["annotator_prompt"].apply(extract_attributes)
        conv["message"] = conv.logs.apply(lambda x: x[0])
        conv["toxicity"] = conv.logs.apply(lambda x: x[1])
        conv["toxicity"] = conv.toxicity.apply(extract_toxicity_value)
        del conv["logs"]
        rows.append(conv)

    full_df = pd.concat(rows)
    full_df = full_df.set_index("conv_id")
    return full_df


# Load all annotations, drop rows with any missing field (for example an
# unparsable toxicity score), then convert the score strings to integers
annot_df = import_annotations("../output/annotations")
annot_df = annot_df.dropna()
annot_df["toxicity"] = annot_df["toxicity"].astype(int)
annot_df

In [None]:
# Pair each annotation with the conversation message it refers to; only
# (conversation id, message) combinations present in both frames are kept
full_df = conv_df.merge(
    annot_df,
    how="inner",
    left_on=["id", "message"],
    right_on=["conv_id", "message"],
)


def simplify_labels(text):
    """Map an annotator description to a short demographic label.

    The keywords are checked in order and the first one contained in ``text``
    decides the label.

    :param text: annotator description
    :return: "African American", "LGBT", "Neutral" or "Control"; None if no
        keyword matches
    """
    keyword_labels = (
        ("African American", "African American"),
        ("LGBT", "LGBT"),
        ("neutral", "Neutral"),
        ("typical", "Control"),
    )
    for keyword, label in keyword_labels:
        if keyword in text:
            return label
    return None


# Replace the extracted annotator descriptions with their short labels
full_df["annotator_prompt"] = full_df["annotator_prompt"].apply(simplify_labels)
full_df

## Analyzing the annotators

In [None]:
# Tally how often each toxicity score was given under each annotator prompt
toxicity_counts = (
    full_df.groupby(["annotator_prompt", "toxicity"])
    .size()
    .reset_index(name="count")
)

# Horizontal grouped bars: one group per annotator prompt, one bar per score
fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(
    data=toxicity_counts,
    y="annotator_prompt",
    x="count",
    hue="toxicity",
    palette="flare",
    ax=ax,
)

# Title, axis labels and legend
ax.set_title("Count of Distinct Toxicity Classifications for Each Annotator Prompt")
ax.set_ylabel("Annotator Prompt")
ax.set_xlabel("Count")
ax.legend(title="Toxicity", fontsize="15", title_fontsize="20")

# Render the figure
fig.tight_layout()
plt.show()

In [None]:
# Mean toxicity per conversation variant, split by annotator demographic
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(
    data=full_df,
    y="conv_variant",
    x="toxicity",
    hue="annotator_prompt",
    estimator=np.mean,
    ax=ax,
)

# Title, axis labels, fixed x-range and legend
ax.set_title("Average Toxicity by Annotator Prompt for Each Conversation Variant")
ax.set_ylabel("Conversation Type")
ax.set_xlabel("Average Toxicity")
ax.set_xlim(0, 5)
ax.legend(title="Annotator Demographic", fontsize="13", title_fontsize="16")

# Render the figure
fig.tight_layout()
plt.show()

## Testing for statistical significance

In [None]:
# One sample of toxicity scores per annotator demographic
grouped = full_df.groupby("annotator_prompt")["toxicity"].apply(list)
toxicity_groups = grouped.tolist()

# Kruskal-Wallis H-test (non-parametric); its null hypothesis concerns the
# group medians, not the means, so the printed H_0 is worded accordingly.
_, p = scipy.stats.kruskal(*toxicity_groups)
print("H_0: median toxicity is the same across annotator demographics: p=", p)


# Pairwise Dunn post-hoc test with Bonferroni adjustment
posthoc = scikit_posthocs.posthoc_dunn(
    full_df, val_col="toxicity", group_col="annotator_prompt", p_adjust="bonferroni"
)
# Reshape to long form (Group1, Group2, p-value) and back to a labelled matrix
posthoc_df = posthoc.reset_index().melt(
    id_vars="index", var_name="Comparison", value_name="p-value"
)
posthoc_df.columns = ["Group1", "Group2", "p-value"]
posthoc_df.pivot(index="Group1", columns="Group2", values="p-value")

In [None]:
# One sample of toxicity scores per (annotator_prompt, conv_variant) combination
grouped = full_df.groupby(["annotator_prompt", "conv_variant"])["toxicity"].apply(list)

# grouped data to a list of lists
toxicity_groups = grouped.tolist()

# Kruskal-Wallis H-test (non-parametric ANOVA); its null hypothesis concerns
# the group medians, not the means, so the printed H_0 is worded accordingly.
_, p = scipy.stats.kruskal(*toxicity_groups)
print(
    "H_0: median toxicity is the same across all "
    "annotator_prompt x conv_variant groups: p=",
    p,
)

In [None]:
# Create a column to identify the (annotator_prompt, conv_variant) groups.
# Work on a copy: a plain assignment would only alias full_df, so the helper
# "group" column would silently be added to full_df as well.
stats_df = full_df.copy()
stats_df["group"] = (
    stats_df["annotator_prompt"].astype(str)
    + "_"
    + stats_df["conv_variant"].astype(str)
)

# non-parametric post-hoc test (pairwise Dunn with Bonferroni adjustment)
posthoc = scikit_posthocs.posthoc_dunn(
    stats_df, val_col="toxicity", group_col="group", p_adjust="bonferroni"
)
# Reshape to long form (Group1, Group2, p-value) and back to a labelled matrix
posthoc_df = posthoc.reset_index().melt(
    id_vars="index", var_name="Comparison", value_name="p-value"
)
posthoc_df.columns = ["Group1", "Group2", "p-value"]
posthoc_df.pivot(index="Group1", columns="Group2", values="p-value")

## Calculating annotator disagreement

We measure annotator disagreement with the normalised Distance From Unimodality (nDFU) score, introduced in the paper [Polarized Opinion Detection Improves the Detection of Toxic Language](https://aclanthology.org/2024.eacl-long.117) (Pavlopoulos & Likas, EACL 2024).

In [None]:
# code from John Pavlopoulos https://github.com/ipavlopoulos/ndfu/blob/main/src/__init__.py
def dfu(input_data, histogram_input=True, normalised=True):
    """The Distance From Unimodality measure

    Starting at the first bin holding the maximum frequency, the histogram is
    scanned to the right and to the left; the largest increase between two
    adjacent bins when moving away from that peak is the DFU.

    :param: input_data: the data, by default the relative frequencies of ratings
        (any sequence or array of numbers)
    :param: histogram_input: False to compute rel. frequencies (ratings as input)
    :param: normalised: True to divide the DFU by the maximum frequency
    :return: the DFU score
    """
    hist = input_data if histogram_input else to_hist(input_data, bins_num=5)
    # Coerce to an array so that `hist == max_value` below is element-wise;
    # for a plain list it would be a scalar False and np.where would find no
    # position, raising IndexError.
    hist = np.asarray(hist)
    max_value = max(hist)
    pos_max = np.where(hist == max_value)[0][0]
    max_diff = 0
    # right search
    for i in range(pos_max, len(hist) - 1):
        diff = hist[i + 1] - hist[i]
        if diff > max_diff:
            max_diff = diff
    # left search
    for i in range(pos_max, 0, -1):
        diff = hist[i - 1] - hist[i]
        if diff > max_diff:
            max_diff = diff
    if normalised:
        return max_diff / max_value
    return max_diff


def to_hist(scores, bins_num=3, normed=True):
    """Build a histogram of the given ratings.

    :param: scores: the ratings (not necessarily discrete)
    :param: bins_num: the number of bins to create
    :param: normed: True (default) to return relative frequencies, False to
        return the raw counts
    :return: the histogram, one value per bin
    """
    # only the per-bin counts are needed; the bin edges are discarded
    bin_counts = np.histogram(a=scores, bins=bins_num)[0]
    relative_freqs = bin_counts / bin_counts.sum()
    if normed:
        return relative_freqs
    return bin_counts

In [None]:
# Wide table: one row per (conv_variant, user, message) and one toxicity
# column per annotator prompt. The columns-axis name is cleared, and messages
# that lack a score from any annotator prompt are dropped.
pivot_df = (
    full_df.pivot_table(
        index=["conv_variant", "user", "message"],
        columns="annotator_prompt",
        values="toxicity",
    )
    .reset_index()
    .rename_axis(None, axis="columns")
    .dropna()
)
pivot_df

In [None]:
# NOTE(review): positions 3..6 are assumed to be the four annotator-prompt
# score columns following conv_variant, user and message — confirm whenever
# the set of annotator prompts changes.
annotator_scores = pivot_df.iloc[:, 3:7]

# nDFU of each message, computed from the raw scores of its annotators
pivot_df["nDFU"] = [
    dfu(list(scores), histogram_input=False, normalised=True)
    for _, scores in annotator_scores.iterrows()
]
pivot_df = pivot_df.sort_values("nDFU")
pivot_df

In [None]:
# Distribution of the nDFU scores over all messages (missing scores excluded)
ndfu_scores = pivot_df["nDFU"].dropna()
ax = sns.histplot(ndfu_scores, kde=False, bins=100)
ax.set_xlabel("nDFU")
ax.set_ylabel("Count")
ax.set_title("Histogram of nDFU Scores")
plt.show()