# ==== INTERACTIVE CLUSTERING : INTER-ANNOTATORS SCORE STUDY ====
> ### Stage 1 : Analyze inter-annotators score and Plot some figures.

-----

## READ-ME BEFORE RUNNING

### Quick Description

This notebook is **aimed at analyzing inter-annotator scores on interactive clustering annotation experiments**.
- Environments are represented by subdirectories in the `/experiments` folder.
- Each subdirectory of the `/experiments` folder represents an annotation experiment with several annotators.

### Description of each step

First of all, **load the annotated constraints** from JSON files.

Then, **compute a contingency matrix** with groundtruth and annotators.

-----

## 1. IMPORT PYTHON DEPENDENCIES

In [None]:
from typing import Any, Dict, List, Optional, Tuple, Union
import json
import numpy as np
import openpyxl
import pandas as pd
from scipy import stats as scipystats

-----

## 2. LOAD DATA

### 2.1. Load groundtruth from JSON file.

In [None]:
# Load the texts of the experiment from `texts.json`.
# The encoding is given explicitly so that decoding does not depend on the platform's default locale.
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/texts.json", "r", encoding="utf-8") as fr_texts:
    dict_of_texts: Dict[str, Dict[str, Union[str,bool]]] = json.load(fr_texts)

In [None]:
# Load the groundtruth label of each text (text id -> label) from `dict_of_true_intents.json`.
# The encoding is given explicitly so that decoding does not depend on the platform's default locale.
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/dict_of_true_intents.json", "r", encoding="utf-8") as fr_intents:
    dict_of_true_intents: Dict[str, str] = json.load(fr_intents)

### 2.2. Load constraints annotated from JSON file.

In [None]:
# Case 2 : 400 semi-random annotations (200 MUST_LINK, 200 CANNOT_LINK) with 4 annotators.
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/constraints_-_template_to_annotate_2.json", "r") as fr_constraints_sampled:
    dict_of_constraints_sampled: Dict[str, str] = json.load(fr_constraints_sampled)
list_of_annotators: List[str] = ["1.2", "7.2", "9.2", "12.2"]

In [None]:
# Annotations indexed by annotator name, then by constraint id.
dict_of_constraints: Dict[str, Dict[str, Any]] = {}

# Add groundtruth: two texts sharing the same true intent give a MUST_LINK, otherwise a CANNOT_LINK.
dict_of_constraints["groundtruth"] = {}
for constraint_id, constraint in dict_of_constraints_sampled.items():
    text_id1: str = constraint["data"]["id_1"]
    label_1: str = dict_of_true_intents[text_id1]
    text_id2: str = constraint["data"]["id_2"]
    label_2: str = dict_of_true_intents[text_id2]
    constraints_type: str = (
        "MUST_LINK"
        if label_1 == label_2
        else "CANNOT_LINK"
    )
    dict_of_constraints["groundtruth"][constraint_id] = constraints_type

# Add annotations.
for annotator in list_of_annotators:
    # "groundtruth" is computed above and has no annotation file: skip it so that this cell can be
    # re-run after "groundtruth" has been prepended to `list_of_annotators`.
    if annotator == "groundtruth":
        continue
    dict_of_constraints[annotator] = {}
    # The encoding is given explicitly so that decoding does not depend on the platform's default locale.
    with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/constraints_-_{0}.json".format(annotator), "r", encoding="utf-8") as fr_constraints_annotated:
        constraints_annotated: Dict[str, Any] = json.load(fr_constraints_annotated)
    # Keep only the constraints that belong to the sampled template.
    for constraint_id, constraint in constraints_annotated.items():
        if constraint_id in dict_of_constraints_sampled:
            dict_of_constraints[annotator][constraint_id] = constraint["constraint_type"]

### 2.3. Format annotation in pandas DataFrame.

Define list of annotators.

In [None]:
# Put the groundtruth first in the list of annotators.
# The guard keeps the cell idempotent: re-running it must not add "groundtruth" a second time.
if "groundtruth" not in list_of_annotators:
    list_of_annotators = ["groundtruth"] + list_of_annotators

Format annotations with one annotator per column.

In [None]:
# One row per sampled constraint, one column per annotator (groundtruth included).
# `.get()` yields `None` when an annotator has no annotation for a constraint, instead of raising a `KeyError`.
df_annotations_by_line = pd.DataFrame(
    [
        [annotation_id] + [dict_of_constraints[annotator].get(annotation_id) for annotator in list_of_annotators]
        for annotation_id in dict_of_constraints["groundtruth"]
    ],
    columns=(["annotation_id"] + list_of_annotators)
).set_index("annotation_id")
df_annotations_by_line.head(5000)

Format annotations in only one column.

In [None]:
# Long format: one record per (annotator, constraint) pair.
annotation_records = []
for annotator_name, annotator_constraints in dict_of_constraints.items():
    for constraint_key, constraint_value in annotator_constraints.items():
        annotation_records.append([annotator_name, constraint_key, constraint_value])

df_annotations_one_column = pd.DataFrame(
    annotation_records,
    columns=["annotator", "annotation_id", "annotation"],
)
df_annotations_one_column.head(5000)

-----

## 3. COMPUTE INTER-ANNOTATORS SCORE

### 3.1. Compute basic agreement.

Define basic agreement.

In [None]:
def evaluate_basic_agreement(df: pd.DataFrame, annotators: Optional[List[str]] = None) -> str:
    """
    Compute the basic agreement rate between annotators.

    A row counts as an agreement when exactly one distinct annotation remains once missing
    values (`None` or `NaN`) are ignored. A row without any annotation is a disagreement.

    Note:
        A boolean `agreement` column is added to `df` in place: pass a copy to keep the original untouched.

    Args:
        df (pd.DataFrame): The annotations, with one row per constraint and one column per annotator.
        annotators (Optional[List[str]]): The names of the columns to compare. Defaults to `None`, in which case the notebook-level `list_of_annotators` is used.

    Returns:
        str: The mean agreement and its standard error, formatted as `"mean (+/-sem)"` with four decimals.
    """
    if annotators is None:
        annotators = list_of_annotators

    # Only compare the columns that are annotators (ignores e.g. a previous `agreement` column).
    annotator_columns = [column for column in df.columns if column in annotators]

    # `pd.notna` skips both `None` and `NaN`, the two ways pandas can represent a missing annotation.
    agreement = [
        len({annotation for annotation in annotations if pd.notna(annotation)}) == 1
        for annotations in df[annotator_columns].to_numpy(dtype=object)
    ]
    df["agreement"] = pd.Series(agreement, index=df.index, dtype=bool)

    # Statistics are computed on floats (1.0 for agreement, 0.0 for disagreement).
    scores = df["agreement"].astype(float)
    return "{0:.4f} (+/-{1:.4f})".format(
        np.mean(scores),
        scipystats.sem(scores),
    )

Compute local basic agreement.

In [None]:
# Pairwise basic agreement: only the cells strictly below the diagonal are filled,
# and there is no row for the groundtruth.
basic_agreement_rows = []
for row_position, row_annotator in enumerate(list_of_annotators):
    if row_annotator == "groundtruth":
        continue
    row_cells = [row_annotator]
    for column_position, column_annotator in enumerate(list_of_annotators):
        if row_position <= column_position:
            row_cells.append("")
            continue
        # Work on a copy because the evaluation adds a column to the frame it receives.
        pair_of_annotations = df_annotations_by_line[[row_annotator, column_annotator]].copy()
        row_cells.append(evaluate_basic_agreement(pair_of_annotations))
    basic_agreement_rows.append(row_cells)

df_basic_agreement = pd.DataFrame(
    basic_agreement_rows,
    columns=["annotator"] + list_of_annotators,
).set_index("annotator")

df_basic_agreement.head(5000)

Compute global basic agreement.

In [None]:
# Agreement restricted to annotators 7.2, 9.2 and 12.2.
df = df_annotations_by_line.loc[:, ["7.2", "9.2", "12.2"]].copy()
basic_agreement_summary = evaluate_basic_agreement(df)
print("Global inter-annotators basic agreement: agreement={0}".format(basic_agreement_summary))

In [None]:
# Agreement computed from a copy of the whole `df_annotations_by_line` table.
df = df_annotations_by_line.copy()
basic_agreement_summary = evaluate_basic_agreement(df)
print("Global inter-annotators basic agreement: agreement={0}".format(basic_agreement_summary))

### 3.2. Compute Cohen's Kappa

Define Cohen agreement.

### 3.3. Compute Krippendorff's alpha

Define Krippendorff's alpha agreement.

In [None]:
import simpledorff

In [None]:
def interprete_krippendorffs_alpha(alpha: float) -> str:
    """
    Give a qualitative label to a Krippendorff's alpha value.

    Negative values are accepted and labelled `"low"`: alpha drops below 0.0 when the observed
    disagreement exceeds the disagreement expected by chance.

    Args:
        alpha (float): The Krippendorff's alpha to interpret. Must be lower than or equal to 1.0.

    Raises:
        ValueError: If `alpha` is greater than 1.0 or is not a number (NaN).

    Returns:
        str: `"low"` below 0.667, `"acceptable"` below 0.8, `"good"` below 1.0, `"perfect"` at 1.0.
    """
    # Written as a negation so that NaN is rejected too: every comparison with NaN is false,
    # so it would otherwise fall through all the thresholds below and be labelled "perfect".
    if not (alpha <= 1.0):
        raise ValueError("`alpha` must be a value lower than or equal to 1.0. Currently it is `{0}`.".format(alpha))
    if alpha < 0.667:
        return "low"
    if alpha < 0.8:
        return "acceptable"
    if alpha < 1.0:
        return "good"
    return "perfect"

In [None]:
def evaluate_krippendorffs_alpha(df: pd.DataFrame) -> str:
    """
    Compute the Krippendorff's alpha of a long-format annotation table and format it with its interpretation.

    Args:
        df (pd.DataFrame): The annotations, with the columns `annotation_id`, `annotator` and `annotation`.

    Returns:
        str: The alpha with four decimals, followed by its qualitative label in parentheses.
    """
    # Column names handed to `simpledorff` to locate the unit, the annotator and the annotated class.
    column_roles = {
        "experiment_col": "annotation_id",
        "annotator_col": "annotator",
        "class_col": "annotation",
    }
    alpha: float = simpledorff.calculate_krippendorffs_alpha_for_df(df, **column_roles)
    interpretation = interprete_krippendorffs_alpha(alpha)
    return "{0:.4f} ({1})".format(alpha, interpretation)

Compute local Krippendorff's alpha agreement.

In [None]:
# Pairwise Krippendorff's alpha: only the cells strictly below the diagonal are filled,
# and there is no row for the groundtruth.
krippendorff_alpha_rows = []
for row_position, row_annotator in enumerate(list_of_annotators):
    if row_annotator == "groundtruth":
        continue
    row_cells = [row_annotator]
    for column_position, column_annotator in enumerate(list_of_annotators):
        if row_position <= column_position:
            row_cells.append("")
            continue
        # Keep only the annotations made by the two annotators being compared.
        pair_mask = df_annotations_one_column["annotator"].isin([row_annotator, column_annotator])
        row_cells.append(evaluate_krippendorffs_alpha(df_annotations_one_column[pair_mask].copy()))
    krippendorff_alpha_rows.append(row_cells)

df_krippendorff_alpha_agreement = pd.DataFrame(
    krippendorff_alpha_rows,
    columns=["annotator"] + list_of_annotators,
).set_index("annotator")

df_krippendorff_alpha_agreement.head(5000)

Compute global Krippendorff's alpha agreement.

In [None]:
# Alpha restricted to the annotations made by annotators 7.2, 9.2 and 12.2.
selected_annotators_mask = df_annotations_one_column["annotator"].isin(["7.2", "9.2", "12.2"])
df = df_annotations_one_column.loc[selected_annotators_mask].copy()
krippendorff_alpha_summary = evaluate_krippendorffs_alpha(df)
print("Global inter-annotators Krippendorff's alpha agreement: alpha={0}".format(krippendorff_alpha_summary))

In [None]:
# Alpha computed from a copy of the whole `df_annotations_one_column` table.
df = df_annotations_one_column.copy()
krippendorff_alpha_summary = evaluate_krippendorffs_alpha(df)
print("Global inter-annotators Krippendorff's alpha agreement: alpha={0}".format(krippendorff_alpha_summary))

### 3.4. Store results.

In [None]:
# Export both pairwise agreement tables to a single Excel workbook, one sheet per score.
with pd.ExcelWriter("../results/etude-robustesse-scores-inter-annotateurs.xlsx") as writer:
    df_basic_agreement.to_excel(
        writer,
        sheet_name="basic_agreement",
    )
    df_krippendorff_alpha_agreement.to_excel(
        writer,
        sheet_name="krippendorff_alpha",
    )