# ==== INTERACTIVE CLUSTERING : INTER-ANNOTATORS SCORE STUDY ====
> ### Stage 1 : Analyze inter-annotator scores and plot some figures.

-----

## READ-ME BEFORE RUNNING

### Quick Description

This notebook **aims to analyze inter-annotator scores on interactive clustering annotation experiments**.
- Environments are represented by subdirectories in the `/experiments` folder.
- Each subdirectory of the `/experiments` folder represents an annotation experiment with several annotators.

### Description of each step

First of all, **load the annotated constraints** from JSON files.

Then, **compute a contingency matrix** between the groundtruth and the annotators.

-----

## 1. IMPORT PYTHON DEPENDENCIES

In [19]:
from typing import Any, Dict, List, Optional, Tuple, Union
import json
import numpy as np
import openpyxl
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
import matplotlib.cm as cm
from matplotlib.colors import Normalize

-----

## 2. LOAD DATA

### 2.1. Load groundtruth from JSON file.

In [24]:
# Load the texts of the experiment from their JSON file.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale encoding (JSON files are conventionally UTF-8).
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/texts.json", "r", encoding="utf-8") as fr_texts:
    dict_of_texts: Dict[str, Dict[str, Union[str, bool]]] = json.load(fr_texts)

In [129]:
# Load the groundtruth: a mapping from a text ID to its true intent label.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale encoding (JSON files are conventionally UTF-8).
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/dict_of_true_intents.json", "r", encoding="utf-8") as fr_intents:
    dict_of_true_intents: Dict[str, str] = json.load(fr_intents)

### 2.2. Load annotated constraints from JSON files.

In [130]:
# Case 1 : 1000 random annotations with 14 annotators.
# NOTE: the "Case 2" cell rebinds the same two names (`dict_of_constraints_sampled`
# and `list_of_annotators`); run only the cell of the case to analyze.
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/constraints_-_template_to_annotate_1.json", "r", encoding="utf-8") as fr_constraints_sampled:
    # Each constraint is itself a dictionary: its "data" entry holds the IDs of the two texts ("id_1", "id_2").
    dict_of_constraints_sampled: Dict[str, Dict[str, Any]] = json.load(fr_constraints_sampled)
# Annotator identifiers: used to build the name of each annotation file ("constraints_-_<annotator>.json").
list_of_annotators: List[str] = ["1.1", "2.1", "3.1", "4.1", "5.1", "6.1", "7.1", "8.1", "9.1", "10.1", "11.1", "12.1", "13.1", "14.1"]

In [131]:
# Case 2 : 400 semi-random annotations (200 MUST_LINK, 200 CANNOT_LINK) with 4 annotators.
# NOTE: this cell rebinds the two names set by the "Case 1" cell (`dict_of_constraints_sampled`
# and `list_of_annotators`); run only the cell of the case to analyze.
with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/constraints_-_template_to_annotate_2.json", "r", encoding="utf-8") as fr_constraints_sampled:
    # Each constraint is itself a dictionary: its "data" entry holds the IDs of the two texts ("id_1", "id_2").
    dict_of_constraints_sampled: Dict[str, Dict[str, Any]] = json.load(fr_constraints_sampled)
# Annotator identifiers: used to build the name of each annotation file ("constraints_-_<annotator>.json").
list_of_annotators: List[str] = ["1.2", "7.2", "9.2", "12.2"]

In [132]:
# Constraint type given to each sampled constraint, per source:
# `dict_of_constraints[source][constraint_id]`, where `source` is "groundtruth" or an annotator identifier.
dict_of_constraints: Dict[str, Dict[str, Any]] = {}

# Add groundtruth: two texts must be linked if and only if they share the same true intent.
dict_of_constraints["groundtruth"] = {}
for constraint_id, constraint in dict_of_constraints_sampled.items():
    text_id1: str = constraint["data"]["id_1"]
    label_1: str = dict_of_true_intents[text_id1]
    text_id2: str = constraint["data"]["id_2"]
    label_2: str = dict_of_true_intents[text_id2]
    constraints_type: str = "MUST_LINK" if label_1 == label_2 else "CANNOT_LINK"
    dict_of_constraints["groundtruth"][constraint_id] = constraints_type

# Add annotations.
for annotator in list_of_annotators:
    # The encoding is given explicitly so that decoding does not depend on the platform's locale encoding.
    with open("../experiments/mlsum_fr_train_subset_v1.0.0.schild/constraints_-_{0}.json".format(annotator), "r", encoding="utf-8") as fr_constraints_annotated:
        constraints_annotated: Dict[str, Any] = json.load(fr_constraints_annotated)
    # Keep only the constraints that belong to the sampled template; any other entry of the annotator file is ignored.
    dict_of_constraints[annotator] = {
        constraint_id: constraint["constraint_type"]
        for constraint_id, constraint in constraints_annotated.items()
        if constraint_id in dict_of_constraints_sampled
    }

-----

## 3. COMPUTE INTER-ANNOTATORS SCORE

### 3.1. Compute contingency table of annotators agreements.

In [133]:
# Constraint types counted in the contingency table.
# Any other value (e.g. `None`) is not counted in any cell.
list_of_contingency_cases: List[str] = ["MUST_LINK", "CANNOT_LINK"]

# Contingency table: `inter_annotator_contingencies["<a1>:<case_1>"]["<a2>:<case_2>"]` is the number of
# sampled constraints to which `a1` gave `case_1` while `a2` gave `case_2`.
# All cells are first initialized to 0, in the same key order for every row.
inter_annotator_contingencies: Dict[str, Dict[str, int]] = {
    "{0}:{1}".format(annotator_1, case_1): {
        "{0}:{1}".format(annotator_2, case_2): 0
        for annotator_2 in dict_of_constraints
        for case_2 in list_of_contingency_cases
    }
    for annotator_1 in dict_of_constraints
    for case_1 in list_of_contingency_cases
}

# Single pass over the sampled constraints for each pair of annotators (instead of one pass per cell).
for annotator_1, constraints_list_1 in dict_of_constraints.items():
    for annotator_2, constraints_list_2 in dict_of_constraints.items():
        for constraint_id in dict_of_constraints_sampled:
            # `.get` treats a constraint missing from an annotator's file as not annotated instead of raising a `KeyError`.
            case_1 = constraints_list_1.get(constraint_id)
            case_2 = constraints_list_2.get(constraint_id)
            if case_1 in list_of_contingency_cases and case_2 in list_of_contingency_cases:
                inter_annotator_contingencies["{0}:{1}".format(annotator_1, case_1)]["{0}:{1}".format(annotator_2, case_2)] += 1
pd.DataFrame.from_dict(inter_annotator_contingencies)

Unnamed: 0,groundtruth:MUST_LINK,groundtruth:CANNOT_LINK,1.2:MUST_LINK,1.2:CANNOT_LINK,7.2:MUST_LINK,7.2:CANNOT_LINK,9.2:MUST_LINK,9.2:CANNOT_LINK,12.2:MUST_LINK,12.2:CANNOT_LINK
groundtruth:MUST_LINK,200,0,178,14,167,33,161,39,155,43
groundtruth:CANNOT_LINK,0,200,7,190,17,183,24,176,9,191
1.2:MUST_LINK,178,7,185,0,165,20,154,31,149,34
1.2:CANNOT_LINK,14,190,0,204,14,190,25,179,8,196
7.2:MUST_LINK,167,17,165,14,184,0,156,28,144,39
7.2:CANNOT_LINK,33,183,20,190,0,216,29,187,20,195
9.2:MUST_LINK,161,24,154,25,156,29,185,0,134,49
9.2:CANNOT_LINK,39,176,31,179,28,187,0,215,30,185
12.2:MUST_LINK,155,9,149,8,144,20,134,30,164,0
12.2:CANNOT_LINK,43,191,34,196,39,195,49,185,0,234


### 3.2. Compute inter-annotators scores.

In [134]:
# Agreement score of each pair of annotators: proportion of constraints on which both gave the same type,
# among the constraints counted for this pair in the contingency table.
list_of_agreement_cases: List[str] = ["MUST_LINK", "CANNOT_LINK"]
inter_annotator_scores: Dict[str, Dict[str, float]] = {}
for annotator_1 in dict_of_constraints:
    inter_annotator_scores[annotator_1] = {}
    for annotator_2 in dict_of_constraints:
        # Diagonal cells of the 2x2 sub-table: both annotators chose the same constraint type.
        nb_agreements: int = sum(
            inter_annotator_contingencies["{0}:{1}".format(annotator_1, case)]["{0}:{1}".format(annotator_2, case)]
            for case in list_of_agreement_cases
        )
        # All cells of the 2x2 sub-table: every constraint counted for this pair.
        nb_comparisons: int = sum(
            inter_annotator_contingencies["{0}:{1}".format(annotator_1, case_1)]["{0}:{1}".format(annotator_2, case_2)]
            for case_1 in list_of_agreement_cases
            for case_2 in list_of_agreement_cases
        )
        # The score is undefined (NaN) when the pair has no constraint in common, rather than a `ZeroDivisionError`.
        inter_annotator_scores[annotator_1][annotator_2] = (
            nb_agreements / nb_comparisons
            if nb_comparisons
            else float("nan")
        )
pd.DataFrame.from_dict(inter_annotator_scores)

Unnamed: 0,groundtruth,1.2,7.2,9.2,12.2
groundtruth,1.0,0.946015,0.875,0.8425,0.869347
1.2,0.946015,1.0,0.912596,0.856041,0.891473
7.2,0.875,0.912596,1.0,0.8575,0.851759
9.2,0.8425,0.856041,0.8575,1.0,0.801508
12.2,0.869347,0.891473,0.851759,0.801508,1.0


In [135]:
# Agreement score of each annotator with the groundtruth.
# NOTE(review): annotators "1.1" and "1.2" are left out of this average, as is the trivial
# groundtruth/groundtruth score; the reason for excluding annotator 1 is not stated here, to be confirmed.
list_of_groundtruth_agreement_scores: List[float] = [
    inter_annotator_scores["groundtruth"][annotator]
    for annotator in dict_of_constraints
    if annotator not in ["groundtruth", "1.1", "1.2"]
]
# Report mean and standard deviation over the selected annotators.
# NOTE(review): the label is misspelled ("Grountruth"); kept as is so that the output matches previously saved results.
print("Grountruth average agreement:", "{0:.4f} (+/-{1:.4f})".format(np.mean(list_of_groundtruth_agreement_scores), np.std(list_of_groundtruth_agreement_scores)))

Grountruth average agreement: 0.8623 (+/-0.0142)


In [136]:
# Agreement score of each pair of distinct annotators.
# The agreement score does not depend on the order of the two annotators, so the string comparison
# `annotator_1 < annotator_2` is only used to keep each unordered pair once and to drop self-pairs.
# NOTE(review): annotators "1.1" and "1.2" are left out of this average, as is the groundtruth;
# the reason for excluding annotator 1 is not stated here, to be confirmed.
list_of_inter_annotators_scores: List[float] = [
    inter_annotator_scores[annotator_1][annotator_2]
    for annotator_1 in dict_of_constraints
    for annotator_2 in dict_of_constraints
    if (annotator_1 < annotator_2) and (annotator_1 not in ["groundtruth", "1.1", "1.2"]) and (annotator_2 not in ["groundtruth", "1.1", "1.2"])
]
# Report mean and standard deviation over the selected pairs.
print("Inter-annotators average score:", "{0:.4f} (+/-{1:.4f})".format(np.mean(list_of_inter_annotators_scores), np.std(list_of_inter_annotators_scores)))

Inter-annotators average score: 0.8369 (+/-0.0252)
