In [1]:
from irrCAC.raw import CAC
from irrCAC.benchmark import Benchmark
import pandas as pd

# Relative path to the CSV of raw coherence labels loaded by this notebook.
# The literal is split only for readability: adjacent string literals are
# concatenated into a single path string.
ANNOTATION_CSV_PATH = (
    "../data/annotation_data/"
    "Coherence Task March 2022_July 2021 All - both months raw labels.csv"
)

In [2]:
def benchmark(coefficient_dict):
    """Print benchmark-scale membership probabilities for an agreement coefficient.

    Args:
        coefficient_dict: Result dictionary from an irrCAC coefficient
            computation. Its ``"est"`` entry must provide the keys
            ``"coefficient_value"`` and ``"se"``.

    Returns:
        None. For each of the Altman, Landis-Koch and Fleiss scales, a title
        line, the corresponding ``Benchmark`` result and a blank line are
        written to stdout, in that order.
    """
    estimate = coefficient_dict["est"]
    scorer = Benchmark(coeff=estimate["coefficient_value"], se=estimate["se"])

    # Each title is paired with the Benchmark method that produces that
    # scale's report; a method is only invoked after its title is printed.
    reports = (
        ("Altman Scale", scorer.altman),
        ("Landis Koch Scale", scorer.landis_koch),
        ("Fleiss Scale", scorer.fleiss),
    )
    for title, compute_scale in reports:
        print(title)
        print(compute_scale())
        print()

In [3]:
# Load the annotation CSV into a DataFrame and render it for a visual check.
# In the recorded output the frame has "Model ID" and "Cluster ID" columns plus
# one "<name> Coherence" column per annotator holding 0/1 labels.
annotations_df = pd.read_csv(ANNOTATION_CSV_PATH)
display(annotations_df)

Unnamed: 0,Model ID,Cluster ID,Ryan Coherence,Rebecca Coherence,Ethan Coherence
0,2021-07_kmeans_clusters,0,1,1,1
1,2021-07_kmeans_clusters,1,1,1,1
2,2021-07_kmeans_clusters,2,1,1,0
3,2021-07_kmeans_clusters,3,0,1,0
4,2021-07_kmeans_clusters,4,1,1,0
...,...,...,...,...,...
195,2022-03_kmeans_clusters,95,1,1,1
196,2022-03_kmeans_clusters,96,1,1,1
197,2022-03_kmeans_clusters,97,1,1,1
198,2022-03_kmeans_clusters,98,1,1,1


In [4]:
# Build the agreement calculator from the annotator columns only: the two
# identifier columns are removed so the remaining columns are what CAC
# receives as raters.
cac = CAC(
    annotations_df.drop(["Model ID", "Cluster ID"], axis="columns")
)
print(cac)


<irrCAC.raw.CAC Subjects: 200, Raters: 3, Categories: [0, 1], Weights: "identity">


In [5]:
# Compute the AC1 coefficient via `cac.gwet()`, print the raw result dict, then
# print its membership probabilities on the Altman, Landis-Koch and Fleiss
# benchmark scales.
print("AC1 statistic")
gwet_dict = cac.gwet()
print(gwet_dict)

benchmark(gwet_dict)

AC1 statistic
{'est': {'coefficient_value': 0.8503, 'coefficient_name': 'AC1', 'confidence_interval': (0.79906, 0.90154), 'p_value': 0.0, 'z': 32.72321, 'se': 0.02598, 'pa': 0.88, 'pe': 0.19839}, 'weights': array([[1., 0.],
       [0., 1.]]), 'categories': [0, 1]}
Altman Scale
{'scale': [(0.8, 1.0), (0.6, 0.8), (0.4, 0.6), (0.2, 0.4), (-1.0, 0.2)], 'Altman': ['Very Good', 'Good', 'Moderate', 'Fair', 'Poor'], 'CumProb': [0.97357, 1.0, 1.0, 1.0, 1.0]}

Landis Koch Scale
{'scale': [(0.8, 1.0), (0.6, 0.8), (0.4, 0.6), (0.2, 0.4), (0.0, 0.2), (-1.0, 0.0)], 'Landis-Koch': ['Almost Perfect', 'Substantial', 'Moderate', 'Fair', 'Slight', 'Poor'], 'CumProb': [0.97357, 1.0, 1.0, 1.0, 1.0, 1.0]}

Fleiss Scale
{'scale': [(0.75, 1.0), (0.4, 0.75), (-1.0, 0.4)], 'Fleiss': ['Excellent', 'Intermediate to Good', 'Poor'], 'CumProb': [0.99994, 1.0, 1.0]}



In [6]:
# Compute Fleiss' kappa via `cac.fleiss()`, print the raw result dict, then
# print its membership probabilities on the same three benchmark scales.
# NOTE(review): in the recorded run kappa (0.395) is far below AC1 (0.850) even
# though both report pa = 0.88; the gap comes from the chance-agreement term
# pe (0.802 here vs 0.198 for AC1). Worth stating in the write-up which
# coefficient is being relied on and why.
print("Fleiss' Kappa")
fleiss_dict = cac.fleiss()
print(fleiss_dict)

benchmark(fleiss_dict)

Fleiss' Kappa
{'est': {'coefficient_value': 0.39514, 'coefficient_name': "Fleiss' kappa", 'confidence_interval': (0.2424, 0.54789), 'p_value': 7.837092732554396e-07, 'z': 5.10145, 'se': 0.07746, 'pa': 0.88, 'pe': 0.80161}, 'weights': array([[1., 0.],
       [0., 1.]]), 'categories': [0, 1]}
Altman Scale
{'scale': [(0.8, 1.0), (0.6, 0.8), (0.4, 0.6), (0.2, 0.4), (-1.0, 0.2)], 'Altman': ['Very Good', 'Good', 'Moderate', 'Fair', 'Poor'], 'CumProb': [0.0, 0.00409, 0.47499, 0.99412, 1.0]}

Landis Koch Scale
{'scale': [(0.8, 1.0), (0.6, 0.8), (0.4, 0.6), (0.2, 0.4), (0.0, 0.2), (-1.0, 0.0)], 'Landis-Koch': ['Almost Perfect', 'Substantial', 'Moderate', 'Fair', 'Slight', 'Poor'], 'CumProb': [0.0, 0.00409, 0.47499, 0.99412, 1.0, 1.0]}

Fleiss Scale
{'scale': [(0.75, 1.0), (0.4, 0.75), (-1.0, 0.4)], 'Fleiss': ['Excellent', 'Intermediate to Good', 'Poor'], 'CumProb': [0.0, 0.47499, 1.0]}

