# This notebook is supposed to help with calculating the IAA metric, Krippendorff's alpha.

The metric is explained in the paper found [here](https://www.asc.upenn.edu/sites/default/files/2021-03/Computing%20Krippendorff%27s%20Alpha-Reliability.pdf).

For more information on IAA measures, check out this [summary video](https://youtu.be/Ofm2m1fVao8?si=0quFfOtyzbVU3lsr).

The input should be a pandas dataframe, with the following structure:

| annotator_id   |   0 |   1 |   2 |   3 |   4 | 
|:---------------|----:|----:|----:|----:|----:|
| A              |   1 |   2 |   3 |   4 |   5 | 
| B              |   1 |   2 |   3 |   4 |   4 | 
| C              |   1 |   3 |   3 |   5 |   5 | 
| D              |   1 |   2 |   3 |   3 |   5 | 

To clarify, the index of the dataframe is each annotator. Each column then corresponds to an annotated sample (image, text, etc.). 
Categorical values need to be recoded to numeric ones. Note that each **annotated feature requires a separate alpha** to be calculated (see reading material).

Use the code at your own risk. There is **no guarantee** that it has been implemented without error.

In [1]:
import pandas as pd
import numpy as np
import itertools

In [9]:
def get_rating_scale_from_df(df):
    # returns the range of ratings, assuming all ratings are present.
    return (np.unique(df)[~np.isnan(np.unique(df))])



def get_column_total_from_df(df):
    return df.count()

def get_empty_matrix(scale):
    return np.zeros((scale,scale))

def count_pairs(col_tot):
    return col_tot * (col_tot-1)

def update_coincidence_matrix(k_pairs, matrix, mu):
    # update coincidence matrix
    for i in k_pairs:
        c,k = int(i[0])-1,int(i[1])-1
        matrix[c][k] += 1/(mu-1)
    return matrix

def make_coincidence_matrix(df):
    # get length of scale.
    # i.e. 2 for binary scale (0,1), 5 for 5-point scale (1,2,3,4,5)
    scale_len = len(get_rating_scale_from_df(df))

    # make empty coincidence matrix
    coincidence_matrix = np.zeros((scale_len,scale_len))

    # Get column totals, n_k
    col_tot = get_column_total_from_df(df)
    
    n_columns = df.shape[1]

    for k in range(n_columns): # iterate through data matrix
        mu = col_tot[k]

        k_entries = np.array(df.iloc[:,k])
        k_entries = k_entries[~np.isnan(k_entries)] # get all non-NaN values
        k_pairs = list(itertools.permutations(k_entries,2)) # get all pairs of values

        # update values in matrix
        coincidence_matrix = update_coincidence_matrix(k_pairs, coincidence_matrix, mu)

    return coincidence_matrix

def get_difference_matrix(coincidence_matrix, rating = None, method = "nominal"): # uses the previously created coincidence matrix
    methods = ["nominal", "interval", "ordinal", "ratio", "bipolar"]
    if method not in methods:
        print(f'Error: Incorrect method. Please set method to one of the following: {methods}')
        pass
    
    if method == "bipolar":
        if rating.all() == None:
            print(f'Please include the rating scale, as a list (e.g. [-1,0,1]). \n You can use get_rating_scale_from_df(<df>)')
            pass
        cmin,cmax = np.min(rating), np.max(rating)

    # make empty difference matrix
    nc,nk = coincidence_matrix.shape[0], coincidence_matrix.shape[1]
    difference_matrix = np.zeros((nc,nk))

    for c in range(nc):
        for k in range(nk):

            if method == "nominal":
                difference_matrix = np.ones((nc,nk))
                return np.triu(difference_matrix,1) + np.triu(difference_matrix,1).T
            
            elif method == "interval":
                difference_matrix[c][k] = (c-k)**2

            elif method == "ordinal":
                if c <=k:
                    sumc = (sum(coincidence_matrix[c]))
                    sumk = (sum(coincidence_matrix[:][k]))
                    n = ((sumc+sumk)/2)
                
                    ord = ((np.sum(coincidence_matrix[c:k+1])))
                    ord_squared = (ord - n)**2
                    difference_matrix[c][k] = ord_squared
            
            elif method == "ratio":
                if (c-k) == (c+k) == 0:
                    difference_matrix[c][k] = 0
                else:
                    difference_matrix[c][k] = ( (c-k)/(c+k) )**2

            elif method == "bipolar":
                difference_matrix[c][k] = ((rating[c]-rating[k])**2) / ( (rating[c] + rating[k] - 2*cmin) * (2*cmax-rating[c]-rating[k]) )

    return np.triu(difference_matrix,1) + np.triu(difference_matrix,1).T

def get_nuber_of_values(coincidence_matrix):
    return np.sum(coincidence_matrix)

def krippendorff_alpha(df,rating = None, method="nominal"):
    top,bot = 0,0

    coincidence_matrix = make_coincidence_matrix(df)
    
    n = get_nuber_of_values(coincidence_matrix)

    difference_matrix = get_difference_matrix(coincidence_matrix,rating = rating, method = method)

    m = range(len(coincidence_matrix))

    for c in m:
        for k in m:
            if k > c:
                top += coincidence_matrix[c][k] * difference_matrix[c][k] 
                bot += sum(coincidence_matrix[c]) * sum(coincidence_matrix[:][k]) * difference_matrix[c][k]
            else:
                continue

    alpha = 1 - (n-1) * (top/bot)

    return (f'\n Krippendorff {method} alpha: {round(alpha,3)}')

# Example using the data from the [linked paper](https://www.asc.upenn.edu/sites/default/files/2021-03/Computing%20Krippendorff%27s%20Alpha-Reliability.pdf):

Please note, that the answer for the Ratio metric here is mismatched from that of the paper's.

I think the paper might have a mistake, but it is more likely that I have made an error somewhere.

In [3]:
# Annotations from 5 different annotators. Ratings are from 1-5.
data_example = {
        'anno_1': [1,2,3,3,2,1,4,1,2,None,None,None], 
        'anno_2': [1,2,3,3,2,2,4,1,2,5,None,3], 
        'anno_3': [None,3,3,3,2,3,4,2,2,5,1,None],
        'anno_4': [None,None,None,None,None,None,None,None,None,None,None,None],
        'anno_5': [1,2,3,3,2,4,4,1,2,5,1,None]}

# Create dataframe
df = pd.DataFrame.from_dict(data_example, orient='index')

# Print alpha, depending on choice of difference metric (please see the linked paper above)
print(  krippendorff_alpha(df,rating = None, method="nominal"),
        krippendorff_alpha(df,rating = None, method="ordinal"),
        krippendorff_alpha(df,rating = None, method="interval"),
        krippendorff_alpha(df,rating = None, method="ratio"),
        krippendorff_alpha(df,rating = get_rating_scale_from_df(df), method="bipolar")
        )


 Krippendorff nominal alpha: 0.743 
 Krippendorff ordinal alpha: 0.815 
 Krippendorff interval alpha: 0.849 
 Krippendorff ratio alpha: 0.734 
 Krippendorff bipolar alpha: 0.835


  difference_matrix[c][k] = ((rating[c]-rating[k])**2) / ( (rating[c] + rating[k] - 2*cmin) * (2*cmax-rating[c]-rating[k]) )


# More examples:

All difference metrics behave the same on nominal data (binary selection i.e. True/False, 1/0, etc.)

In [4]:
data_nominal = {
        'anno_1': [1,1,1,0,0,0,1,1,1,0],
        'anno_2': [1,1,1,0,0,0,1,1,1,0],
        'anno_3': [1,1,1,1,0,0,1,1,0,0]}

# Create dataframe
df = pd.DataFrame.from_dict(data_nominal, orient='index')

# Print alpha, depending on choice of difference metric (please see the linked paper above)
print(  krippendorff_alpha(df,rating = None, method="nominal"),
        krippendorff_alpha(df,rating = None, method="ordinal"),
        krippendorff_alpha(df,rating = None, method="interval"),
        krippendorff_alpha(df,rating = None, method="ratio"),
        krippendorff_alpha(df,rating = get_rating_scale_from_df(df), method="bipolar")
        )


 Krippendorff nominal alpha: 0.731 
 Krippendorff ordinal alpha: 0.731 
 Krippendorff interval alpha: 0.731 
 Krippendorff ratio alpha: 0.731 
 Krippendorff bipolar alpha: 0.731


  difference_matrix[c][k] = ((rating[c]-rating[k])**2) / ( (rating[c] + rating[k] - 2*cmin) * (2*cmax-rating[c]-rating[k]) )


# Example when rating with a range (e.g. 1-5):
 
A nominal metric will simply check for a 'hit' or 'miss.

Ordinal and Interval metrics can measure how close, or far, a rating is from each other. So, Annotator_1 rating 5, and Annotator_2 rating 4, should be penalised less than if they rated 5 and 2 respetively:

"Nominal: the data can only be categorized. Ordinal: the data can be categorized and ranked. Interval: the data can be categorized and ranked, and evenly spaced. Ratio: the data can be categorized, ranked, evenly spaced and has a natural zero" [Source](https://www.scribbr.com/statistics/levels-of-measurement/)

In [5]:
data_rank = {
        'anno_1': [1,2,3,4,5,5,5,5],
        'anno_2': [1,2,3,4,5,4,4,4] # Minor mismatch on last samples
        }

data_rank_alt = {
        'anno_1': [1,2,3,4,5,5,5,5],
        'anno_2': [1,2,3,4,5,2,2,2]} # Significant mismatch on last samples

examples = [data_rank,data_rank_alt]

for d in examples:
    df = pd.DataFrame.from_dict(d, orient='index')
    print(  krippendorff_alpha(df,rating = None, method="nominal"),
            krippendorff_alpha(df,rating = None, method="ordinal")
            )


 Krippendorff nominal alpha: 0.536 
 Krippendorff ordinal alpha: 0.779

 Krippendorff nominal alpha: 0.536 
 Krippendorff ordinal alpha: 0.285


# Multiple annotators

You can add additional annotators to a dataset, as needed. It also handles missing values!

In [6]:
# Start with 2 annotators
annotator_2 = {
        'anno_1': [1,2,3,4,5],
        'anno_2': [1,2,3,4,5]} 

df = pd.DataFrame.from_dict(annotator_2, orient='index')
print(df)
print(krippendorff_alpha(df,rating = None, method="ordinal"))

# Now we add a 3rd
df.loc["anno_3"] = [1,3,3,3,5]

print(df)
print(krippendorff_alpha(df,rating = None, method="ordinal"))

# Now we add a 4th, but they skipped some entries by mistake!
df.loc["anno_4"] = [2,None,4,None,4]

print(df)
print(krippendorff_alpha(df,rating = None, method="ordinal"))

        0  1  2  3  4
anno_1  1  2  3  4  5
anno_2  1  2  3  4  5

 Krippendorff ordinal alpha: 1.0
        0  1  2  3  4
anno_1  1  2  3  4  5
anno_2  1  2  3  4  5
anno_3  1  3  3  3  5

 Krippendorff ordinal alpha: 0.914
          0    1    2    3    4
anno_1  1.0  2.0  3.0  4.0  5.0
anno_2  1.0  2.0  3.0  4.0  5.0
anno_3  1.0  3.0  3.0  3.0  5.0
anno_4  2.0  NaN  4.0  NaN  4.0

 Krippendorff ordinal alpha: 0.842


# USing our csv

In [77]:
import pandas as pd
import csv

# Assuming your data is stored in a CSV file named 'data.csv'
df = pd.read_csv("C:\\Users\\elias\\Desktop\\Projects in data science\\Final project\\Annotation guide1.csv")

# Separate data for annotator 1 and annotator 2
annotator1_data = df[df['Annotator'] == 'Annotator 1']
annotator2_data = df[df['Annotator'] == 'Annotator 2']

# For Annotator 1
annotator1_list = annotator1_data.iloc[:, 2:].values.flatten().tolist()

# For Annotator 2
annotator2_list = annotator2_data.iloc[:, 2:].values.flatten().tolist()



In [79]:
# Start with 2 annotators
annotator_2 = {
        'anno_1': annotator1_list,
        'anno_2': annotator2_list} 

df = pd.DataFrame.from_dict(annotator_2, orient='index')
print(df)
print(krippendorff_alpha(df,rating = None, method="ordinal"))

        0    1    2    3    4    5    6    7    8    9    ...  650  651  652  \
anno_1    2    2    1    1    1    1    1    1    1    1  ...    2    1    1   
anno_2    2    2    1    1    1    1    1    1    1    2  ...    2    1    1   

        653  654  655  656  657  658  659  
anno_1    1    1    1    1    1    1    1  
anno_2    1    1    1    1    1    1    1  

[2 rows x 660 columns]

 Krippendorff ordinal alpha: 0.742


In [78]:
def split_list(lst, chunk_size):
    """Split a list into smaller lists of a specified chunk size."""
    return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]

# Example usage:
long_list1 = list(all_values1)
long_list2 = list(all_values2)

# Split the long list into smaller lists of length 6
anno1 = split_list(long_list1, 11)
anno2 = split_list(long_list2, 11)

In [86]:
anno1_listoflist=split_list(annotator1_list,11)
anno2_listoflist=split_list(annotator2_list,11)

In [88]:
for i in range(len(anno1_list)):
    annotator_2 = {
        'anno_1': anno1_listoflist[i],
        'anno_2': anno2_listoflist[i]} 

    df = pd.DataFrame.from_dict(annotator_2, orient='index')
    #print(df)
    print(krippendorff_alpha(df,rating = None, method="ordinal"))
    


 Krippendorff ordinal alpha: 0.753

 Krippendorff ordinal alpha: 0.3

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 0.562

 Krippendorff ordinal alpha: 0.625

 Krippendorff ordinal alpha: 0.826

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 0.8

 Krippendorff ordinal alpha: 0.821

 Krippendorff ordinal alpha: 0.25

 Krippendorff ordinal alpha: 0.8

 Krippendorff ordinal alpha: 0.625

 Krippendorff ordinal alpha: 0.8

 Krippendorff ordinal alpha: 0.625

 Krippendorff ordinal alpha: 0.65

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 0.821

 Krippendorff ordinal alpha: 0.753

 Krippendorff ordinal alpha: 0.8

 Krippendorff ordinal alpha: 0.8

 Krippendorff ordinal alpha: 0.632

 Krippendorff ordinal alpha: 0.562

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal alpha: 1.0

 Krippendorff ordinal a