In [121]:
import pandas as pd
import itertools
import numpy as np

### Implementation of Krippendorff's alpha, based on <https://en.wikipedia.org/wiki/Krippendorff's_alpha>

It should be fairly quick, because almost all of the computation is done with pandas/NumPy.

## Sample data

In [250]:
# One row per unit (item) and one column per annotator; None marks a label
# that the annotator did not provide.
data = [
    [None, 1, None],
    [None, None, None],
    [None, 2, 2],
    [None, 1, 1],
    [None, 3, 3],
    [3, 3, 4],
    [4, 4, 4],
    [1, 3, None],
    [2, None, 2],
    [1, None, 1],
    [1, None, 1],
    [3, None, 3],
    [3, None, 3],
    [None, None, None],
    [3, None, 4],
]

# The labels that may occur in `data`.
values = [1, 2, 3, 4]
# One entry per column of `data`.
annotators = ["A", "B", "C"]


## Metrics

In [246]:
def nominal_metric(a, b):
    """Nominal distance: the result of ``a != b`` (True when the labels differ)."""
    labels_differ = a != b
    return labels_differ

def interval_metric(a, b):
    """Interval distance: the squared difference between ``a`` and ``b``."""
    gap = a - b
    return gap ** 2

def ratio_metric(a, b):
    """Ratio distance ``((a - b) / (a + b)) ** 2`` after shifting both inputs by one.

    Both arguments are shifted by +1 before the formula is applied, so an
    input of 0 is treated as 1, 1 as 2, and so on.

    The shift uses ``1.0`` so the division is always a true (floating point)
    division. With integer arguments on Python 2, ``/`` would otherwise floor
    the quotient and the result would collapse to 0 or 1.

    Raises ZeroDivisionError if the shifted inputs sum to zero.
    """
    a, b = a + 1.0, b + 1.0
    return ((a - b) / (a + b)) ** 2


## Main agreement metric

In [251]:
def krippendorff_alpha(data, annotators, values, metric=nominal_metric):
    """Compute Krippendorff's alpha for a units-by-annotators table of labels.

    Parameters
    ----------
    data : anything accepted by ``pd.DataFrame``
        One row per unit and one column per annotator. A missing label is
        given as ``None``/NaN.
    annotators : sequence
        Only its length is used; it fixes the number of columns of ``data``.
    values : sequence
        The distinct labels that may occur in ``data``.
    metric : callable, optional
        ``metric(i, j)`` returns the distance between the labels at positions
        ``i`` and ``j`` of ``values``. It receives the *positions*, not the
        labels themselves, so a numeric metric is only meaningful when
        ``values`` is sorted and evenly spaced.

    Returns
    -------
    float
        ``1 - Do / De``, where ``Do`` is the observed and ``De`` the expected
        disagreement. NaN when ``De`` is zero, i.e. when no unit has two or
        more labels or all paired labels are at distance zero from each other.

    Raises
    ------
    ValueError
        If ``data`` holds a non-missing label that is not listed exactly once
        in ``values``.
    """
    n_values = len(values)
    n_annotators = len(annotators)

    units = pd.DataFrame(data, columns=list(range(n_annotators)))

    # counts[unit, k] = number of annotators who gave values[k] to that unit.
    # Comparing against each label directly avoids rewriting labels into
    # indexes, which can clobber cells when labels and indexes overlap.
    counts = np.zeros((len(units), n_values))
    for k, value in enumerate(values):
        counts[:, k] = (units == value).sum(axis=1).values

    labels_per_unit = counts.sum(axis=1)
    non_missing = units.notnull().sum(axis=1).values
    if (labels_per_unit != non_missing).any():
        raise ValueError(
            "data contains labels that are not listed exactly once in values"
        )

    # A unit with fewer than two labels has nothing to be paired with.
    pairable = labels_per_unit > 1
    counts = counts[pairable]
    weights = 1.0 / (labels_per_unit[pairable] - 1.0)

    # Coincidence matrix. Within one unit the number of ordered pairs of
    # different annotators labelled (v, u) is c_v * c_u, minus c_v when v == u
    # (an annotator is never paired with itself); each unit is weighted by
    # 1 / (number of labels - 1).
    weighted = counts * weights[:, np.newaxis]
    coincidences = weighted.T.dot(counts) - np.diag(weighted.sum(axis=0))

    # Row sums of the coincidence matrix equal the plain label counts.
    totals = counts.sum(axis=0)
    n = totals.sum()

    # Distance between every pair of label positions.
    distances = np.array(
        [[float(metric(c, k)) for k in range(n_values)] for c in range(n_values)]
    )

    expected = (np.outer(totals, totals) * distances).sum() / (n - 1)
    observed = (coincidences * distances).sum()
    if expected == 0:
        # No pairable labels, or no expected disagreement: alpha is undefined.
        return np.nan
    return 1 - observed / expected

## Testing

In [258]:
# Regression checks on the sample data; alpha is compared after rounding to four decimals.
assert 0.6914 == round(krippendorff_alpha(data, annotators, values, metric=nominal_metric), 4)
assert 0.8108 == round(krippendorff_alpha(data, annotators, values, metric=interval_metric), 4)

## Run on sample data

In [260]:
# A single parenthesised, %-formatted string prints the same text whether
# `print` is a statement (Python 2) or a function (Python 3).
print("krippendorff_alpha (nominal_metric) = %s" % krippendorff_alpha(data, annotators, values, nominal_metric))
print("krippendorff_alpha (interval_metric) = %s" % krippendorff_alpha(data, annotators, values, interval_metric))

krippendorff_alpha (nominal_metric) = 0.691358024691
krippendorff_alpha (interval_metric) = 0.810844892812
