In [108]:
from os.path import basename
from glob import glob
from pandas import DataFrame, read_csv, concat
from dtale import show

In [109]:
# Sentiment label names. fleiss() selects the per-label count columns with
# df[LABELS], so these must match the column names produced by label_counts().
LABELS = ["pos", "neg", "neu"]
# Annotators. compile() reads one folder per name (via compile_labels) and
# creates one column per name in the combined frame.
NAMES = ["olivia", "river", "gerardo"]

## Compiling everyone's data
Each person has a folder with their sentiment annotations of 5 different bible translations.

The following cell combines everyone's annotations into one dataframe indexed by `(bible, chunk)`, with one column per annotator:\
`bible, chunk, olivia, river, gerardo`

In [110]:
def compile_labels(name: str, root: str = "../manual_annotation") -> DataFrame:
    """Collect one annotator's sentiment labels from all of their CSV files.

    Every ``*.csv`` file in ``{root}/{name}/`` is read, keeping only the
    ``sentiment`` column with ``chunk`` as the index. The files are stacked in
    sorted filename order under a two-level index ``(bible, chunk)``, where
    ``bible`` is the file name up to its first ``.``.

    Args:
        name: Annotator name; also the name of their folder under ``root``.
        root: Directory holding one folder per annotator.

    Returns:
        A frame with a single ``sentiment`` column indexed by ``(bible, chunk)``.

    Raises:
        ValueError: If the folder contains no CSV files, or if a file lacks the
            ``chunk`` or ``sentiment`` column (raised by ``read_csv``).
    """
    files = sorted(glob(f"{root}/{name}/*.csv"))

    # Fail with a message that names the folder instead of letting concat()
    # complain that it has nothing to concatenate.
    if not files:
        raise ValueError(f"no annotation CSV files found in {root}/{name}/")

    # usecols makes a missing "sentiment" column an error here rather than a
    # silent all-NaN column that would distort the agreement statistics later.
    frames = [
        read_csv(f, index_col="chunk", usecols=["chunk", "sentiment"]) for f in files
    ]
    bibles = [basename(f).split(".")[0] for f in files]

    return concat(frames, keys=bibles, names=["bible", "chunk"])


def compile() -> DataFrame:
    """Merge every annotator's labels into one frame, one column per annotator.

    The rows are the ``(bible, chunk)`` index of the first annotator in NAMES.
    Each annotator's ``sentiment`` column is aligned to that index on
    assignment, so entries an annotator lacks come out as NaN.

    Returns:
        A frame indexed like the first annotator's labels, with one column
        per name in NAMES.
    """
    per_annotator = [(name, compile_labels(name)) for name in NAMES]

    # start from the first annotator's index with no columns at all
    _, reference = per_annotator[0]
    merged = DataFrame(reference, columns=[])

    for annotator, labels in per_annotator:
        merged[annotator] = labels["sentiment"]

    return merged

In [111]:
# Build the combined annotation table: rows are (bible, chunk), one column per annotator.
compiled = compile()
# Bare expression so the notebook renders the frame as the cell output.
compiled

Unnamed: 0_level_0,Unnamed: 1_level_0,olivia,river,gerardo
bible,chunk,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
asv,2556,3,2,1
asv,10127,3,3,3
asv,9900,2,2,1
asv,3951,2,2,3
asv,3840,3,3,3
...,...,...,...,...
wmb,4236,3,3,1
wmb,5307,3,3,3
wmb,9942,3,2,2
wmb,4226,3,3,1


## Analysis

### Fleiss' kappa

$k=\dfrac{p_o - p_e}{1 - p_e}$

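$p_e =$ agreement expected by chance, $\sum_j p_j^2$, where $p_j$ is the share of all annotations that went to label $j$\
$p_o =$ observed agreement, the mean over all chunks of the per-chunk agreement $P_i$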


In [112]:
def label_counts(df: DataFrame) -> DataFrame:
    """Count, for every row, how many annotators chose each sentiment label.

    The input is left untouched, so calling this again on the same frame
    (for example by re-running the cell) gives the same result.

    Args:
        df: One row per sample and one column per annotator, holding the
            numeric sentiment codes.

    Returns:
        A new frame with the same index as ``df`` and the integer columns
        ``pos``, ``neg`` and ``neu``.
    """
    # Column name -> numeric code stored in the annotation files. The mapping is
    # the one this notebook has always counted with (1 = neu, 2 = pos, 3 = neg).
    # TODO confirm it against the annotation guidelines.
    codes = {"pos": 2, "neg": 3, "neu": 1}

    # to_numpy() drops the index so the columns are placed positionally, which
    # keeps this safe even if the index contains duplicate entries.
    return DataFrame(
        {label: df.eq(code).sum(axis=1).to_numpy() for label, code in codes.items()},
        index=df.index,
    )

In [113]:
# Per-chunk vote tally: how many annotators picked each label.
counted = label_counts(compiled)
# Bare expression so the notebook renders the tally as the cell output.
counted

Unnamed: 0_level_0,Unnamed: 1_level_0,pos,neg,neu
bible,chunk,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
asv,2556,1,1,1
asv,10127,0,3,0
asv,9900,2,0,1
asv,3951,2,1,0
asv,3840,0,3,0
...,...,...,...,...
wmb,4236,0,2,1
wmb,5307,0,3,0
wmb,9942,2,1,0
wmb,4226,0,2,1


In [114]:
def add_P_i(df: DataFrame, num_annotators: int) -> DataFrame:
    """Add the per-row agreement ``P_i`` used by Fleiss' kappa.

    For a row with label counts ``n_ij`` and ``n = num_annotators``::

        P_i = (sum_j n_ij**2 - n) / (n * (n - 1))

    Every column other than ``"P_i"`` is treated as a label count. An existing
    ``"P_i"`` column is skipped so that calling this again on the same frame
    (for example by re-running the cell) does not square the previous result
    into the new one.

    The formula assumes each row's counts sum to ``num_annotators``; this is
    not checked.

    Args:
        df: Label counts, one row per sample. Modified in place.
        num_annotators: Number of annotators per row; must be at least 2.

    Returns:
        The same ``df`` object with the ``"P_i"`` column added or overwritten.

    Raises:
        ValueError: If ``num_annotators`` is below 2, since the denominator
            ``n * (n - 1)`` would be zero or meaningless.
    """
    if num_annotators < 2:
        raise ValueError(
            f"num_annotators must be at least 2, got {num_annotators}"
        )

    counts = df.drop(columns="P_i", errors="ignore")
    denom = num_annotators * (num_annotators - 1)

    # skipna=False: a missing count makes the row's agreement NaN instead of
    # being treated as zero.
    squared_total = counts.pow(2).sum(axis=1, skipna=False)

    df["P_i"] = (squared_total - num_annotators) / denom
    return df


def fleiss(df: DataFrame) -> float:
    """Compute Fleiss' kappa from per-label counts and per-row agreement.

    Args:
        df: Frame with one count column per entry of LABELS and a ``P_i``
            column holding the per-row agreement.

    Returns:
        ``(P_o - P_e) / (1 - P_e)``, where ``P_o`` is the mean of ``P_i`` and
        ``P_e`` is the sum of squared label shares.
    """
    # observed agreement: average of the per-row values
    mean_agreement = df["P_i"].mean()

    # number of annotations across all labels
    grand_total = sum(df[LABELS].sum())

    # chance agreement: squared share of each label, summed
    chance_agreement = 0
    for label in LABELS:
        share = df[label].sum() / grand_total
        chance_agreement += share**2

    return (mean_agreement - chance_agreement) / (1 - chance_agreement)

In [115]:
# add_P_i returns the frame it was given with a "P_i" column added, so `df` and
# `counted` refer to the same object.
df = add_P_i(counted, len(NAMES))
print(f"Fleiss's kappa = {fleiss(df):.3f}")
# Show the counts and P_i values with dtale's `show` for inspection.
show(df)

Fleiss's kappa = 0.314


