# Analyze scenarios from two raters

This notebook parses scenarios annotated using a pre-agreed coding frame to compute the inter-rater agreement Kappa statistic for above-chance agreement, and to review agreements and disagreements.

The notebook also creates a file that includes the labeled words to identify code mismatches between the two raters.

In [2]:
%pip install spacy
import spacy.cli
spacy.cli.download("en_core_web_sm")

Note: you may need to restart the kernel to use updated packages.
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [None]:
from dataclasses import dataclass
from lib_analysis import read_raw_sample

read_raw_sample("../datasets/scenarios1.json", 10, "../datasets/sample3.txt")

In [1]:
from lib_analysis import read_and_parse_data, is_consistent
from lib_analysis import read_data


data1 = read_and_parse_data('../datasets/sample2-TH.txt')
data2 = read_and_parse_data('../datasets/sample2-vk.txt')

In [2]:
is_consistent(data1, data2)

Scenario IDs matched.


In [3]:
from sklearn.metrics import cohen_kappa_score
import csv

scenario_ids = list(data1.keys())
all_codes1 = [c for d in data1.values() for c in d['codes']]
all_codes2 = [c for d in data2.values() for c in d['codes']]

# uncomment to compute kappa on non-BIO code format
#all_codes1 = ['o' if len(c) == 1 else c[2:] for d in data1.values() for c in d['codes']]
#all_codes2 = ['o' if len(c) == 1 else c[2:] for d in data2.values() for c in d['codes']]

kappa = cohen_kappa_score(all_codes1, all_codes2)
print('Cohen\'s Kappa, All Codes: %0.4f' % kappa)

# write the words and simplified codes for both datasets
# simplified codes: the b/i prefixes are removed
with open('coded_data.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['word','rater1','rater2'])
    for scenario_id in scenario_ids:
        words = data1[scenario_id]['words']
        codes1 = ['o' if len(c) == 1 else c[2:] for c in data1[scenario_id]['codes']]
        codes2 = ['o' if len(c) == 1 else c[2:] for c in data2[scenario_id]['codes']]
        for i in range(len(words)):
            writer.writerow([words[i], codes1[i], codes2[i]])

Cohen's Kappa, All Codes: 0.5742


In [6]:
# count code distributions
possible_codes = sorted(list(set(all_codes1).union(set(all_codes2))))
tally = {'r1': {c:0 for c in possible_codes},
        'r2': {c:0 for c in possible_codes}}
for c in all_codes1:
    tally['r1'][c] += 1
for c in all_codes2:
    tally['r2'][c] += 1
print('\tRater1\tRater2')
for c in possible_codes:
    print('%s\t%s\t%s' % (c, tally['r1'][c], tally['r2'][c]))

	Rater1	Rater2
b-i	91	100
i-i	303	219
o	1666	1741


In [7]:
# use simplified codes
flow_only1 = ['o' if len(c) == 1 else c[2:] for d in data1.values() for c in d['codes']]
flow_only1 = [c if c != 'i' else 'o' for c in all_codes1]

flow_only2 = ['o' if len(c) == 1 else c[2:] for d in data2.values() for c in d['codes']]
flow_only2 = [c if c != 'i' else 'o' for c in all_codes2]

kappa = cohen_kappa_score(flow_only1, flow_only2)
print('Cohen\'s Kappa, Flow, Only: %0.4f' % kappa)

Cohen's Kappa, Flow, Only: 0.5742


In [None]:
# index information types into tuples: i, j, score, phrase                      
def index_infotype(data):
    info = []
    phrase = []
    j = -1
    for i, (word, code) in enumerate(zip(data['words'], data['codes'])):
        if code == 'b-i':
            phrase = [word]
            j = i
        elif code == 'i-i':
            phrase.append(word)
        elif code == 'o' and j >= 0:
            info.append((j, j+len(phrase), data['scores'][len(info)], ' '.join(phrase)))
            phrase = []
            j = -1
    return info

# identify risk scores for overlapping information types
def overlaps(i1, j1, i2, j2):
    return len(set(range(i1, j1)).intersection(set(range(i2, j2)))) > 0

def find_overlaps(info1, info2):
    overlap = []
    for i1, j1, score1, phrase1 in info1:
        for i2, j2, score2, phrase2 in info2:
            if overlaps(i1, j1, i2, j2):
                overlap.append([(score1, phrase1), (score2, phrase2)])
    return overlap

agreed = 0
disagreed = 0
for scenario_id in data1.keys():
    info1 = index_infotype(data1[scenario_id])
    info2 = index_infotype(data2[scenario_id])
    overlap = find_overlaps(info1, info2)

    for i, ((s1, p1), (s2, p2)) in enumerate(overlap):
        print('\n%s, match %i: score %i, %s' % (scenario_id, i, int(s1), p1))
        print('%s, match %i: score %i, %s' % (scenario_id, i, int(s2), p2))
        
    agreed += len(overlap)
    disagreed += len(info1) - len(overlap) + len(info2) - len(overlap)

print('\nAgreed: %i' % agreed)
print('Disagreed: %i' % disagreed)

scores1 = [int(s) for d in data1.values() for s in d['scores']]
scores2 = [int(s) for d in data2.values() for s in d['scores']]  
print('\nScore average for Rater 1: %0.4f' % (sum(scores1) / len(scores1)))
print('Score average for Rater 2: %0.4f' % (sum(scores2) / len(scores2)))

In [4]:
# write the disagreements out to a file for inspection
disagreed = []

# format of data1/data2: sent_id, word_id, code
for x1, x2 in zip(data1, data2):
    if x1[2] != x2[2]:
        # record the sent_id, word_id, word, codes, plus the sentence
        sent = list(sentences[x1[0]])
        sent[x1[1]] = '[' + sent[x1[1]] + ']'
        disagreed.append([
            x1[0], x1[1], x1[2], x2[2], ' '.join(sent)
        ])

with open('disagreements.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['sent_id', 'word_id', 'code1', 'code2', 'sentence'])
    for row in disagreed:
        writer.writerow(row)