# Analyze scenarios from two raters

This notebook parses scenarios annotated using a pre-agreed coding frame to compute the inter-rater agreement Kappa statistic for above-chance agreement, and to review agreements and disagreements.

The notebook also creates a file that includes the labeled words to identify code mismatches between the two raters.

In [1]:
from lib_analysis import read_and_parse_data, is_consistent

data1 = read_and_parse_data('../datasets/sample1-TH.json')
data2 = read_and_parse_data('../datasets/sample1-vk.json')

ModuleNotFoundError: No module named 'lib_analysis'

In [2]:
print(data1[list(data1.keys())[0]])

{'scenario_id': 'MAS_0036', 'text': "To g|find the [5 daily rain or general precipitation forecast]/[5 total amount forecast], I a|tap on the [5 weekly rain amount forecast] on the home screen page. Then I u|scan the [5 daily and/or hourly high for rain or precip] to u|see if there's a particularly [5 large amount of rain in the upcoming forecast for that day or week], depending on the situation. My goals in using this app are pretty simple in that I needed a better caliber of weather app to help g|keep track of [5 upcoming storms], especially things like [5 sudden rain showers or thunderstorms]. My wife and I live in a 110-year-old house in an area of the United States (philadephia) that's prone to a fairly high amount of rain in the spring/summer and thus you'll have wet/flooded basements. this app helps me g|plan my week as far as when I travel or when to keep on eye on things like the gutters/basement.", 'clean_text': "To find the daily rain or general precipitation forecast/total 

In [3]:
is_consistent(data1, data2)

Scenario IDs matched.


In [10]:
from sklearn.metrics import cohen_kappa_score
import csv

scenario_ids = list(data1.keys())
all_codes1 = [c for d in data1.values() for c in d['codes']]
all_codes2 = [c for d in data2.values() for c in d['codes']]

# uncomment to compute kappa on non-BIO code format
#all_codes1 = ['o' if len(c) == 1 else c[2:] for d in data1.values() for c in d['codes']]
#all_codes2 = ['o' if len(c) == 1 else c[2:] for d in data2.values() for c in d['codes']]

kappa = cohen_kappa_score(all_codes1, all_codes2)
print('Cohen\'s Kappa, All Codes: %0.4f' % kappa)

# write the words and simplified codes for both datasets
# simplified codes: the b/i prefixes are removed
with open('coded_data.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['word','rater1','rater2'])
    for scenario_id in scenario_ids:
        words = data1[scenario_id]['words']
        codes1 = ['o' if len(c) == 1 else c[2:] for c in data1[scenario_id]['codes']]
        codes2 = ['o' if len(c) == 1 else c[2:] for c in data2[scenario_id]['codes']]
        for i in range(len(words)):
            writer.writerow([words[i], codes1[i], codes2[i]])

Cohen's Kappa, All Codes: 0.4897


In [11]:
# count code distributions
possible_codes = sorted(list(set(all_codes1).union(set(all_codes2))))
tally = {'r1': {c:0 for c in possible_codes},
        'r2': {c:0 for c in possible_codes}}
for c in all_codes1:
    tally['r1'][c] += 1
for c in all_codes2:
    tally['r2'][c] += 1
print('\tRater1\tRater2')
for c in possible_codes:
    print('%s\t%s\t%s' % (c, tally['r1'][c], tally['r2'][c]))

	Rater1	Rater2
b-a	81	32
b-g	24	25
b-i	141	100
b-u	45	31
i-i	320	287
o	1180	1316


In [16]:
# use simplified codes
flow_only1 = ['o' if len(c) == 1 else c[2:] for d in data1.values() for c in d['codes']]
flow_only1 = [c if c != 'i' else 'o' for c in all_codes1]

flow_only2 = ['o' if len(c) == 1 else c[2:] for d in data2.values() for c in d['codes']]
flow_only2 = [c if c != 'i' else 'o' for c in all_codes2]

kappa = cohen_kappa_score(flow_only1, flow_only2)
print('Cohen\'s Kappa, Flow, Only: %0.4f' % kappa)

Cohen's Kappa, Flow, Only: 0.4897


In [17]:
# index information types into tuples: i, j, score, phrase                      
def index_infotype(data):
    info = []
    phrase = []
    j = -1
    for i, (word, code) in enumerate(zip(data['words'], data['codes'])):
        if code == 'b-i':
            phrase = [word]
            j = i
        elif code == 'i-i':
            phrase.append(word)
        elif code == 'o' and j >= 0:
            info.append((j, j+len(phrase), data['scores'][len(info)], ' '.join(phrase)))
            phrase = []
            j = -1
    return info

# identify risk scores for overlapping information types
def overlaps(i1, j1, i2, j2):
    return len(set(range(i1, j1)).intersection(set(range(i2, j2)))) > 0

def find_overlaps(info1, info2):
    overlap = []
    for i1, j1, score1, phrase1 in info1:
        for i2, j2, score2, phrase2 in info2:
            if overlaps(i1, j1, i2, j2):
                overlap.append([(score1, phrase1), (score2, phrase2)])
    return overlap

agreed = 0
disagreed = 0
for scenario_id in data1.keys():
    info1 = index_infotype(data1[scenario_id])
    info2 = index_infotype(data2[scenario_id])
    overlap = find_overlaps(info1, info2)

    for i, ((s1, p1), (s2, p2)) in enumerate(overlap):
        print('\n%s, match %i: score %i, %s' % (scenario_id, i, int(s1), p1))
        print('%s, match %i: score %i, %s' % (scenario_id, i, int(s2), p2))
        
    agreed += len(overlap)
    disagreed += len(info1) - len(overlap) + len(info2) - len(overlap)

print('\nAgreed: %i' % agreed)
print('Disagreed: %i' % disagreed)

scores1 = [int(s) for d in data1.values() for s in d['scores']]
scores2 = [int(s) for d in data2.values() for s in d['scores']]  
print('\nScore average for Rater 1: %0.4f' % (sum(scores1) / len(scores1)))
print('Score average for Rater 2: %0.4f' % (sum(scores2) / len(scores2)))


MAS_0036, match 0: score 5, daily rain or general precipitation forecast/total
MAS_0036, match 0: score 6, the daily rain

MAS_0036, match 1: score 5, daily rain or general precipitation forecast/total
MAS_0036, match 1: score 6, general precipitation forecast/total

MAS_0036, match 2: score 5, forecast,
MAS_0036, match 2: score 6, forecast,

MAS_0036, match 3: score 5, weekly rain amount forecast
MAS_0036, match 3: score 6, the weekly rain amount forecast

MAS_0036, match 4: score 5, daily and/or hourly high for rain or precip
MAS_0036, match 4: score 6, daily and/or hourly high for rain or precip

MAS_0036, match 5: score 5, large amount of rain in the upcoming forecast for that day or week,
MAS_0036, match 5: score 6, particularly large amount of rain

MAS_0036, match 6: score 5, upcoming storms,
MAS_0036, match 6: score 6, upcoming storms, especially things like sudden rain showers or thunderstorms.

MAS_0036, match 7: score 5, sudden rain showers or thunderstorms.
MAS_0036, match