In [1]:
import numpy as np
import scipy as sp

import pandas as pd
from collections import Counter
import sklearn 
from sklearn.metrics import classification_report 

# pip install word2number
from word2number import w2n

import matplotlib.pyplot as plt
%matplotlib inline

# pip install pandas_confusion
import pandas_confusion

from statistics import mode

In [2]:
# Expert ("gold") annotations. The "-main-task" suffix suggests this file
# leaves out the questions that were used for testing/validation.
gold_extractions = pd.read_csv("yalin-gold-main-task.csv", encoding="latin1")
# Crowd-worker annotations.
crowd_extractions = pd.read_csv("crowd-extractions.csv")

In [3]:
# Preview the first five crowd rows.
crowd_extractions.head(n=5)

Unnamed: 0,pmid,id,arm_num,difficulty,feedback,group_1_dose,group_1_duration,group_1_intervention_name,group_1_intervention_type,group_1_route,...,shared_route,shared_route_yn,shared_sample_size,shared_sample_size_yn,shared_schedule,shared_schedule_yn,orig__golden,abstract,pmid.1,title
0,347992,1824164382,two,4.0,,,,Beclomethasone dipropionate,Pharmacological,,...,na,True,30.0,True,na,True,,"A double-blind, crossover trial of beclomethas...",347992,Beclomethasone dipropionate in pediatric peren...
1,347992,1824312940,two,,,,,Beclomethasone dipropionate in pediatric peren...,Not Sure,,...,,,30.0,True,,,,"A double-blind, crossover trial of beclomethas...",347992,Beclomethasone dipropionate in pediatric peren...
2,347992,1829141300,two,,,,,beclomethasone dipropionate intranasal aerosol,Pharmacological,intranasal,...,,,30.0,True,,True,,"A double-blind, crossover trial of beclomethas...",347992,Beclomethasone dipropionate in pediatric peren...
3,766905,1824222508,two,4.0,,,,BCG,Pharmacological,,...,na,True,,,na,True,,A controlled randomised trial was carried out ...,766905,Failure of BCG immunostimulation to affect the...
4,766905,1825026771,two,4.0,,,,BCG immunostimulation,Not Sure,,...,,True,40.0,True,,True,,A controlled randomised trial was carried out ...,766905,Failure of BCG immunostimulation to affect the...


In [4]:
# Preview the first five gold rows.
gold_extractions.head(n=5)

Unnamed: 0,pmid,abstract,arm_num_gold,share_duration,shared_duration_yn,shared_dose,shared_dose_yn,shared_intervention_name,shared_intervention_name_yn,shared_intervention_type,...,group_7_schedule,group_8_dose,group_8_duration,group_8_intervention_name,group_8_intervention_type,group_8_route,group_8_sample_size,group_8_schedule,over_eight_groups,title
0,347992,"A double-blind, crossover trial of beclomethas...",two,a week,True,,,,,Pharmacological,...,,,,,,,,,,
1,766905,A controlled randomised trial was carried out ...,two,,,,,,,Pharmacological,...,,,,,,,,,,
2,807952,Ninety-six patients with advanced squamous cel...,two,,,,,radical irradiation,True,,...,,,,,,,,,,
3,1683365,Although continuous positive airway pressure (...,two,3-hour,True,,,,,Medical Devices,...,,,,,,,,,,
4,1853859,To study the impact of dietary intervention on...,two,26 weeks,True,,,,,Behavioral,...,,,,,,,,,,


In [5]:
# Spot-check one abstract: the `arm_num` value of every crowd row for it.
sample_pmid = 347992
crowd_extractions.loc[crowd_extractions["pmid"] == sample_pmid, "arm_num"]

0    two
1    two
2    two
Name: arm_num, dtype: object

In [6]:
# The gold `arm_num_gold` value for the same sample abstract.
gold_extractions.loc[gold_extractions["pmid"] == sample_pmid, "arm_num_gold"]

0    two 
Name: arm_num_gold, dtype: object

In [7]:
# Every PMID in the gold table, as plain Python ints, in file order.
all_pmids = list(map(int, gold_extractions["pmid"].values))

In [8]:
# Crowd `arm_num` answers for a single PMID, as a NumPy array.
pmid = 8018001
pmid_crowd_num_arms = crowd_extractions.loc[crowd_extractions["pmid"] == pmid, "arm_num"].values
pmid_crowd_num_arms

array(['two', 'two', 'above_eight'], dtype=object)

In [9]:
# Gold `arm_num_gold` value for PMID 2569600.
gold_extractions.loc[gold_extractions["pmid"] == 2569600, "arm_num_gold"]

10    four
Name: arm_num_gold, dtype: object

In [10]:
# word2number signals unparseable input in a version-dependent way: the
# release this notebook was written against returns the string
# "Error: Please enter a valid number word" instead of raising, while other
# releases raise ValueError. Both are treated as "could not parse" below.
def _parse_arm_count(raw_value):
    """Convert a number word such as "two" to an int; return None on failure."""
    try:
        parsed = w2n.word_to_num(raw_value)
    except ValueError:
        return None
    # Anything that is not an int (e.g. the error string) counts as a failure.
    return parsed if isinstance(parsed, int) else None


# PMID -> majority crowd answer, and PMID -> expert answer (both as ints).
crowd_num_arms, reference_num_arms = {}, {}
for pmid in all_pmids:
    pmid_crowd_num_arms = crowd_extractions.loc[crowd_extractions["pmid"] == pmid, "arm_num"].values
    crowd_responses = []
    for n_arms in pmid_crowd_num_arms:
        n_arms_int = _parse_arm_count(n_arms)
        if n_arms_int is None:
            print("failed on %s for crowd! arm responses: %s" % (pmid, n_arms))
            print("")
        else:
            crowd_responses.append(n_arms_int)
    # Majority vote over the parseable responses. Counter is used instead of
    # statistics.mode so that a PMID with no parseable response is skipped
    # rather than raising, matching confusion_crowd_gold further down.
    if crowd_responses:
        crowd_num_arms[pmid] = Counter(crowd_responses).most_common(1)[0][0]

    cur_ref_val = gold_extractions.loc[gold_extractions["pmid"] == pmid, "arm_num_gold"].values[0]
    ref_arms_int = _parse_arm_count(cur_ref_val)
    if ref_arms_int is None:
        print("failed on %s for expert! ref val: %s" % (pmid, cur_ref_val))
        print("")
    else:
        reference_num_arms[pmid] = ref_arms_int

failed on 2619027 for expert! ref val: ???

failed on 7853047 for expert! ref val: ???

failed on 8018001 for crowd! arm responses: above_eight

failed on 8018001 for expert! ref val: more than 8

failed on 9549451 for crowd! arm responses: above_eight

failed on 12069289 for expert! ref val: one?

failed on 17855465 for expert! ref val: one?

failed on 18851769 for expert! ref val: ???



In [11]:
# Line up expert (y) and crowd (y_hat) arm counts for every PMID that has an
# entry in both dictionaries, following the order of all_pmids.
y, y_hat = [], []
for pmid in all_pmids:
    if pmid in reference_num_arms and pmid in crowd_num_arms:
        y.append(reference_num_arms[pmid])
        y_hat.append(crowd_num_arms[pmid])
# Single-argument print call: same output on Python 2 and Python 3.
print("%s %s" % (y, y_hat))

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 3, 2, 2, 2, 3, 2, 2, 7, 4, 3, 2, 2, 2, 3, 2, 3, 2, 4, 2, 2, 2, 4, 4, 4, 2, 6, 4, 2, 2, 4, 2, 4, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 6] [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 7, 4, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 4, 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 6]


In [12]:
# Per-class precision / recall / F1 of the crowd answers against the gold ones.
print(classification_report(y_true=y, y_pred=y_hat))

             precision    recall  f1-score   support

          2       0.84      1.00      0.91        46
          3       0.86      0.75      0.80         8
          4       1.00      0.22      0.36         9
          6       1.00      0.50      0.67         2
          7       1.00      1.00      1.00         1

avg / total       0.87      0.85      0.82        66



In [13]:
# How often each `difficulty` value occurs in the crowd rows, NaN included.
# Written as a single-argument print call so it runs on Python 2 and 3 alike.
print(crowd_extractions.difficulty.value_counts(dropna=False))

 4     91
 5     74
NaN    26
 3     19
 2      6
Name: difficulty, dtype: int64


In [14]:
def most_common(l):
    """Return the element that occurs most often in ``l``.

    Raises IndexError when ``l`` is empty.
    """
    top_entries = Counter(l).most_common(1)
    value, _occurrences = top_entries[0]
    return value

In [15]:
def _arm_count_from_word(raw_value):
    """Convert a number word such as "two" to an int; return None on failure.

    Depending on its version, word2number either returns an error string or
    raises ValueError for input it cannot parse; both yield None here.
    """
    try:
        parsed = w2n.word_to_num(raw_value)
    except ValueError:
        return None
    return parsed if isinstance(parsed, int) else None


def confusion_crowd_gold(crowd_extractions, gold_extractions):
    """Compare the crowd's majority arm count with the expert arm count.

    For every PMID in ``gold_extractions`` that also occurs in
    ``crowd_extractions``, the crowd's ``arm_num`` answers are converted to
    ints and reduced to the most frequent one, and the expert's
    ``arm_num_gold`` answer is converted to an int. Values that cannot be
    converted are reported on stdout and left out.

    Parameters
    ----------
    crowd_extractions : DataFrame with at least ``pmid`` and ``arm_num``.
    gold_extractions : DataFrame with at least ``pmid`` and ``arm_num_gold``.

    Returns
    -------
    The text produced by ``sklearn.metrics.classification_report`` for the
    PMIDs that have both an expert and a crowd value. Despite the function's
    name this is a classification report, not a confusion matrix. The number
    of compared PMIDs and the report are also printed.
    """
    # Derive the PMIDs from the argument instead of the notebook-level
    # `all_pmids`, so the function does not depend on hidden global state.
    pmids = [int(p) for p in gold_extractions["pmid"].values]
    # Built once; the membership test below runs for every PMID.
    crowd_pmids = set(crowd_extractions["pmid"].tolist())

    crowd_num_arms, reference_num_arms = {}, {}
    for pmid in pmids:
        if pmid not in crowd_pmids:
            continue

        crowd_responses = []
        for n_arms in crowd_extractions.loc[crowd_extractions["pmid"] == pmid, "arm_num"].values:
            n_arms_int = _arm_count_from_word(n_arms)
            if n_arms_int is None:
                print("failed on %s for crowd! arm responses: %s" % (pmid, n_arms))
                print("")
            else:
                crowd_responses.append(n_arms_int)
        # A PMID whose crowd answers all failed to parse gets no crowd entry.
        if crowd_responses:
            crowd_num_arms[pmid] = Counter(crowd_responses).most_common(1)[0][0]

        cur_ref_val = gold_extractions.loc[gold_extractions["pmid"] == pmid, "arm_num_gold"].values[0]
        ref_arms_int = _arm_count_from_word(cur_ref_val)
        if ref_arms_int is None:
            print("failed on %s for expert! ref val: %s" % (pmid, cur_ref_val))
            print("")
        else:
            reference_num_arms[pmid] = ref_arms_int

    # Keep only the PMIDs for which both sides produced a usable count.
    y, y_hat = [], []
    for pmid in pmids:
        if pmid in reference_num_arms and pmid in crowd_num_arms:
            y.append(reference_num_arms[pmid])
            y_hat.append(crowd_num_arms[pmid])

    print("%d %d" % (len(y), len(y_hat)))
    report = classification_report(y, y_hat)
    print(report)
    return report

In [16]:
# Crowd-vs-gold report over all crowd rows.
x = confusion_crowd_gold(
    crowd_extractions=crowd_extractions,
    gold_extractions=gold_extractions,
)


failed on 2619027 for expert! ref val: ???

failed on 7853047 for expert! ref val: ???

failed on 8018001 for crowd! arm responses: above_eight

failed on 8018001 for expert! ref val: more than 8

failed on 9549451 for crowd! arm responses: above_eight

failed on 12069289 for expert! ref val: one?

failed on 17855465 for expert! ref val: one?

failed on 18851769 for expert! ref val: ???

66 66
             precision    recall  f1-score   support

          2       0.84      1.00      0.91        46
          3       0.86      0.75      0.80         8
          4       1.00      0.22      0.36         9
          6       1.00      0.50      0.67         2
          7       1.00      1.00      1.00         1

avg / total       0.87      0.85      0.82        66



In [17]:
# Repeat the crowd-vs-gold comparison separately for each `difficulty` value
# recorded in the crowd rows. Rows whose difficulty is NaN fall into no group
# with pandas' default groupby settings.
match_columns = ['difficulty']
df = crowd_extractions[["pmid", "arm_num", "difficulty"]].groupby(match_columns)
for index, group in df:
    print("-" * 20)
    # pandas 2.x yields a 1-tuple key when grouping by a one-element list;
    # unwrap it so the printed label stays the bare difficulty value.
    print(index[0] if isinstance(index, tuple) and len(index) == 1 else index)
    x = confusion_crowd_gold(group, gold_extractions)

--------------------
2.0
failed on 8018001 for crowd! arm responses: above_eight

failed on 8018001 for expert! ref val: more than 8

failed on 9549451 for crowd! arm responses: above_eight

4 4
             precision    recall  f1-score   support

          2       1.00      1.00      1.00         4

avg / total       1.00      1.00      1.00         4

--------------------
3.0
failed on 7853047 for expert! ref val: ???

failed on 12069289 for expert! ref val: one?

16 16
             precision    recall  f1-score   support

          2       0.92      1.00      0.96        11
          3       1.00      1.00      1.00         3
          4       1.00      0.50      0.67         2

avg / total       0.94      0.94      0.93        16

--------------------
4.0
failed on 2619027 for expert! ref val: ???

failed on 8018001 for expert! ref val: more than 8

failed on 17855465 for expert! ref val: one?

failed on 18851769 for expert! ref val: ???

65 65
             precision    recall  f1

  'precision', 'predicted', average, warn_for)


In [18]:
# Dimensions of the gold table as (rows, columns).
gold_extractions.shape

(72, 75)