In [1]:
import numpy as np
import scipy as sp
import re
import pandas as pd
from collections import Counter
import sklearn 
from sklearn.metrics import classification_report 
# pip install word2number
from word2number import w2n
from sklearn.feature_extraction.text import CountVectorizer

import matplotlib.pyplot as plt
%matplotlib inline

# pip install pandas_confusion
import pandas_confusion

from statistics import mode

In [2]:
crowd_extractions = pd.read_csv("crowd-extractions.csv")
# the -main-task.csv implies it excludes the questions used
# for testing/validation
gold_extractions = pd.read_csv("yalin-gold-main-task.csv", encoding='latin1')

In [3]:
def my_accuracy_score(y_true, y_pred):
    length = len(y_true)
    count = .0
    for a,b in zip(y_true, y_pred):
        if a == b:
            count += 1
    return count/length

In [4]:
# Skip the NAcolumns(contain more than 90% NA) in crowd_extractions and gold_extractions
def Nan_columns(extraction):
    sample_size =  extraction.shape[0]
    extractions_NANcolumns = []
    for value in extraction.columns.values:
        nan_num = extraction[value].isnull().sum()
        if nan_num >= sample_size*0.9: 
            #print "feature %s all Nan"%value
            extractions_NANcolumns.append(value)
    return extractions_NANcolumns
crowd_extractions_NANcolumns = Nan_columns(crowd_extractions)
gold_extractions_NANcolumns = Nan_columns(gold_extractions)
gold_extractions_NANcolumns = [s.encode('utf-8') for s in gold_extractions_NANcolumns]
Nan_columns = list(set(crowd_extractions_NANcolumns+gold_extractions_NANcolumns))

common_columns = [x for x in crowd_extractions.columns.values if (x in gold_extractions.columns.values) and (x not in Nan_columns)]
# crowd_gold_columns map the columns from crowd_extractions to gold_extractions
# 'arm_num' equals to 'arm_num_gold'
# We also remove field: 'pmid', 'abstract'
crowd_gold_columns = {key:key for key in common_columns}
crowd_gold_columns['arm_num'] = 'arm_num_gold'
crowd_gold_columns.pop('abstract', None)
crowd_gold_columns.pop('pmid', None)
print crowd_gold_columns.keys()


['group_1_sample_size', 'group_1_intervention_name', 'shared_schedule', 'shared_intervention_type', 'shared_route_yn', 'share_duration', 'shared_duration_yn', 'group_1_dose', 'shared_sample_size', 'shared_schedule_yn', 'group_1_schedule', 'group_2_intervention_name', 'shared_intervention_name_yn', 'shared_route', 'shared_intervention_type_yn', 'group_2_sample_size', 'shared_sample_size_yn', 'arm_num', 'group_3_intervention_name', 'shared_intervention_name', 'group_2_dose']


In [5]:
all_pmids = [int(pmid) for pmid in gold_extractions["pmid"].values]

In [6]:
# find most common element in a list
# function 'statistics.mode' will raise error when there are more than one most common element
def most_common(l):
    return Counter(l).most_common(1)[0][0]

In [7]:
# Convert string containing number to integer, e.g., "24 year" -> 24.0, "above_eight"->8.0 
def Num_to_int(s):
    try:
        if np.isnan(s):
            return -1.0
    except TypeError:
        pass
    if isinstance(s, float) or isinstance(s, int):
        return s
    else:
        s = s.encode('utf-8')
    num_d = w2n.american_number_system
    s = re.sub('[^0-9a-zA-Z.]+', ' ', s).strip()
    l_num = re.findall(r"[-+]?\d*\.\d+|\d+",s)
    if len(l_num) >= 1:
        return float(l_num[0])
    l_word = s.split(' ')
    for word in l_word:
        if word not in num_d:
            l_word.remove(word)
    s = ' '.join(l_word)
    s = w2n.word_to_num(s)
    if type(s) == type(1):
        return float(s)
    return -1.0

In [8]:
print Num_to_int('above_eight')
print Num_to_int('???')
print Num_to_int('one?')
print Num_to_int('more than 8')
print Num_to_int('smaller than 8')
print Num_to_int('8?')


8.0
-1.0
1.0
8.0
8.0
8.0


In [9]:
# Convert string containing time duration to integer, e.g., "24 year" -> 24.0, "above_eight"->8.0 
def Duration_to_int(s):
    number = -1
    unit = -1
    try:
        if np.isnan(s):
            return -1.0
    except TypeError:
        pass
    if isinstance(s, float) or isinstance(s, int):
        return s
    else:
        s = s.encode('utf-8')
    unit_d = {'year':365, 'month':30, 'week':7, 'day':1, 'hour': 1.0/24, 
              'minute': 1.0/(24*60), 'second': 1.0/(24*60*60),'years':365, 
              'months':30, 'weeks':7, 'days':1, 'hours': 1.0/24, 
              'minutes': 1.0/(24*60), 'seconds': 1.0/(24*60*60)}
    num_d = w2n.american_number_system
    
    s = re.sub('[^0-9a-zA-Z.]+', ' ', s).strip()
    l_num = re.findall(r"[-+]?\d*\.\d+|\d+",s)
    if len(l_num) >= 1:
        number = float(l_num[0])
    if number == -1:
        l_word = s.split(' ')
        for word in l_word:
            if word not in num_d:
                l_word.remove(word)
        ns = ' '.join(l_word)
        ns = w2n.word_to_num(ns)
        if type(ns) == type(1):
            number = float(ns)

    l_word = s.split(' ')
    for word in l_word:
        if word in unit_d:
            unit = unit_d[word]
            break
            
    if (number != (-1))&(unit != (-1)):
        return float(number*unit)
    elif(number == (-1))&(unit != (-1)):
        return float(unit)
    elif(number != (-1))&(unit == (-1)):
        return float(number)
    else:
        return -1

In [10]:
print Duration_to_int('a week')
print Duration_to_int('3-hour')
print Duration_to_int('26 weeks')
print Duration_to_int('6 hours')
print Duration_to_int('one month')
print Duration_to_int('5 days')
print Duration_to_int('90 days')
print Duration_to_int('five weeks')
print Duration_to_int('two periods of 14 days')
print Duration_to_int('two periods')
print Duration_to_int('14 days')

7.0
0.125
182.0
0.25
30.0
5.0
90.0
35.0
14.0
2.0
14.0


In [11]:
# Convert string containing medicine dose to integer 
# e.g., "approximately 4,500,000 allergy units" -> [4500000.0] "15 mg/L"-> [15.0], '400 mg, 20 g'->[400,20]
def Dose_to_int(s):
    try:
        if np.isnan(s):
            return [-1.0]
    except TypeError:
        pass
    if isinstance(s, float) or isinstance(s, int):
        return [s]
    else:
        try:
            s = s.decode('ascii',errors='ignore')
        except UnicodeEncodeError:
            s = re.sub(r'[^\x00-\x7f]',r'', s)
    Dose_list = []
    sentence_list = s.split(', ')
    for sentence in sentence_list:
        number = Num_to_int(sentence)
        if number != -1.0:
            Dose_list.append(number)
    if len(Dose_list) > 0:
        return Dose_list
    return [-1.0]

In [12]:
for s in gold_extractions["group_1_dose"].unique().tolist():
    print s, "->", Dose_to_int(s)
    print

nan -> [-1.0]

400 mg, 20 g -> [400.0, 20.0]

0.04 mg -> [0.04]

3 mg -> [3.0]

6 h -> [6.0]

75 mg -> [75.0]

140 mg, 5 mg -> [140.0, 5.0]

3.2 g, 2.2 g -> [3.2, 2.2]

25 mg -> [25.0]

70 beats/min -> [70.0]

0 mg -> [0.0]

50 mg -> [50.0]

approximately 4,500,000 allergy units -> [4.0]

15 mg/L -> [15.0]

100 mg -> [100.0]

1000 micrograms -> [1000.0]

50 micrograms -> [50.0]

40 mg -> [40.0]

60 mg -> [60.0]

doses were titrated at 12-week intervals to a maximum of 80 mg -> [12.0]

fluticasone propionate (FP) (44 microg)/salmeterol (21 microg) -> [44.0]

800 microg -> [800.0]

2 mg -> [2.0]

20 Hz for 4 mins -> [20.0]

10 exercises, two to three sets/exercise -> [10.0, 5.0]

670/0.75 -> [670.0]

0.3 mg -> [0.3]

20 mL  -> [20.0]

0.5 Õå¼g/kg -> [0.5]



In [13]:
# Convert string containing group_intervention_name to word list 
# e.g., "(HRC) high caries risk children receiving oral health education (OHE)" -> 
# ['receiving', 'risk', 'ohe', 'high', 'health', 'caries', 'education', 'oral', 'children', 'hrc']

def Inter_to_words(s):
    try:
        if np.isnan(s):
            return ['']
    except TypeError:
        pass
    if isinstance(s, float) or isinstance(s, int):
        return [str(s)]
    else:
        try:
            s = s.decode('ascii',errors='ignore')
        except UnicodeEncodeError:
            s = re.sub(r'[^\x00-\x7f]',r'', s)
    s = s.encode('utf-8')
    s = [s]
    vectorizer = CountVectorizer(stop_words='english')
    try:
        vectorizer.fit(s)
        vocabulary = vectorizer.vocabulary_.keys()
    except ValueError:
        #print "maybe empty sentence %s"%s
        return ['']
    return [key.encode('utf-8') for key in vocabulary]



In [14]:
print Inter_to_words('(HRC) high caries risk children receiving oral health education (OHE)')

['receiving', 'risk', 'ohe', 'high', 'health', 'caries', 'education', 'oral', 'children', 'hrc']


In [15]:
# Aggregate the dose field of crowd extraction to one major result
# e.g., [[60.0, 120.0, 240.0], [60.0]] -> [60.0, 120.0, 240.0], [[2.0],[2.0]] -> [2.0]
def aggregate_crowd_dose(l):
    if len(l) < 1:
        return [-1]
    if len(l) == 1:
        return l[0]
    max_len_of_element = max([len(element) for element in l])
    aggre_l = []
    for i in range(max_len_of_element):
        temp = []
        for element in l:
            if len(element) >= i+1 :
                temp.append(element[i])
        temp = most_common(temp)
        aggre_l.append(temp)
    return aggre_l

In [16]:
# Compare the bag-of-wrods of two intervention names 
# e.g., s1 = ['control', 'groups', 'hrc'], s2= ['caries', 'education', 'oral', 'children', 'hrc']
# unigram_compare(s1, s2) -> 0.2
def unigram_compare(l1, l2):
    d1 = {key:1 for key in l1}
    length = float(len(l2))
    count = 0
    for word in l2:
        if word in d1:
            count += 1

    return count/length

In [17]:
# view the content of crowd extractions
for key in crowd_extractions.columns.values:
    if key in crowd_gold_columns:
        print key,",", crowd_gold_columns[key]
        print crowd_extractions[key].unique().tolist()
        print gold_extractions[crowd_gold_columns[key]].unique().tolist()
        print

arm_num , arm_num_gold
['two', 'four', 'three', 'seven', 'above_eight', 'six']
[u'two ', u'two', u'four', u'???', u'three', u'seven', u'more than 8', u'six', u'one?']

group_1_dose , group_1_dose
[nan, 'n/a', 'vancomycin 450 mg/day, tobramycin 450 mg/day and colistin 4.5.10(6) units daily', 'ofloxacin 400 mg and amoxicillin 20 g', '0.5 mg', '70 mg/m2 intravenously', '70 mg/m2', '75 mg/die', '10 mg', '40 micrograms initially and 5 micrograms/h thereafter + placebo 0.5 ml', '140 micrograms initially and 5 micrograms/h thereafter + placebo 0.5 ml i.v.', '5mg/kg', '3.2 g of eicosapentaenoic acid and 2.2 g of docosahexaenoic acid', '3.2 g and 2.2 g', '25 mg', '0 mg', '2.6', '0, 2.5, 10, or 40 mg', '50 mg/kg', '4,500,000 allergy units', '15 mg/l', '15 mg/L', '100 mg', '800 mg', '800 micrograms', '50 micrograms', '40mg', '60 mg/d, 120 mg/d, 240 mg/d', '60 mg', '80mg', '400 microg', 'Unknown', '21 microg)', '44microg', '800 microg/day', '800microg', '2 mg', '4 minutes duration', '670/0.75;', '

In [18]:
# calculate the accuracy score of numerical fields
def confusion_crowd_gold_1(crowd_extractions, gold_extractions, crowd_field, gold_field):
    crowd_num_arms, reference_num_arms = {}, {}
    for pmid in all_pmids:
        if pmid not in crowd_extractions.pmid.unique().tolist():
            continue
        cur_ref_val = gold_extractions[gold_extractions["pmid"] == pmid][gold_field].values[0]
        try:
            if np.isnan(cur_ref_val):
                continue
        except TypeError:
            pass
        #print("on PMID: %s" % pmid)
        pmid_crowd_num_arms  = crowd_extractions[crowd_extractions["pmid"] == pmid][crowd_field].values
        crowd_responses = []
        for n_arms in pmid_crowd_num_arms:
            #w2n_number = w2n.word_to_num(n_arms)
            w2n_number = Num_to_int(n_arms)
            crowd_responses.append(w2n_number)
        if len(crowd_responses)>0:
            try:
                crowd_num_arms[pmid] = mode(crowd_responses)
            except:
                crowd_num_arms[pmid] = most_common(crowd_responses)

        #w2n_number = w2n.word_to_num(cur_ref_val)
        w2n_number = Num_to_int(cur_ref_val)
        reference_num_arms[pmid] = w2n_number
    y_gold, y_crowd = [], []
    for pmid in all_pmids:
        if all([pmid in key_set for key_set in [reference_num_arms.keys(), 
                                                    crowd_num_arms.keys()]]):
            y_gold.append(reference_num_arms[pmid])
            y_crowd.append(crowd_num_arms[pmid])

    print "crowd column is %s, gold column is %s "%(crowd_field, gold_field)
    print "gold extraction"
    print y_gold
    print 
    print "crowd extraction"
    print y_crowd
    print
    accuracy = my_accuracy_score(y_gold, y_crowd)
    print "accuracy is %s"%accuracy
    #confusion_matrix = classification_report(y, y_hat)
    #print confusion_matrix
    return accuracy

In [19]:
# calculate the accuracy score of frequency and time duration fields
def confusion_crowd_gold_2(crowd_extractions, gold_extractions, crowd_field, gold_field):
    crowd_num_arms, reference_num_arms = {}, {}
    for pmid in all_pmids:
        if pmid not in crowd_extractions.pmid.unique().tolist():
            continue
        #print("on PMID: %s" % pmid)
        cur_ref_val = gold_extractions[gold_extractions["pmid"] == pmid][gold_field].values[0]
        try:
            if np.isnan(cur_ref_val):
                continue
        except TypeError:
            pass
        
        pmid_crowd_num_arms  = crowd_extractions[crowd_extractions["pmid"] == pmid][crowd_field].values
        crowd_responses = []
        for n_arms in pmid_crowd_num_arms:
            if isinstance(n_arms, float) or isinstance(n_arms, int):
                w2n_number = Duration_to_int(n_arms)
            else:
                w2n_number = Duration_to_int(n_arms)
                if w2n_number != -1:    
                    w2n_number = 1
                    n_arms = n_arms.encode('utf-8')
                    n_arms_list = n_arms.split(' of ')
                    for sentence_part in n_arms_list:
                        temp_score = Duration_to_int(sentence_part)
                        if temp_score != -1:
                            w2n_number = w2n_number * temp_score
            crowd_responses.append(w2n_number)
        if len(crowd_responses)>0:
            try:
                crowd_num_arms[pmid] = mode(crowd_responses)
            except:
                crowd_num_arms[pmid] = most_common(crowd_responses)

        #w2n_number = w2n.word_to_num(cur_ref_val)
        if isinstance(cur_ref_val, float) or isinstance(cur_ref_val, int):
            w2n_number = Duration_to_int(cur_ref_val)
        else:
            w2n_number = Duration_to_int(cur_ref_val)
            if w2n_number != -1:    
                w2n_number = 1
                cur_ref_val = cur_ref_val.encode('utf-8')
                cur_ref_val_list = cur_ref_val.split(' of ')
                for sentence_part in cur_ref_val_list:
                    temp_score = Duration_to_int(sentence_part)
                    if temp_score != -1:
                        w2n_number = w2n_number * temp_score
       
        reference_num_arms[pmid] = w2n_number
    y_gold, y_crowd = [], []
    for pmid in all_pmids:
        if all([pmid in key_set for key_set in [reference_num_arms.keys(), 
                                                    crowd_num_arms.keys()]]):
            y_gold.append(reference_num_arms[pmid])
            y_crowd.append(crowd_num_arms[pmid])

    print "crowd column is %s, gold column is %s "%(crowd_field, gold_field)
    print "gold extraction"
    print y_gold
    print 
    print "crowd extraction"
    print y_crowd
    print
    #accuracy = accuracy_score(y_gold, y_crowd)
    accuracy = my_accuracy_score(y_gold, y_crowd)
    print "accuracy is %s"%accuracy
    #confusion_matrix = classification_report(y, y_hat)
    #print confusion_matrix
    return accuracy

In [20]:
# calculate the accuracy score of group_dose fields
def confusion_crowd_gold_3(crowd_extractions, gold_extractions, crowd_field, gold_field):
    crowd_num_arms, reference_num_arms = {}, {}
    for pmid in all_pmids:
        if pmid not in crowd_extractions.pmid.unique().tolist():
            continue
        #print("on PMID: %s" % pmid)
        cur_ref_val = gold_extractions[gold_extractions["pmid"] == pmid][gold_field].values[0]
        try:
            if np.isnan(cur_ref_val):
                continue
        except TypeError:
            pass
        
        pmid_crowd_num_arms  = crowd_extractions[crowd_extractions["pmid"] == pmid][crowd_field].values
        crowd_responses = []
        for n_arms in pmid_crowd_num_arms:
            w2n_number = Dose_to_int(n_arms)
            if w2n_number != [-1]:    
                crowd_responses.append(w2n_number)
        #print pmid, " : ", crowd_responses
        crowd_num_arms[pmid] = aggregate_crowd_dose(crowd_responses)

        w2n_number = Dose_to_int(cur_ref_val)
        reference_num_arms[pmid] = w2n_number
    y_gold, y_crowd = [], []
    for pmid in all_pmids:
        if all([pmid in key_set for key_set in [reference_num_arms.keys(), 
                                                    crowd_num_arms.keys()]]):
            y_gold.append(reference_num_arms[pmid])
            y_crowd.append(crowd_num_arms[pmid])

    print "crowd column is %s, gold column is %s "%(crowd_field, gold_field)
    print "gold extraction"
    print y_gold
    print 
    print "crowd extraction"
    print y_crowd
    print
    #accuracy = accuracy_score(y_gold, y_crowd)
    accuracy = my_accuracy_score(y_gold, y_crowd)
    print "accuracy is %s"%accuracy
    #confusion_matrix = classification_report(y, y_hat)
    #print confusion_matrix
    return accuracy

In [21]:
# calculate the accuracy score of group_intervention_name fields
def confusion_crowd_gold_4(crowd_extractions, gold_extractions, crowd_field, gold_field):
    crowd_num_arms, reference_num_arms = {}, {}
    for pmid in all_pmids:
        if pmid not in crowd_extractions.pmid.unique().tolist():
            continue
            
        cur_ref_val = gold_extractions[gold_extractions["pmid"] == pmid][gold_field].values[0]
        try:
            if np.isnan(cur_ref_val):
                continue
        except TypeError:
            pass
        
        pmid_crowd_num_arms  = crowd_extractions[crowd_extractions["pmid"] == pmid][crowd_field].values
        crowd_responses = []
        for n_arms in pmid_crowd_num_arms:
            w2n_number = Inter_to_words(n_arms)
            crowd_responses += w2n_number
        #print pmid, " : ", crowd_responses
        crowd_num_arms[pmid] = list(set(crowd_responses))

        w2n_number = Inter_to_words(cur_ref_val)
        reference_num_arms[pmid] = w2n_number
    y_score = []
    print "field names are:%s and %s"%(crowd_field, gold_field)
    for pmid in all_pmids:
        if all([pmid in key_set for key_set in [reference_num_arms.keys(), 
                                                    crowd_num_arms.keys()]]):
            y_gold = reference_num_arms[pmid]
            y_crowd = crowd_num_arms[pmid]
            y_score.append(unigram_compare(y_crowd, y_gold)) 
            print "pmid", pmid, " : "
            print "gold extraction"
            print y_gold 
            print "crowd extraction"
            print y_crowd
    #accuracy = accuracy_score(y_gold, y_crowd)
    accuracy = np.mean(y_score)
    print "accuracy is %s"%accuracy
    #confusion_matrix = classification_report(y, y_hat)
    #print confusion_matrix
    return accuracy

In [22]:
# calculate the accuracy score of numerical fields 
confusion_crowd_gold_1(crowd_extractions, gold_extractions, 'arm_num', 'arm_num_gold')
print "---"*10
confusion_crowd_gold_1(crowd_extractions, gold_extractions, 'shared_sample_size', 'shared_sample_size')
print "---"*10
confusion_crowd_gold_1(crowd_extractions, gold_extractions, 'group_1_sample_size', 'group_1_sample_size')
print "---"*10
confusion_crowd_gold_1(crowd_extractions, gold_extractions, 'group_2_sample_size', 'group_2_sample_size')
print "---"*10
print 

crowd column is arm_num, gold column is arm_num_gold 
gold extraction
[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, -1.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 7.0, 4.0, 3.0, -1.0, 2.0, 8.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 2.0, 6.0, 4.0, 2.0, 2.0, 1.0, 4.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, -1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 6.0]

crowd extraction
[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 7.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 6.0]

accuracy is 0.777777777778
------------------------------
crowd column is shared_sample_size, gold column is shared_sample_size 
gold extraction
[60.0, 96.0, 55.0, 298.0, 70.0, 40.0, 28.

In [23]:
# calculate the accuracy score of time duration fields 
confusion_crowd_gold_2(crowd_extractions, gold_extractions, 'share_duration', 'share_duration')
print "---"*10

crowd column is share_duration, gold column is share_duration 
gold extraction
[7.0, 0.125, 182.0, 0.25, 35.0, 84.0, 120.0, 70.0, 30.0, 360.0, 84.0, 5.0, 105.0, 120.0, 168.0, 28.0, 84.0, 14.0, 360.0, 1825.0, 140.0, 378.0, 42.0, 1740.0, 84.0, 84.0, 360.0, 168.0, 42.0, 42.0, 210.0, 90.0, 360.0, 1825.0, 720.0]

crowd extraction
[-1, 1.0, 392.0, -1, 35.0, 84.0, 120.0, 70.0, 30.0, 360.0, 84.0, 5.0, 105.0, 56.0, 168.0, 14.0, 84.0, 14.0, 360.0, 1825.0, 140.0, 378.0, 42.0, -1, 84.0, 84.0, 360.0, 168.0, 42.0, 42.0, 168.0, 90.0, 360.0, 1825.0, 720.0]

accuracy is 0.771428571429
------------------------------


In [24]:
# calculate the accuracy score of group dose fields 
confusion_crowd_gold_3(crowd_extractions, gold_extractions, 'group_1_dose', 'group_1_dose')
print
confusion_crowd_gold_3(crowd_extractions, gold_extractions, 'group_2_dose', 'group_2_dose')

crowd column is group_1_dose, gold column is group_1_dose 
gold extraction
[[400.0, 20.0], [0.04], [3.0], [6.0], [75.0], [140.0, 5.0], [3.2, 2.2], [25.0], [70.0], [0.0], [50.0], [4.0], [15.0], [100.0], [1000.0], [50.0], [40.0], [60.0], [12.0], [44.0], [800.0], [2.0], [20.0], [10.0, 5.0], [670.0], [0.3], [20.0], [0.5]]

crowd extraction
[[400.0, 450.0], [0.5], [70.0], [-1], [75.0], [40.0], [3.2], [25.0], [-1], [0.0], [50.0], [4.0], [15.0], [100.0], [800.0], [50.0], [40.0], [60.0, 120.0, 240.0], [80.0], [44.0], [800.0], [2.0], [4.0], [-1], [670.0], [0.3], [2.0], [0.5]]

accuracy is 0.535714285714

crowd column is group_2_dose, gold column is group_2_dose 
gold extraction
[[450.0, 450.0, 4.5], [4.0], [20.0], [0.5], [3.8], [150.0], [2.6], [60.0], [3.0], [200.0], [100.0], [120.0], [12.0], [1000.0], [10.0, 5.0], [130.0], [1.0]]

crowd extraction
[[400.0], [-1], [20.0], [0.5], [3.8], [-1], [2.6], [60.0], [3.0], [200.0], [100.0], [120.0, 2.0], [80.0], [1000.0], [-1], [130.0], [1.0]]

accuracy 

0.6470588235294118

In [25]:
# calculate the accuracy score of intervention names, shared route and shared schedule
confusion_crowd_gold_4(crowd_extractions, gold_extractions, "group_1_intervention_name", "group_1_intervention_name")

field names are:group_1_intervention_name and group_1_intervention_name
pmid 347992  : 
gold extraction
['dipropionate', 'beclomethasone']
crowd extraction
['rhinitis', 'perennial', 'extrinsic', 'dipropionate', 'aerosol', 'pediatric', 'intranasal', 'beclomethasone']
pmid 766905  : 
gold extraction
['bcg', 'immunotherapy']
crowd extraction
['immunostimulation', 'bcg', 'immunotherapy']
pmid 807952  : 
gold extraction
['methotrexate']
crowd extraction
['methotrexate', 'radical', 'followed', 'irradiation']
pmid 1683365  : 
gold extraction
['positive', 'cpap', 'continuous', 'mask', 'face', 'pressure', 'therapy', 'airway', 'using']
crowd extraction
['positive', 'cpap', 'continuous', 'pressure', 'therapy', 'airway', 'serial']
pmid 1853859  : 
gold extraction
['dietary', 'intervention']
crowd extraction
['impact', 'nutrition', 'netherlands', 'density', 'healthy', 'guidelines', 'diet', 'high', 'dietary', 'lipoprotein', 'council', 'cholesterol', 'total', 'intervention']
pmid 2049963  : 
gold ext

0.76526790203260786

In [26]:
confusion_crowd_gold_4(crowd_extractions, gold_extractions, "group_2_intervention_name", "group_2_intervention_name")

field names are:group_2_intervention_name and group_2_intervention_name
pmid 347992  : 
gold extraction
['placebo']
crowd extraction
['control', 'dipropionate', 'perennial', '', 'extrinsic', 'rhinitis', 'pediatric', 'beclomethasone']
pmid 766905  : 
gold extraction
['control']
crowd extraction
['control', 'immunostimulation', 'bcg', '']
pmid 807952  : 
gold extraction
['control']
crowd extraction
['methotrexate', 'therapy', 'radiation']
pmid 1683365  : 
gold extraction
['control', 'oxygen', 'mask', 'cpap', 'flow', 'face', 'high', 'therapy']
crowd extraction
['control', 'oxygen', 'positive', 'cpap', 'continuous', 'mask', 'flow', 'face', 'high', 'pressure', 'therapy', 'airway']
pmid 1853859  : 
gold extraction
['control']
crowd extraction
['control', 'impact', 'netherlands', 'density', 'healthy', 'guidelines', 'diet', 'high', 'dietary', 'lipoprotein', 'council', 'cholesterol', 'total', 'nutrition', 'intervention']
pmid 2049963  : 
gold extraction
['vtc', 'colistin', 'vancomycin', 'nonabs

0.72890947436401976

In [27]:
confusion_crowd_gold_4(crowd_extractions, gold_extractions, "shared_route", "shared_route")

field names are:shared_route and shared_route
pmid 2201431  : 
gold extraction
['intravenously']
crowd extraction
['', 'intravenous', 'intravenously']
pmid 3051839  : 
gold extraction
['transdermal', 'intravenous']
crowd extraction
['', 'iv']
pmid 7942261  : 
gold extraction
['intrauterine']
crowd extraction
['', 'na', 'intruterine']
pmid 8047805  : 
gold extraction
['oral']
crowd extraction
['', 'oral']
pmid 8964276  : 
gold extraction
['oral']
crowd extraction
['', 'na', 'oral']
pmid 8977598  : 
gold extraction
['inhaled']
crowd extraction
['', 'inhaled', 'inhalation']
pmid 9109702  : 
gold extraction
['inhaled']
crowd extraction
['', 'inhalation', 'inhaled', 'actuated', 'breath', 'device']
pmid 9549022  : 
gold extraction
['iontophoresis']
crowd extraction
['']
pmid 11166751  : 
gold extraction
['paracervical', 'blocks', 'placement']
crowd extraction
['saline', 'na', 'intravenous']
pmid 15641629  : 
gold extraction
['inhalation']
crowd extraction
['inhaled', 'inhalations', 'inhalati

0.40000000000000002

In [28]:
confusion_crowd_gold_4(crowd_extractions, gold_extractions, "group_1_schedule", "group_1_schedule")

field names are:group_1_schedule and group_1_schedule
pmid 2144115  : 
gold extraction
['15', 'later', 'minutes']
crowd extraction
['']
pmid 2201431  : 
gold extraction
['m2']
crowd extraction
['']
pmid 8977598  : 
gold extraction
['daily', 'times']
crowd extraction
['', 'twice', 'day']
pmid 15977465  : 
gold extraction
['week']
crowd extraction
['', 'week', 'vibration', 'alendronate', 'daily', '20hz', '5mg']
pmid 16163369  : 
gold extraction
['dose']
crowd extraction
['']
pmid 20547822  : 
gold extraction
['twice']
crowd extraction
['']
pmid 21419770  : 
gold extraction
['week']
crowd extraction
['', 'week']
pmid 21698991  : 
gold extraction
['months']
crowd extraction
['']
accuracy is 0.25


0.25