In [87]:
import pandas as pd
from collections import OrderedDict
from sklearn.metrics import f1_score
import numpy as np

In [88]:
def load_figer_data(p):
    mentions = []
    current_sent_id = 0    
    in_mention = False
    starting_idx = -1
    curr_types = set()
    
    curr_i = 0
    with open(p) as input_file:
        for oline in input_file:
            line = oline.strip()
            if len(line) == 0:
                if in_mention:
                    ment_id = "figer-%d-%d-%d" % (current_sent_id, starting_idx, curr_i)
                    mentions.append((ment_id, curr_types))
                    in_mention = False
                curr_i = 0
                in_mention = False                                                            
                current_sent_id += 1
#                 print("Senntence # %d" % current_sent_id)
                continue
            
            word, bio_t = line.split("\t")
            
            if bio_t == "O":
                if in_mention:
                    ment_id = "figer-%d-%d-%d" % (current_sent_id, starting_idx, curr_i)
                    mentions.append((ment_id, curr_types))
                    in_mention = False
                continue
                
            bio, typs = bio_t.split("-")
            if bio == "B":
                
                if in_mention:
                    ment_id = "figer-%d-%d-%d" % (current_sent_id, starting_idx, curr_i)
                    mentions.append((ment_id, curr_types))
                    in_mention = False
                starting_idx = curr_i
                curr_types = set(typs.split(","))
                in_mention = True
            
            curr_i += 1
            
            
    return OrderedDict(mentions)
            
            
            
            
        
        

In [89]:
figer_gold = load_figer_data("/home/haowu4/codes/dataless_finer/python/eval_output/figer_original_gold_test.label")
ucl = load_figer_data("/home/haowu4/codes/dataless_finer/python/eval_output/ucl_prediction_out.out")
figer_out = load_figer_data("/home/haowu4/codes/dataless_finer/python/eval_output/example_output.figer_output")

In [90]:
all_lexicon = {c for b in figer_gold.values() for c in b}
print(len(all_lexicon))

43


In [100]:
all_lexicon

{'/art',
 '/building',
 '/building/hospital',
 '/building/hotel',
 '/building/sports_facility',
 '/education/department',
 '/education/educational_degree',
 '/event',
 '/government/government',
 '/government_agency',
 '/internet/website',
 '/law',
 '/living_thing',
 '/livingthing/animal',
 '/location',
 '/location/city',
 '/location/country',
 '/location/county',
 '/location/province',
 '/medicine/medical_treatment',
 '/medicine/symptom',
 '/military',
 '/news_agency',
 '/organization',
 '/organization/company',
 '/organization/educational_institution',
 '/organization/sports_league',
 '/organization/sports_team',
 '/people/ethnicity',
 '/person',
 '/person/artist',
 '/person/athlete',
 '/person/author',
 '/person/coach',
 '/person/doctor',
 '/person/politician',
 '/person/soldier',
 '/product',
 '/product/camera',
 '/time',
 '/title',
 '/transportation/road',
 '/written_work'}

In [101]:
fine_type = {
#  '/building',
 '/building/hospital',
 '/building/hotel',
 '/building/sports_facility',
 '/education/department',
 '/education/educational_degree',
#  '/event',
 '/government/government',
 '/government_agency',
 '/internet/website',
#  '/law',
#  '/living_thing',
 '/livingthing/animal',
#  '/location',
 '/location/city',
 '/location/country',
 '/location/county',
 '/location/province',
 '/medicine/medical_treatment',
 '/medicine/symptom',
 '/military',
 '/news_agency',
#  '/organization',
 '/organization/company',
 '/organization/educational_institution',
 '/organization/sports_league',
 '/organization/sports_team',
#  '/people/ethnicity',
#  '/person',
 '/person/artist',
 '/person/athlete',
 '/person/author',
 '/person/coach',
 '/person/doctor',
 '/person/politician',
 '/person/soldier',
#  '/product',
 '/product/camera',
#  '/time',
#  '/title',
 '/transportation/road',
#  '/written_work'
}

In [102]:
coarse_type = all_lexicon.difference(fine_type)
len(coarse_type)

13

In [103]:
mention_keys = [a for a in figer_gold]

In [104]:
def generate_ont_hot(typ, annotation):
    ret = [0] * len(mention_keys)
    for i, k in enumerate(mention_keys):
        if typ in annotation[k]:
            ret[i] = 1
    return ret

In [171]:
def calculate_avg_type_f1score(gold_ann, pred_ann, typs):
    ret = []
    lex = {x:i for i,x in enumerate(typs)}
    pred_counts = []
    gold_counts = []
    for t in typs:
        gold = generate_ont_hot(t, gold_ann)
        gold_counts.append(np.sum(gold))
        pred = generate_ont_hot(t, pred_ann)
        pred_counts.append(np.sum(pred))
        f1=f1_score(gold, pred)
        ret.append(f1)
    return ret, gold_counts, pred_counts


def calculate_ir_f1score(gold_ann, pred_ann, typs):
    retrived = set()
    relevant = set()
    
    for mid, anno_typs in gold_ann.iteritems():
        for t in anno_typs:
            if t in typs:
                relevant.add((mid, t))

    for mid, anno_typs in pred_ann.iteritems():
        for t in anno_typs:
            if t in typs:
                retrived.add((mid, t))
                
    intersect = retrived.intersection(relevant)
    p_score = len(intersect) * 1.0 / len(retrived)
    r_score = len(intersect) * 1.0 / len(relevant)
    return p_score, r_score, 2 * p_score * r_score / (r_score+ p_score)

def show_performance_as_pandas(gold_ann, pred_ann, typs):
    f1s , gs, ps = calculate_avg_type_f1score(gold_ann, pred_ann, typs)
    ds = []
    for f1,g,p,t in zip(f1s, gs, ps,typs):
        d = OrderedDict([
            ("Type", t),
            ("F1", f1),
            ("Gold Count", g),
            ("Pred Count", p)
        ])
        ds.append(d)
    return pd.DataFrame.from_dict(ds)


In [185]:
print("UCL Fine")

df = show_performance_as_pandas(figer_gold, ucl, fine_type)

df.sort_values(["F1"], ascending=[False])

UCL Fine


Unnamed: 0,Type,F1,Gold Count,Pred Count
15,/location/county,1.0,1,1
8,/education/educational_degree,1.0,1,1
21,/person/coach,1.0,2,2
1,/location/country,0.909091,12,10
5,/building/sports_facility,0.888889,5,4
12,/organization/educational_institution,0.863636,23,21
4,/organization/sports_team,0.8,31,24
27,/location/city,0.780488,39,43
29,/person/athlete,0.75,11,13
11,/military,0.75,3,5


In [182]:
print("UCL Coarse")
df = show_performance_as_pandas(figer_gold, ucl, coarse_type)
df.sort_values(["Gold Count"], ascending=[False])

UCL Coarse


Unnamed: 0,Type,F1,Gold Count,Pred Count
12,/person,0.90411,244,267
11,/organization,0.767606,131,153
9,/location,0.786026,102,127
1,/time,0.916667,26,22
5,/building,0.666667,16,14
2,/event,0.190476,8,13
7,/written_work,0.375,7,9
8,/art,0.0,7,0
4,/people/ethnicity,0.545455,5,6
3,/product,0.0,4,0


In [183]:
print("Figer Coarse")
df = show_performance_as_pandas(figer_gold, figer_out, fine_type)
df.sort_values(["Gold Count"], ascending=[False])

Figer Coarse


Unnamed: 0,Type,F1,Gold Count,Pred Count
27,/location/city,0.923077,39,39
4,/organization/sports_team,0.641509,31,22
7,/organization/company,0.529412,28,40
12,/organization/educational_institution,0.615385,23,16
1,/location/country,0.956522,12,11
29,/person/athlete,0.3,11,9
28,/government_agency,0.421053,8,11
26,/organization/sports_league,0.615385,8,5
6,/location/province,0.533333,7,8
3,/person/politician,0.0,5,2


In [184]:
print("Figer Coarse")
df = show_performance_as_pandas(figer_gold, figer_out, coarse_type)
df.sort_values(["Gold Count"], ascending=[False])

Figer Coarse


Unnamed: 0,Type,F1,Gold Count,Pred Count
12,/person,0.906318,244,215
11,/organization,0.822642,131,134
9,/location,0.764977,102,115
1,/time,0.761905,26,16
5,/building,0.774194,16,15
2,/event,0.181818,8,14
7,/written_work,0.352941,7,10
8,/art,0.0,7,0
4,/people/ethnicity,0.25,5,3
3,/product,0.0,4,0


In [156]:
zip(coarse_type, calculate_avg_type_f1score(figer_gold, figer_out, coarse_type)[1])

[('/living_thing', 0.99066542322149753),
 ('/time', 0.98020541611150713),
 ('/event', 0.97230130090775713),
 ('/product', 0.98935547091434672),
 ('/people/ethnicity', 0.988020189567135),
 ('/building', 0.98737174621887291),
 ('/title', 0.99465869336578572),
 ('/written_work', 0.98215934274703998),
 ('/art', 0.98138880026412811),
 ('/location', 0.91147950852742166),
 ('/law', 1.0),
 ('/organization', 0.91684574813987207),
 ('/person', 0.92287103873735599)]

In [157]:
print calculate_ir_f1score(figer_gold, figer_out, coarse_type)
print calculate_ir_f1score(figer_gold, figer_out, fine_type)

(0.8205607476635514, 0.7881508078994613, 0.804029304029304)
(0.5236220472440944, 0.6073059360730594, 0.5623678646934461)


In [159]:
print calculate_ir_f1score(figer_gold, ucl, coarse_type)
print calculate_ir_f1score(figer_gold, ucl, fine_type)

(0.7678571428571429, 0.8491921005385996, 0.8064791133844843)
(0.6587677725118484, 0.634703196347032, 0.6465116279069768)
