# SET-UP

In [55]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [15]:
def read_file(path):
    with open(path) as f:
        content = f.readlines()
    return content

In [16]:
! ls ../data

aida-yago2-dataset  emerging.test	     wnut17train.conll
apw_eng_201010.tsv  emerging.test.annotated
apw_eng_201011.tsv  README.txt


In [17]:
wnut_path = "../data/wnut17train.conll"

In [18]:
test_path = "../data/emerging.test.annotated"

In [19]:
import pandas as pd
df = pd.read_csv("../data/wnut17train.conll",sep="\t")

In [20]:
import pandas as pd
import csv
df_test = pd.read_csv(test_path, header = None, delimiter="\t", quoting=csv.QUOTE_NONE, encoding='utf-8',sep="\t")

In [21]:
def get_labels(dataframe):
    labels = []
    for i,row in df.iterrows():
        if(row.O != 'O' and (not isinstance(row.O,float))):
            labels.append(row.O)
    labels = [x[2:len(x)] for x in labels]
    labels = set(labels)
    return labels

In [22]:
labels = get_labels(df)
labels

{'corporation', 'creative-work', 'group', 'location', 'person', 'product'}

In [23]:
def get_words(path):
    content = [x.replace("\t", " ") for x in read_file(path)]
    text = [x.split() for x in content]
    return text

In [24]:
text_train = get_words(wnut_path)
text_test = get_words(test_path)
datasets = [text_train,text_test]

In [25]:
text_test[0:10]

[['&', 'O'],
 ['gt', 'O'],
 [';', 'O'],
 ['*', 'O'],
 ['The', 'O'],
 ['soldier', 'O'],
 ['was', 'O'],
 ['killed', 'O'],
 ['when', 'O'],
 ['another', 'O']]

In [26]:
def squash(list_labels):
    res = []
    for i,x in enumerate(list_labels):
        j = 1
        if(i + j< len(list_labels) and x[1] == 'B' and list_labels[i  + j][1] == 'I'):
            while(i + j < len(list_labels) and list_labels[i + j][1] == 'I'):
                j += 1
            res.append((x[0],x[0] + j -1 ,x[2]))
        elif(x[1] == 'B'):
            res.append([x[0],x[2]])
    return res

In [27]:
def str_i_to_char_i(sent_label):
    res_labels = []
    sent = sent_label[0].split()
    labels = sent_label[1]
    
    for label in labels: 
        if(len(label) == 2):
            start_label_index = label[0]
            end_label_index = label[0]
            label_name = label[1]
        elif(len(label) == 3):
            start_label_index = label[0]
            end_label_index = label[1]
            label_name = label[2]
            


        start_char_index = sum([len(x) for x in sent[0:start_label_index]])    
        end_char_index = start_char_index + sum([len(x) for x in sent[start_label_index:end_label_index + 1]])
        res_labels.append((start_char_index + start_label_index,end_char_index + end_label_index,label_name))
        
            
    return [" ".join(sent), res_labels]

In [28]:
def to_spacy_format(sent_label): 
    sent = sent_label[0]
    labels = sent_label[1]
    res_dict = {'entities': labels}
    return [sent, res_dict]

In [29]:
import nltk
from nltk.tokenize import sent_tokenize 
def create_entities_char_level(text):
    res = []
    sentence = []
    sent_labels = []
    for word_entity_pair in text:
        if(len(word_entity_pair)!=0):
            sentence.append(word_entity_pair[0])
            sent_labels.append(word_entity_pair[1])
        else:
            sent_labels = [(i,x[0], x[2:len(x)]) for i,x in enumerate(sent_labels) if x != 'O' ]
            sent_labels = squash(sent_labels)
            res.append([" ".join(sentence),sent_labels])
            sent_labels = []
            sentence = []
        
    res = [str_i_to_char_i(x) for x in res]
    res = [to_spacy_format(x) for x in res]
    return res

In [30]:
TRAIN_DATA = create_entities_char_level(text_train)
EVAL_DATA = create_entities_char_level(text_test)

In [31]:
TRAIN_DATA[1]

['From Green Newsfeed : AHFA extends deadline for Sage Award to Nov . 5 http://tinyurl.com/24agj38',
 {'entities': [(22, 26, 'group')]}]

In [32]:
EVAL_DATA[0]

['& gt ; * The soldier was killed when another avalanche hit an army barracks in the northern area of Sonmarg , said a military spokesman .',
 {'entities': [(100, 107, 'location')]}]

In [33]:
def compounding(min_batch_size,max_batch_size,len_data):
    factor = 1.001 
    sizes = []
    sizes.append(min_batch_size)
    x = len_data - min_batch_size
    batch_size = min_batch_size
    while x > 0:
        batch_size = batch_size * factor 
        rounded_bs = int(round(batch_size,0))
        x -= rounded_bs
        sizes.append(rounded_bs)
    return sizes
        

In [34]:
def create_batch(train_data):
    if(len(train_data) == 1):
        return train_data
    new_batch = []
    current_size = len(train_data[0][0]) + 1
    new_batch_text = train_data[0][0] + " "
    new_annotations = train_data[0][1]['entities'].copy()
    for i,data in enumerate(train_data):
        text =  data[0]
        annotations = data[1]
        if (i!=0):
            new_batch_text += text + " "
            annotations_batch = [(x[0] + current_size, x[1]+current_size, x[2]) 
                           for x in annotations['entities']]
            new_annotations.extend(annotations_batch)
            annotations_batch = []
            current_size += len(text) + 1
            
            
    new_batch.append(new_batch_text)
    new_batch.append(new_annotations)
    return new_batch
            
            
            

In [35]:
def minibatch(train_data, size):
    batches = []
    index = 0
    for batch_size in size: 
        batch = create_batch(train_data[index:index+batch_size])
        batches.append(batch)
        index += batch_size
    return batches

In [36]:

def get_batches(train_data, model_type):
    max_batch_sizes = {"tagger": 32, "parser": 16, "ner": 5, "textcat": 64}
    max_batch_size = max_batch_sizes[model_type]
    if len(train_data) < 500:
        max_batch_size /= 2
    batch_size = compounding(1, max_batch_size, len(train_data))
    batches = minibatch(train_data, size=batch_size)
    return batches

### TRAIN

In [443]:
import random
import spacy
model = None
n_iter=35

if model is not None:
    nlp1 = spacy.load(model)  # load existing spaCy model
    print("Loaded model '%s'" % model)
else:
    nlp1 = spacy.blank('en')  # create blank Language class
    print("Created blank 'en' model")

#create the built-in pipeline components and add them to the pipeline
# nlp.create_pipe works for built-ins that are registered with spaCy
if 'ner' not in nlp1.pipe_names:
    print("Add ner pipe")
    ner = nlp1.create_pipe('ner')
    nlp1.add_pipe(ner, last=True)
# otherwise, get it so we can add labels

else:
    ner = nlp1.get_pipe('ner')


Created blank 'en' model
Add ner pipe


In [444]:
# add labels, Trains data based on annotations 
for _, annotations in TRAIN_DATA:
    for ent in annotations.get('entities'):
        ner.add_label(ent[2])

In [445]:
def getTime(start,end):
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_since_start = "Time:  {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
    return time_since_start

In [455]:
def log(start,i,len_):
    tenth = int(len_/20)
    if(i % tenth == 0):
        percent = int(round((i/len_*10),0))
        time_ = getTime(start,time.time())
        print("0%" + "=" *percent + str(percent*10) + "%, " + time_, end="\r" )


In [461]:
optimizer.alpha = 0.0005

In [480]:
from tqdm import tqdm
from spacy.util import decaying
import time
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp1.pipe_names if pipe != 'ner']
dropout = decaying(0.6, 0.2, 1e-4)

with nlp1.disable_pipes(*other_pipes):  # only train NER
    optimizer = nlp1.begin_training()
    optimizer.alpha = 0.0001
    losses = {}
    for itn in range(n_iter):
        
        random.shuffle(TRAIN_DATA)
        drop = next(dropout)
        batches  = [x[0] for x  in get_batches(TRAIN_DATA, "ner") if len(x) == 1]
        start = time.time()
        for i,data in enumerate(TRAIN_DATA):
            log(start,i,len(TRAIN_DATA))
            text, annotations = data
            print(text)
            print(annotations)
            nlp1.update(
                [text],  # batch of texts
                [annotations],  # batch of annotations
                drop=0.5,  # dropout 
                sgd=optimizer,  # callable to update weights
                losses=losses)
            
        print(losses)

There was just a fire at work . Today is looking up .
{'entities': []}
Yesterday 's shoot was unreal . Today is just as sick ! Is that even possible ?!
{'entities': []}
RT @BLKICE3 : Free before 10:30 .... Wear All Black People !!!! http://twitpic.com/2pd1vc
{'entities': []}
Going now . Be back later tonight . Thanks for all the Friday Fucking , appreciate it . Make love , not war : condoms are cheaper than guns . XOXO
{'entities': []}
@iPukeRKOs I cant You'll have to Tune into the show this Sunday . Ill give ya a hint He 's a wrestler &amp; has been in the business for 20 + yrs
{'entities': []}
Good start for sharks tonight can only get better . Played with short bench tonight . Thought Rocks well in game early though until ' unseen '
{'entities': [(15, 21, 'group')]}
Win 1 of 4 More Free Tickets to SMITE World Championship in Atlanta , GA Jan 9th-11th ! via @ http://t.co/OB5jwvtu8S
{'entities': [(32, 37, 'group'), (60, 67, 'location'), (70, 72, 'location')]}
right . Five Star Day cam

RT @Yarcom : It 's Guinness O'clock. &lt; -- it 's almost that time over here on the west coast too .
{'entities': [(19, 27, 'corporation')]}
In 8th grade we had a debate on immigration . It was not even a debate it was more like everyone yell at Ashley and make her cry .
{'entities': [(105, 111, 'person')]}
RT @jeanyousux : guys cmon its december revine https://t.co/uOMcfEgoYN
{'entities': []}
Im ' performing tonight at 8 Bond Street in Soho . If your in the city , hit me up , and then swing through around 10ish !
{'entities': [(44, 48, 'location')]}
http://bit.ly/aTTQYq When Pepsi to ring usually confirm to , winning a Nokia 5800 ?
{'entities': [(26, 31, 'corporation'), (71, 81, 'product')]}
RT @PolarBeverages : Before Hurricane Earl comes , stock up on Polar Orange Dry ! :)
{'entities': []}
If Partner left me and BB I would not hunt him down . I'd take his calls but it would be his job to make sure he spent time with BB .
{'entities': [(23, 25, 'person'), (129, 131, 'person')]}
@igo

From Green Newsfeed : AHFA extends deadline for Sage Award to Nov . 5 http://tinyurl.com/24agj38
{'entities': [(22, 26, 'group')]}
@happydaysjack99 @andywills1019 think Wayne has played well until last few games , but I may be biased lol
{'entities': [(38, 43, 'person')]}
We on fire on " ThePhoenixhour " w/ @thephoenixmag Thurs 7-9pm on WKMT-DB @Dagr8fm #1hiphopstation #worldwide http://t.co/Z4wD9yateK
{'entities': []}
RT @cav_athletics : Swim team took 5th place out of 11 teams and got a bronze medal in our 200 free relay at Saginaw Heritage Relays !
{'entities': [(109, 116, 'location')]}
Yesterday my son forgot his jacket at school . Today he remembered to bring home the jacket , but forgot his lunchbox .
{'entities': []}
good friday whatchu got for me @kanyewest
{'entities': []}
'@Lovelylanvin Oh ugh . Should never do #ff before coffee . You should have been in the " tweeps who are friends IRL " Sorry !'
{'entities': []}
June Fest 2010 ~ Forgotten Door LIVE ! http://fb.me/G1pof3lO
{

RT @JUCOFFrenzy : JUCO RB @RealSteel223526 was named 1st Team Offense @njcaa All American http://t.co/Ep9U2tsnS3
{'entities': []}
@DAT_HENNY_CHICK good thing we have DVRs in the 21st century !!
{'entities': []}
RT @LilTwist : RT this if you want me to go back live on Ustream later tonight
{'entities': [(57, 64, 'corporation')]}
@number2jake wanna go to Liffey valley tomorrow ? :) &lt; 3
{'entities': [(25, 31, 'location')]}
FUUCCCKKKK . I WAS SO FUCKING READY FOR MY GOD DAMN ALONE TIME , AND IT TURNS OUT FUCKING MADI HAS AN ILLNESS AND IS HOME W/DAD . GOD . DAMN .
{'entities': [(90, 94, 'person')]}
REFINERY29 STALKS LA'S MOST FASHIONABLE FEMMES FROM LAST WEEK VB'S PARISIAN BREAKFAST . MERCI , MERCI ! XX http://ow.ly/2G4KX
{'entities': []}
@_xoxowriight for thursday right lol
{'entities': []}
wow i just noticed i always feel different everyday yesterday i was depressed , and today im happy i wonder wat tom . is ...... random much ?
{'entities': [(112, 115, 'person')]}
http://t.co/TkGhpZJ

KeyboardInterrupt: 

# EVALUATION 

In [475]:
# test the trained model
for text,entities in EVAL_DATA:
    doc = nlp1(text)
    print(entities.values())
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

dict_values([[(100, 107, 'location')]])
Entities []
dict_values([[(54, 67, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(77, 99, 'group'), (102, 105, 'group')]])
Entities []
dict_values([[(161, 181, 'person')]])
Entities []
dict_values([[(33, 45, 'location')]])
Entities []
dict_values([[(11, 14, 'group'), (50, 62, 'location')]])
Entities []
dict_values([[(11, 14, 'group')]])
Entities []
dict_values([[(164, 176, 'person')]])
Entities [('Defence Spokesman', 'person')]
dict_values([[(6, 30, 'creative-work')]])
Entities []
dict_values([[(83, 92, 'location')]])
Entities []
dict_values([[(99, 103, 'group')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(33, 41, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(4, 8, 'group')]])
Entities []
dict_values([[(19, 23, 'group')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(4, 45, 'group'), (48, 52, 'group')]])
Entiti

dict_values([[(27, 40, 'group'), (58, 72, 'person')]])
Entities [('Jese Rodriguez', 'person')]
dict_values([[]])
Entities []
dict_values([[(7, 16, 'group'), (48, 56, 'person'), (59, 65, 'person'), (70, 78, 'person')]])
Entities []
dict_values([[(2, 14, 'person')]])
Entities []
dict_values([[(33, 58, 'corporation')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(38, 41, 'group'), (47, 61, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(40, 49, 'person')]])
Entities []
dict_values([[(27, 29, 'corporation')]])
Entities []
dict_values([[(16, 30, 'creative-work')]])
Entities []
dict_values([[(51, 55, 'group')]])
Entities []
dict_values([[(16, 22, 'corporation')]])
Entities []
dict_values([[(17, 24, 'creative-work')]])
Entities []
dict_values([[(19, 24, 'person')]])
Entities []
dict_values([[(3, 9, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(

dict_values([[]])
Entities []
dict_values([[(8, 14, 'person'), (85, 95, 'product')]])
Entities []
dict_values([[(20, 33, 'product')]])
Entities []
dict_values([[(2, 11, 'person')]])
Entities []
dict_values([[(99, 107, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(11, 33, 'location')]])
Entities []
dict_values([[(2, 31, 'person'), (108, 121, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(30, 41, 'creative-work')]])
Entities []
dict_values([[(21, 26, 'person'), (48, 57, 'creative-work')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(18, 28, 'product')]])
Entities []
dict_values([[(9, 13, 'group'), (38, 52, 'location')]])
Entities []
dict_values([[(0, 11, 'product'), (14, 17, 'product')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(5, 22, 'creative-work'), (36, 42, 'person'), (49, 57, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(1

dict_values([[(40, 52, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(2, 6, 'person'), (49, 55, 'product')]])
Entities []
dict_values([[(13, 15, 'location')]])
Entities []
dict_values([[(2, 8, 'person')]])
Entities []
dict_values([[(48, 56, 'product')]])
Entities []
dict_values([[(183, 195, 'product'), (206, 227, 'product'), (230, 242, 'person')]])
Entities []
dict_values([[(0, 3, 'product')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(16, 24, 'product')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(8, 15, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(3, 8, 'person'), (29, 44, 'person')]])
Entities []
dict_values([[(2, 19, 'person')]])
Entities []
dict_values([[(88, 97, 'creative-work'), (148, 157, 'creative-work'), (181, 185, 'person'), (203, 235, 'creative-work'), (285, 302, 'creative-work')]])
Entities []
dict_values([[(26, 39, 'product'), (75, 85, 'product')]])
Entities []
dict_values([[]])
Entities []
di

dict_values([[(5, 19, 'person'), (22, 29, 'location')]])
Entities []
dict_values([[(12, 18, 'product'), (116, 122, 'product')]])
Entities []
dict_values([[(75, 80, 'group')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(0, 7, 'location'), (10, 18, 'location')]])
Entities []
dict_values([[(0, 8, 'corporation'), (14, 19, 'person'), (44, 49, 'product')]])
Entities []
dict_values([[(5, 15, 'person'), (18, 40, 'location'), (84, 89, 'person')]])
Entities []
dict_values([[(22, 38, 'location'), (41, 49, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(16, 28, 'location'), (31, 39, 'location')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(22, 46, 'group'), (47, 53, 'group')]])
Entities []
dict_values([[(5, 15,

dict_values([[(17, 27, 'group'), (30, 40, 'group'), (43, 51, 'group'), (54, 69, 'group'), (72, 83, 'group'), (140, 143, 'group')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(64, 70, 'person')]])
Entities []
dict_values([[(2, 12, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(39, 43, 'location')]])
Entities []
dict_values([[(58, 70, 'person'), (130, 134, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(102, 114, 'product')]])
Entities []
dict_values([[(103, 114, 'person')]])
Entities []
dict_values([[(14, 21, 'person')]])
Entities []
dict_values([[(2, 9, 'person'), (16, 30, 'person'), (87, 91, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(2, 14, 'corporation')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(2, 12, 'person')]])
Entities []
dict_values([[(11, 40, 'creative-work')]])
Entities []
dict_values([[(5, 14, 'person'), (38, 51, 'location')]])
Entities []
dic

dict_values([[]])
Entities []
dict_values([[(3, 21, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(12, 26, 'person')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(0, 4, 'person')]])
Entities []
dict_values([[(5, 16, 'person')]])
Entities []
dict_values([[(50, 70, 'creative-work')]])
Entities []
dict_values([[]])
Entities []
dict_values([[(5, 16, 'person')]])
Entities []
dict_values([[(108, 114, 'creative-work')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(20, 32, 'person'), (37, 54, 'corporation')]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[]])
Entities []
dict_values([[(14, 26, 'creative-work'), (29, 44, 'person')]])
Entities []
dict_values([[(5, 20, 'location')]])
Entities []
dict_values([[(14, 17, 'group'), (59, 75, 'product')]])
Entities []
dict_values([[(2, 8, 'creative-work'), (11, 23, 'creative-work'), (29, 33, 'person'), (3

In [537]:
import stanza
from spacy_stanza import StanzaLanguage

snlp = stanza.Pipeline(lang="en",processors='tokenize,ner')
nlp1 = StanzaLanguage(snlp)


2020-05-26 18:13:34 INFO: Loading these models for language: en (English):
| Processor | Package   |
-------------------------
| tokenize  | ewt       |
| ner       | ontonotes |

2020-05-26 18:13:34 INFO: Use device: cpu
2020-05-26 18:13:34 INFO: Loading: tokenize
2020-05-26 18:13:34 INFO: Loading: ner
2020-05-26 18:13:35 INFO: Done loading processors!


In [501]:
def filter_entities(entities):
    {'corporation', '', '', '', '', ''}
    ent = entities['entities']
    new_ents = [x for x in ent if x[2] in ['location','person','group','creative-work'
                                           ,'product']]
    entities['entities']
    return entities

In [529]:
def stanf_to_conLL(label):
    print(label)
    if (label == 'GPE' or label=='FAC'):
        return 'location'
    elif(label=='PERSON'):
        return 'person'
    elif(label=='PRODUCT'):
        return 'product'    
    elif(label=='ORG' or label=='NORP'):
        return 'group'
    elif(label=='WORK_OF_ART'):
        return ' creative-work'
 

In [538]:
fp_global = 0 
fn_global = 0 
tp_global = 0 
stanford  = True
accepted_ents = ['GPE', 'PERSON','ORG','FAC','WORK_OF_ART','NORP','PRODUCT']
for text,entities in EVAL_DATA:
    ent_as_list = list(entities.values())[0]
    if(stanford):
        entities = filter_entities(entities)
        #correct_entities = [(x[0],x[1],stanf_to_conLL(x[2])) for x in ent_as_list]
    correct_entities = ent_as_list
    fp = 0 
    fn = 0 
    tp = 0 
    print('correct entities')
    print(correct_entities)
    correct_text = [text[y[0]:y[1]] for y in correct_entities]
    print(correct_text)
    doc = nlp1(text)
    predicted_entities = [(ent.text, ent.label_) for ent in doc.ents]
    predicted_entities = [x for x in predicted_entities if x[1] in accepted_ents]
    correctly_predicted = 0 
    for i,data in enumerate(predicted_entities): 
        predicted_text, predicted_label = data 
        #predicted_label = stanf_to_conLL(predicted_label)
        print(predicted_text)
        print(predicted_label)
        predicted_label = stanf_to_conLL(predicted_label)
        if(predicted_text in correct_text):
            index_label = correct_text.index(predicted_text)
            if(predicted_label == correct_entities[index_label][2]):
                        tp += 1
                        correctly_predicted += 1 
        else: 
            fp += 1
            
    fn += (len(correct_text) - correctly_predicted)
    
    fp_global += fp
    fn_global += fn
    tp_global += tp 
    
    print("FN: " + str(fn) + " FP: " + str(fp) + " TP: " + str(tp))
    print("FP_GLOBAL: " + str(fp_global))  
    print("FN_GLOBAL: " + str(fn_global))  
    print("TP_GLOBAL: " + str(tp_global))  
    


correct entities
[(100, 107, 'location')]
['Sonmarg']
Sonmarg
GPE
GPE
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 0
FN_GLOBAL: 0
TP_GLOBAL: 1
correct entities
[(54, 67, 'location')]
['Waltengoo Nar']
Waltengoo Nar
FAC
FAC
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 0
FN_GLOBAL: 0
TP_GLOBAL: 2
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 0
FN_GLOBAL: 0
TP_GLOBAL: 2
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 0
FN_GLOBAL: 0
TP_GLOBAL: 2
correct entities
[(77, 99, 'group'), (102, 105, 'group')]
['Avalanche Rescue Teams', 'ART']
the Avalanche Rescue Teams
ORG
ORG
FN: 2 FP: 1 TP: 0
FP_GLOBAL: 1
FN_GLOBAL: 2
TP_GLOBAL: 2
correct entities
[(161, 181, 'person')]
['Colonel Rajesh Kalia']
Defence
ORG
ORG
Rajesh Kalia
PERSON
PERSON
FN: 1 FP: 2 TP: 0
FP_GLOBAL: 3
FN_GLOBAL: 3
TP_GLOBAL: 2
correct entities
[(33, 45, 'location')]
['Gurez sector']
Gurez
GPE
GPE
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 4
FN_GLOBAL: 4
TP_GLOBAL: 2
correct entities
[(11, 14, 'group'), (50, 62, 'location')]
['ANI', 'Gurez sector']
ANI
ORG
ORG
Gure

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 11
FN_GLOBAL: 20
TP_GLOBAL: 35
correct entities
[(7, 13, 'person'), (49, 55, 'location')]
['Cavani', 'Europe']
Cavani
ORG
ORG
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 11
FN_GLOBAL: 22
TP_GLOBAL: 35
correct entities
[(0, 6, 'person')]
['Cheney']
Cheney
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 11
FN_GLOBAL: 22
TP_GLOBAL: 36
correct entities
[(79, 89, 'person'), (94, 108, 'person')]
['Trey Gowdy', 'Jason Chaffetz']
Trey Gowdy
PERSON
PERSON
Jason Chaffetz
PERSON
PERSON
FN: 0 FP: 0 TP: 2
FP_GLOBAL: 11
FN_GLOBAL: 22
TP_GLOBAL: 38
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 11
FN_GLOBAL: 22
TP_GLOBAL: 38
correct entities
[(0, 5, 'person')]
['Dzeko']
Dzeko
ORG
ORG
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 11
FN_GLOBAL: 23
TP_GLOBAL: 38
correct entities
[(6, 16, 'group'), (48, 62, 'group')]
['/ r / news', '/ r / politics']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 11
FN_GLOBAL: 25
TP_GLOBAL: 38
correct entities
[(5, 10, 'person')]
['Lynda']
Lynda
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 11
F

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 21
FN_GLOBAL: 49
TP_GLOBAL: 65
correct entities
[(33, 44, 'location')]
['North Korea']
North Korea
GPE
GPE
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 21
FN_GLOBAL: 49
TP_GLOBAL: 66
correct entities
[]
[]
Lost RC
PRODUCT
PRODUCT
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 22
FN_GLOBAL: 49
TP_GLOBAL: 66
correct entities
[(17, 31, 'person')]
['Peter Hitchens']
Peter Hitchens
PERSON
PERSON
Fervent
ORG
ORG
FN: 0 FP: 1 TP: 1
FP_GLOBAL: 23
FN_GLOBAL: 49
TP_GLOBAL: 67
correct entities
[(33, 44, 'group'), (109, 112, 'product'), (150, 154, 'group')]
['republicans', 'BLD', 'govt']
republicans
NORP
NORP
FN: 2 FP: 0 TP: 1
FP_GLOBAL: 23
FN_GLOBAL: 51
TP_GLOBAL: 68
correct entities
[(4, 32, 'location')]
["Stew ' s Self Service Garage"]
Self Service Garage
WORK_OF_ART
WORK_OF_ART
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 24
FN_GLOBAL: 52
TP_GLOBAL: 68
correct entities
[(22, 30, 'product')]
['Snickers']
Snickers
PRODUCT
PRODUCT
Irish
NORP
NORP
FN: 0 FP: 1 TP: 1
FP_GLOBAL: 25
FN_GLOBAL: 52
TP_GLOBAL: 69
correct enti

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 34
FN_GLOBAL: 79
TP_GLOBAL: 91
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 34
FN_GLOBAL: 79
TP_GLOBAL: 91
correct entities
[(212, 217, 'location')]
['Italy']
Italy
GPE
GPE
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 34
FN_GLOBAL: 79
TP_GLOBAL: 92
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 34
FN_GLOBAL: 79
TP_GLOBAL: 92
correct entities
[(29, 40, 'creative-work')]
['local views']
OP
ORG
ORG
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 35
FN_GLOBAL: 80
TP_GLOBAL: 92
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 35
FN_GLOBAL: 80
TP_GLOBAL: 92
correct entities
[(23, 28, 'person')]
['Mises']
Mises
WORK_OF_ART
WORK_OF_ART
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 35
FN_GLOBAL: 81
TP_GLOBAL: 92
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 35
FN_GLOBAL: 81
TP_GLOBAL: 92
correct entities
[(2, 9, 'person')]
['Maronti']
Maronti
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 35
FN_GLOBAL: 81
TP_GLOBAL: 93
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 35
FN_GLOBAL: 81
TP_GLOBAL: 93

FN: 2 FP: 0 TP: 0
FP_GLOBAL: 50
FN_GLOBAL: 107
TP_GLOBAL: 110
correct entities
[(14, 23, 'corporation')]
['Internode']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 50
FN_GLOBAL: 108
TP_GLOBAL: 110
correct entities
[(173, 185, 'group')]
['the Clippers']
Ticket Operations Coordinators
WORK_OF_ART
WORK_OF_ART
Clippers
ORG
ORG
FN: 1 FP: 2 TP: 0
FP_GLOBAL: 52
FN_GLOBAL: 109
TP_GLOBAL: 110
correct entities
[(29, 38, 'group'), (43, 50, 'group'), (86, 92, 'person')]
['Tottenham', 'Chelsea', 'Giroud']
ITT
ORG
ORG
Tottenham
ORG
ORG
Chelsea
PERSON
PERSON
Giroud
PERSON
PERSON
FN: 1 FP: 1 TP: 2
FP_GLOBAL: 53
FN_GLOBAL: 110
TP_GLOBAL: 112
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 53
FN_GLOBAL: 110
TP_GLOBAL: 112
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 53
FN_GLOBAL: 110
TP_GLOBAL: 112
correct entities
[(37, 42, 'person'), (96, 101, 'group'), (107, 112, 'person')]
['Trump', 'Nazis', 'Trump']
Trump
PERSON
PERSON
Nazis
NORP
NORP
Trump
PERSON
PERSON
FN: 0 FP: 0 TP: 3
FP_GLOBAL: 53
FN_GLOBAL: 110
TP

French
NORP
NORP
Homer
PERSON
PERSON
Cuba
GPE
GPE
FN: 0 FP: 1 TP: 2
FP_GLOBAL: 65
FN_GLOBAL: 137
TP_GLOBAL: 133
correct entities
[]
[]
Universal Basic Income
ORG
ORG
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 137
TP_GLOBAL: 133
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 137
TP_GLOBAL: 133
correct entities
[(16, 21, 'person')]
['Trump']
Trump
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 66
FN_GLOBAL: 137
TP_GLOBAL: 134
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 137
TP_GLOBAL: 134
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 137
TP_GLOBAL: 134
correct entities
[(22, 27, 'person'), (53, 60, 'person')]
['trump', 'snowden']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 139
TP_GLOBAL: 134
correct entities
[(9, 30, 'creative-work')]
['The Day of the Doctor']
The Day of the Doctor
PERSON
PERSON
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 140
TP_GLOBAL: 134
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 66
FN_GLOBAL: 140
TP_GLOBA

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 182
TP_GLOBAL: 155
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 182
TP_GLOBAL: 155
correct entities
[(2, 14, 'person'), (47, 64, 'group'), (76, 90, 'location')]
['Carpetsmoker', 'Eastern Europeans', 'Western Europe']
Eastern Europeans
NORP
NORP
FN: 2 FP: 0 TP: 1
FP_GLOBAL: 82
FN_GLOBAL: 184
TP_GLOBAL: 156
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 184
TP_GLOBAL: 156
correct entities
[(14, 55, 'product')]
['wildcard certificate private key security']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 185
TP_GLOBAL: 156
correct entities
[(2, 9, 'person')]
['Brythan']
Brythan
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 82
FN_GLOBAL: 185
TP_GLOBAL: 157
correct entities
[(2, 11, 'person')]
['Qmechanic']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 186
TP_GLOBAL: 157
correct entities
[(18, 26, 'product')]
['htaccess']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 82
FN_GLOBAL: 187
TP_GLOBAL: 157
correct entities
[(2, 11, 'person')]
[

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 96
FN_GLOBAL: 230
TP_GLOBAL: 165
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 96
FN_GLOBAL: 230
TP_GLOBAL: 165
correct entities
[(23, 30, 'product')]
['Digimon']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 96
FN_GLOBAL: 231
TP_GLOBAL: 165
correct entities
[(229, 236, 'location')]
['Vietnam']
R & R
ORG
ORG
Rest and Recreation
ORG
ORG
Vietnam
GPE
GPE
FN: 0 FP: 2 TP: 1
FP_GLOBAL: 98
FN_GLOBAL: 231
TP_GLOBAL: 166
correct entities
[(30, 35, 'product')]
['Gmail']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 98
FN_GLOBAL: 232
TP_GLOBAL: 166
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 98
FN_GLOBAL: 232
TP_GLOBAL: 166
correct entities
[(187, 204, 'person')]
['the actual doctor']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 98
FN_GLOBAL: 233
TP_GLOBAL: 166
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 98
FN_GLOBAL: 233
TP_GLOBAL: 166
correct entities
[(2, 6, 'person')]
['Paul']
Paul
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 98
FN_GLOBAL: 233
TP_GLOBAL: 167
correct entities
[]
[]
FN: 0 FP: 0 

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 105
FN_GLOBAL: 264
TP_GLOBAL: 182
correct entities
[(30, 42, 'creative-work')]
['The Sopranos']
Sopranos
WORK_OF_ART
WORK_OF_ART
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 106
FN_GLOBAL: 265
TP_GLOBAL: 182
correct entities
[(26, 28, 'person'), (50, 60, 'product')]
['OP', 'VPN server']
OP
ORG
ORG
VPN
ORG
ORG
FN: 2 FP: 1 TP: 0
FP_GLOBAL: 107
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 107
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 107
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 107
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 107
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
Hulk
PERSON
PERSON
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 108
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 108
FN_GLOBAL: 267
TP_GLOBAL: 182
correct entities
[(59, 65, 'group')]
['Vulcan']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 1

NL
GPE
GPE
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 117
FN_GLOBAL: 297
TP_GLOBAL: 194
correct entities
[(2, 8, 'person')]
['Gordon']
Gordon
PERSON
PERSON
Higgs
PERSON
PERSON
FN: 0 FP: 1 TP: 1
FP_GLOBAL: 118
FN_GLOBAL: 297
TP_GLOBAL: 195
correct entities
[(48, 56, 'product')]
['cadillac']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 118
FN_GLOBAL: 298
TP_GLOBAL: 195
correct entities
[(183, 195, 'product'), (206, 227, 'product'), (230, 242, 'person')]
['Third Hallow', 'Cloak of Invisibility', 'Miss Granger']
Demiguise
NORP
NORP
the Third Hallow
WORK_OF_ART
WORK_OF_ART
Granger
PERSON
PERSON
Demiguise
NORP
NORP
FN: 3 FP: 4 TP: 0
FP_GLOBAL: 122
FN_GLOBAL: 301
TP_GLOBAL: 195
correct entities
[(0, 3, 'product')]
['SSO']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 122
FN_GLOBAL: 302
TP_GLOBAL: 195
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 122
FN_GLOBAL: 302
TP_GLOBAL: 195
correct entities
[(16, 24, 'product')]
['Mjölnirs']
Mjölnirs
NORP
NORP
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 122
FN_GLOBAL: 303
TP_GLOBAL: 195
correct entities
[]
[]

Asimov ' s in the 90 '
WORK_OF_ART
WORK_OF_ART
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 136
FN_GLOBAL: 331
TP_GLOBAL: 212
correct entities
[(27, 42, 'product')]
['KeePass Cracker']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 136
FN_GLOBAL: 332
TP_GLOBAL: 212
correct entities
[]
[]
Panda
PERSON
PERSON
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 137
FN_GLOBAL: 332
TP_GLOBAL: 212
correct entities
[]
[]
Peltier Plates
PERSON
PERSON
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 138
FN_GLOBAL: 332
TP_GLOBAL: 212
correct entities
[]
[]
Italian
NORP
NORP
Italian
NORP
NORP
FN: 0 FP: 2 TP: 0
FP_GLOBAL: 140
FN_GLOBAL: 332
TP_GLOBAL: 212
correct entities
[(0, 4, 'group'), (46, 55, 'group')]
['Jedi', 'Dark Side']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 140
FN_GLOBAL: 334
TP_GLOBAL: 212
correct entities
[(333, 345, 'group')]
['the Military']
Naval
ORG
ORG
Military
ORG
ORG
FN: 1 FP: 2 TP: 0
FP_GLOBAL: 142
FN_GLOBAL: 335
TP_GLOBAL: 212
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 142
FN_GLOBAL: 335
TP_GLOBAL: 212
correct entities
[]
[]
Lakebum
PERSON
PERSON
FN: 0

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 162
FN_GLOBAL: 359
TP_GLOBAL: 223
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 162
FN_GLOBAL: 359
TP_GLOBAL: 223
correct entities
[(3, 7, 'person'), (70, 74, 'person')]
['KITT', 'Kitt']
KITT
ORG
ORG
Kitt
PERSON
PERSON
FN: 1 FP: 0 TP: 1
FP_GLOBAL: 162
FN_GLOBAL: 360
TP_GLOBAL: 224
correct entities
[(14, 20, 'person'), (23, 29, 'person'), (45, 51, 'person'), (84, 90, 'person'), (120, 126, 'person')]
['Rachel', 'Audrey', 'Audrey', 'Rachel', 'Audrey']
Rachel
PERSON
PERSON
Audrey
PERSON
PERSON
Audrey
PERSON
PERSON
Rachel
PERSON
PERSON
Audrey
PERSON
PERSON
FN: 0 FP: 0 TP: 5
FP_GLOBAL: 162
FN_GLOBAL: 360
TP_GLOBAL: 229
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 162
FN_GLOBAL: 360
TP_GLOBAL: 229
correct entities
[(5, 16, 'person')]
['jellombooty']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 162
FN_GLOBAL: 361
TP_GLOBAL: 229
correct entities
[(5, 16, 'person')]
['CarolSankar']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 162
FN_GLOBAL: 362
TP_GLOBAL: 229
correct entities
[]
[]
FN:

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 175
FN_GLOBAL: 394
TP_GLOBAL: 240
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 175
FN_GLOBAL: 394
TP_GLOBAL: 240
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 175
FN_GLOBAL: 394
TP_GLOBAL: 240
correct entities
[(16, 28, 'location'), (31, 39, 'location')]
['The Fortezza', 'Florence']
Fortezza
PRODUCT
PRODUCT
Florence
GPE
GPE
FN: 1 FP: 1 TP: 1
FP_GLOBAL: 176
FN_GLOBAL: 395
TP_GLOBAL: 241
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 176
FN_GLOBAL: 395
TP_GLOBAL: 241
correct entities
[(22, 46, 'group'), (47, 53, 'group')]
['The Knifing Tourney 2017', 'Judges']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 176
FN_GLOBAL: 397
TP_GLOBAL: 241
correct entities
[(5, 15, 'corporation')]
['TheVidSpot']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 176
FN_GLOBAL: 398
TP_GLOBAL: 241
correct entities
[(25, 29, 'person')]
['JoJo']
JoJo ' s
ORG
ORG
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 177
FN_GLOBAL: 399
TP_GLOBAL: 241
correct entities
[(5, 17, 'person')]
['GraysonDolan']
FN: 1 FP: 0 TP: 0
FP_GLO

FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 194
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[]
[]
Juvens 90
PERSON
PERSON
FN: 0 FP: 1 TP: 0
FP_GLOBAL: 195
FN_GLOBAL: 436
TP_GLOBAL: 252
correct entities
[(52, 76, 'creative-work'), (79, 90, 'person'), (97, 104, 'creative-work'), (110, 120, 'person')]
['The Angels are Listening', 'Snatam Kaur', 'Suṉi-ai', 'Ajeet Kaur']
Sana tu
PERSON
PERSON
Snatam Kaur
PERSON
PERSON
Suṉi-ai
PERSON
PERSON
Ajeet Kaur
PERSON
PERSON
FN: 2 FP: 1 TP: 2
FP_GLOBAL: 196

Words: ['@', 'TwoBlind', 'Bros', 'Fantastic', 'piece', 'on', '@', 'NBCNightlyNews', 'tonight', '!', '! !', 'Love', 'you', 'guys', '!', '!']
Entities: [('TwoBlind', 'PERSON', 2, 10), ('NBCNightlyNews', 'ORG', 36, 50), ('tonight', 'TIME', 51, 58)]
  doc = nlp1(text)


FN: 2 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 470
TP_GLOBAL: 260
correct entities
[(5, 16, 'person')]
['karibrownnn']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 471
TP_GLOBAL: 260
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 471
TP_GLOBAL: 260
correct entities
[(28, 31, 'person'), (48, 56, 'group')]
['Ney', 'Marshals']
Ney
PERSON
PERSON
Marshals
ORG
ORG
FN: 0 FP: 0 TP: 2
FP_GLOBAL: 210
FN_GLOBAL: 471
TP_GLOBAL: 262
correct entities
[(0, 85, 'product')]
['DELL Inspiron 17 R 5721 Core i 5 - 3317 U 1 . 7 GHz 6 Gb 750 Gb DVDRW 17 . 3 " Laptop']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 472
TP_GLOBAL: 262
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 472
TP_GLOBAL: 262
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 472
TP_GLOBAL: 262
correct entities
[(5, 17, 'creative-work'), (120, 129, 'person')]
['PrisonPlanet', 'Kellyanne']
Kellyanne
GPE
GPE
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 210
FN_GLOBAL: 474
TP_GLOBAL: 262
correct entities
[(7

FN: 1 FP: 0 TP: 0
FP_GLOBAL: 221
FN_GLOBAL: 520
TP_GLOBAL: 275
correct entities
[(2, 9, 'person'), (16, 30, 'person'), (87, 91, 'person')]
['Marcusb', 'RichHomieHuang', 'Alan']
RichHomieHuang
PERSON
PERSON
Alan
PERSON
PERSON
FN: 1 FP: 0 TP: 2
FP_GLOBAL: 221
FN_GLOBAL: 521
TP_GLOBAL: 277
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 221
FN_GLOBAL: 521
TP_GLOBAL: 277
correct entities
[(2, 14, 'corporation')]
['HardCountFOX']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 221
FN_GLOBAL: 522
TP_GLOBAL: 277
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 221
FN_GLOBAL: 522
TP_GLOBAL: 277
correct entities
[(2, 12, 'person')]
['SeanyReidy']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 221
FN_GLOBAL: 523
TP_GLOBAL: 277
correct entities
[(11, 40, 'creative-work')]
['2000 AD At The Cartoon Museum']
The Cartoon Museum
FAC
FAC
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 222
FN_GLOBAL: 524
TP_GLOBAL: 277
correct entities
[(5, 14, 'person'), (38, 51, 'location')]
['davidfrum', 'United States']
the United States
GPE
GPE
FN: 2 FP: 1 TP: 0


FN: 0 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 568
TP_GLOBAL: 286
correct entities
[(47, 58, 'creative-work')]
['SwallaVideo']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 569
TP_GLOBAL: 286
correct entities
[(0, 6, 'person'), (12, 23, 'group')]
['brooke', 'candy girls']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 571
TP_GLOBAL: 286
correct entities
[(36, 46, 'corporation')]
['Bellarmine']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 572
TP_GLOBAL: 286
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 572
TP_GLOBAL: 286
correct entities
[(6, 21, 'creative-work')]
['Up and Vanished']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 573
TP_GLOBAL: 286
correct entities
[(0, 12, 'creative-work')]
['Auto DJ Show']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL: 574
TP_GLOBAL: 286
correct entities
[(0, 13, 'person')]
['Jack Gilinsky']
Jack Gilinsky
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 231
FN_GLOBAL: 574
TP_GLOBAL: 287
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 231
FN_GLOBAL

Trio @ phronesismusic
ORG
ORG
Frankfurt Radio Big Band &
ORG
ORG
JulArgJazz
ORG
ORG
The Behemoth @ EditionRecords
ORG
ORG
FN: 4 FP: 3 TP: 1
FP_GLOBAL: 254
FN_GLOBAL: 623
TP_GLOBAL: 297
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 254
FN_GLOBAL: 623
TP_GLOBAL: 297
correct entities
[(2, 13, 'person')]
['CorporalMum']
CorporalMum
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 254
FN_GLOBAL: 623
TP_GLOBAL: 298
correct entities
[]
[]
Winston
PERSON
PERSON
Bun
PERSON
PERSON
FN: 0 FP: 2 TP: 0
FP_GLOBAL: 256
FN_GLOBAL: 623
TP_GLOBAL: 298
correct entities
[(5, 15, 'person'), (18, 40, 'creative-work')]
['LanaDelRey', 'Music To Watch Boys To']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 256
FN_GLOBAL: 625
TP_GLOBAL: 298
correct entities
[(5, 16, 'person')]
['YasminYonis']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 256
FN_GLOBAL: 626
TP_GLOBAL: 298
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 256
FN_GLOBAL: 626
TP_GLOBAL: 298
correct entities
[(5, 12, 'person')]
['anisasx']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 256
FN_GLOBAL

FN: 3 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 667
TP_GLOBAL: 307
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 667
TP_GLOBAL: 307
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 667
TP_GLOBAL: 307
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 667
TP_GLOBAL: 307
correct entities
[(20, 76, 'creative-work'), (114, 118, 'product')]
['Age of Defenders - Multiplayer Tower Defense and Offense', 'ipad']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 669
TP_GLOBAL: 307
correct entities
[(20, 37, 'creative-work')]
['BeautyAndTheBeast']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 670
TP_GLOBAL: 307
correct entities
[(65, 73, 'person')]
['Isabella']
Isabella
PERSON
PERSON
FN: 0 FP: 0 TP: 1
FP_GLOBAL: 260
FN_GLOBAL: 670
TP_GLOBAL: 308
correct entities
[(5, 21, 'person')]
['climb _ chairman']
FN: 1 FP: 0 TP: 0
FP_GLOBAL: 260
FN_GLOBAL: 671
TP_GLOBAL: 308
correct entities
[(2, 8, 'person'), (11, 18, 'person')]
['chabot', 'cameron']
FN: 2 FP: 0 T

SalHernandez
ORG
ORG
Brett Baier
PERSON
PERSON
Fox News
ORG
ORG
Andrew Napolitano
PERSON
PERSON
Fox News
ORG
ORG
FN: 3 FP: 0 TP: 2
FP_GLOBAL: 270
FN_GLOBAL: 724
TP_GLOBAL: 318
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 270
FN_GLOBAL: 724
TP_GLOBAL: 318
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 270
FN_GLOBAL: 724
TP_GLOBAL: 318
correct entities
[(0, 5, 'person'), (18, 38, 'creative-work'), (41, 45, 'product')]
['Alain', 'Plague Inc : Evolved', 'PS 4']
Alain 3378
PERSON
PERSON
Plague Inc
WORK_OF_ART
WORK_OF_ART
FN: 3 FP: 2 TP: 0
FP_GLOBAL: 272
FN_GLOBAL: 727
TP_GLOBAL: 318
correct entities
[(5, 15, 'person'), (137, 144, 'product')]
['bubblestbh', 'Instagr']
FN: 2 FP: 0 TP: 0
FP_GLOBAL: 272
FN_GLOBAL: 729
TP_GLOBAL: 318
correct entities
[]
[]
FN: 0 FP: 0 TP: 0
FP_GLOBAL: 272
FN_GLOBAL: 729
TP_GLOBAL: 318
correct entities
[(0, 14, 'person')]
['Curious George']
George
PERSON
PERSON
FN: 1 FP: 1 TP: 0
FP_GLOBAL: 273
FN_GLOBAL: 730
TP_GLOBAL: 318
correct entities
[]
[]
FN: 0

In [539]:
precision = tp_global/(tp_global + fp_global) 
recall= tp_global/ (tp_global + fn_global)
f1_score = 2* ((precision*recall)/(precision+recall))
print("Precision: " + str(precision))
print("Recall: " + str(recall))
print("F1 Score: " + str(f1_score))


Precision: 0.5333333333333333
Recall: 0.2965708989805375
F1 Score: 0.38117927337701013


# FLAIR

In [None]:
from flair.data import Sentence
from flair.models import SequenceTagger

# make a sentence
sentence = Sentence('I love Berlin .')

# load the NER tagger
tagger = SequenceTagger.load('ner-ontonotes')

# run NER over sentence
tagger.predict(sentence)

2020-05-27 12:15:37,428 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/release-ner-ontonotes-0/en-ner-ontonotes-v0.4.pt not found in cache, downloading to /tmp/tmp__oypr0_


 29%|██▊       | 432476160/1510100570 [03:54<06:54, 2597845.30B/s] 

In [14]:
print(sentence)
print('The following NER tags are found:')

# iterate over entities and print
for entity in sentence.get_spans('ner'):
    print(entity)

Sentence: "I love Berlin ."   [− Tokens: 4  − Token-Labels: "I love Berlin <S-LOC> ."]
The following NER tags are found:
Span [3]: "Berlin"   [− Labels: LOC (0.9992)]


In [38]:
tagger.predict(TRAIN_DATA[0][0])

[Sentence: "@paulwalk It 's the view from where I 'm living for two weeks . Empire State Building = ESB . Pretty bad storm here last evening ."   [− Tokens: 27  − Token-Labels: "@paulwalk It 's the view from where I 'm living for two weeks . Empire <B-LOC> State <I-LOC> Building <E-LOC> = ESB <S-ORG> . Pretty bad storm here last evening ."]]

In [37]:
TRAIN_DATA[0]

["@paulwalk It 's the view from where I 'm living for two weeks . Empire State Building = ESB . Pretty bad storm here last evening .",
 {'entities': [(64, 85, 'location'), (88, 91, 'location')]}]

# TagMe

In [1]:
import tagme

In [2]:
tagme.GCUBE_TOKEN = "edcf25a7-c492-49b8-b80e-bbee015fc687-843339462"


In [11]:
%%time
lunch_annotations = tagme.annotate("My favourite meal is Mexican burritos.")


CPU times: user 13.6 ms, sys: 0 ns, total: 13.6 ms
Wall time: 546 ms


In [6]:
for ann in lunch_annotations.get_annotations(0.1):
    print(ann)

meal -> Meal (score: 0.2014230340719223)
Mexican -> Mexican cuisine (score: 0.36614900827407837)
burritos -> Burrito (score: 0.28607892990112305)


In [7]:
%%time
tomatoes_mentions = tagme.mentions("I definitely like ice cream better than tomatoes.")

for mention in tomatoes_mentions.mentions:
    print(mention)

ice cream [18,27] lp=0.18749085068702698
tomatoes [40,48] lp=0.004235605709254742
CPU times: user 14.7 ms, sys: 869 µs, total: 15.5 ms
Wall time: 627 ms


In [10]:
# Get relatedness between a pair of entities specified by title.
rels = tagme.relatedness_title(("Barack Obama", "Donald Trump"))
print("Obama and italy have a semantic relation of", rels.relatedness[0].rel)

Obama and italy have a semantic relation of 0.53215491771698
