# Named Entity Recognition

Notebook based on the work of Lee et al. ("LEAN-LIFE: A Label-Efficient Annotation Framework Towards Learning from Explanation," 2020).

Ensure that the FastAPI server is up and running before executing the request cells (see the [setup instructions](../fast_api/readme.md)).

# Table of contents

1. Data prep
2. Training
3. Evaluation

 ---
 # 1. Data prep

In [1]:
# imports
import requests
import json
import numpy as np
from scipy import stats

In [2]:
# Base URL of the locally running FastAPI model service; the request cells
# below prepend it to each endpoint path.
FAST_API_URL = "http://localhost:9000"

---
#### Prepare data for Trigger training

In [3]:
# Load the trigger-annotated training set. Each sample carries 'text',
# 'label' and 'explanation' strings (see the preview below). The context
# manager closes the file handle as soon as parsing is done, and the explicit
# encoding keeps the non-ASCII tweet text independent of the platform default.
with open('explanation_IDRISI-RE-flood_tokenized.json', encoding='utf-8') as fh:
    train_with_triggers = json.load(fh)
train_with_triggers[:3]

[{'text': 'Korean actress Lee Young-ae know as ‘ Changumi ’ made a contribution of USD 50,000 to support flood relief efforts in Sri Lanka',
  'label': 'O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC',
  'explanation': 'O O O O O O O O O O O O O O O O O O T-0 T-0 T-0 O O'},
 {'text': 'RT @ Vidiyallk : Government seeks # world aid for # FloodRelief ; 4 ministers appointed to work out plans for donor conference in # Colombo short',
  'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O',
  'explanation': 'O O O O O O O O O O O O O O O O O O O O O O T-0 T-0 O O O'},
 {'text': 'First phase of the @ IMCD_officials # FloodRelief program is over . Please direct dry ration donations to Rathmalana Air Force Camp . # FloodSL',
  'label': 'O O O O O O O O O O O O O O O O O O B-LOC O O O O O O',
  'explanation': 'O O O O O O O O O O O O O O O O T-0 T-0 O T-1 T-2 T-2 O O O'}]

In [4]:
len(train_with_triggers)

3484

---
#### Prepare data for Standard training

In [5]:
def remove_triggers(dataset):
    """Return new samples that keep only the 'text' and 'label' fields.

    Any other key (such as 'explanation') is dropped. The input samples are
    not modified; a fresh list of fresh dicts is returned.
    """
    return [
        {'text': sample['text'], 'label': sample['label']}
        for sample in dataset
    ]

train_without_triggers = remove_triggers(train_with_triggers)
train_without_triggers[:3]

[{'text': 'Korean actress Lee Young-ae know as ‘ Changumi ’ made a contribution of USD 50,000 to support flood relief efforts in Sri Lanka',
  'label': 'O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC'},
 {'text': 'RT @ Vidiyallk : Government seeks # world aid for # FloodRelief ; 4 ministers appointed to work out plans for donor conference in # Colombo short',
  'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O'},
 {'text': 'First phase of the @ IMCD_officials # FloodRelief program is over . Please direct dry ration donations to Rathmalana Air Force Camp . # FloodSL',
  'label': 'O O O O O O O O O O O O O O O O O O B-LOC O O O O O O'}]

In [6]:
len(train_without_triggers)

3484

---
#### Prepare data for evaluation

In [7]:
# Load the held-out flood data (split into dev and test halves further down).
# The context manager closes the file handle after parsing instead of leaving
# it to the garbage collector; the encoding is pinned for the non-ASCII text.
with open('dev_IDRISI-RE-flood.json', encoding='utf-8') as fh:
    dev_n_test = json.load(fh)
print(len(dev_n_test))
print(dev_n_test[:3])

323
[{'text': 'Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'label': 'O O O B-LOC O O O O O O O O O O'}, {'text': 'Maldives offers financial assistance to flood hit Sri Lanka', 'label': 'O O O O O O O B-LOC I-LOC'}, {'text': '@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [8]:
# Cut the held-out flood set at its midpoint: the first half becomes the dev
# set, the remainder the test set. Order is preserved; nothing is shuffled.
size = len(dev_n_test)
midpoint = int(size*0.5)
dev = list(dev_n_test[:midpoint])
test = list(dev_n_test[midpoint:])

# Sanity checks: combined size, individual sizes, and a peek at each half.
print(len(dev)+len(test))
print(len(dev))
print(len(test))
print(dev[:3])
print(test[:3])

323
161
162
[{'text': 'Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'label': 'O O O B-LOC O O O O O O O O O O'}, {'text': 'Maldives offers financial assistance to flood hit Sri Lanka', 'label': 'O O O O O O O B-LOC I-LOC'}, {'text': '@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O'}]
[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'}, {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .', 'label': 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'}, {'text': '@ narendr

In [9]:
# Flatten every flood test sample into a [text, label] pair, the layout that
# is passed as 'eval_data' to the evaluation endpoints.
test_strings = [[sample['text'], sample['label']] for sample in test]
print(len(test_strings))
print(test_strings[:3])

162
[['# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala', 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'], ['India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .', 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'], ['@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods', 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O']]


In [10]:
# Flatten every flood dev sample into a [text, label] pair, the layout that
# is passed as 'eval_data' to the evaluation endpoints.
dev_strings = [[sample['text'], sample['label']] for sample in dev]
print(len(dev_strings))
print(dev_strings[:3])

161
[['Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'O O O B-LOC O O O O O O O O O O'], ['Maldives offers financial assistance to flood hit Sri Lanka', 'O O O O O O O B-LOC I-LOC'], ['@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'O O O O O O O O O O O O O O O O O O O O O O O O O']]


In [11]:
# Load the cyclone test set. The context manager closes the file handle after
# parsing; the encoding is pinned so the tweet text decodes the same way on
# every platform.
with open('test_IDRISI-RE-cyclone.json', encoding='utf-8') as fh:
    test_cyclone = json.load(fh)

print(len(test_cyclone))
print(test_cyclone[:3])

1038
[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .', 'label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'}, {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'}, {'text': 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai', 'label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [12]:
# Flatten every cyclone sample into a [text, label] pair for the evaluation
# endpoints.
test_cyclone_strings = [[sample['text'], sample['label']] for sample in test_cyclone]
test_cyclone_strings[:3]

[['I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'],
 ['Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'],
 ['Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai',
  'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O']]

In [13]:
# Load the hurricane test set. The context manager closes the file handle
# after parsing; the encoding is pinned so the tweet text decodes the same way
# on every platform.
with open('test_IDRISI-RE-hurricane.json', encoding='utf-8') as fh:
    test_hurricane = json.load(fh)

print(len(test_hurricane))
print(test_hurricane[:3])

1038
[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .', 'label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'}, {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed', 'label': 'B-LOC O O O O O O O B-LOC O O O O O O O O'}, {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [14]:
# Flatten every hurricane sample into a [text, label] pair for the evaluation
# endpoints.
test_hurricane_strings = [[sample['text'], sample['label']] for sample in test_hurricane]
test_hurricane_strings[:3]

[['BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'],
 ['Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'B-LOC O O O O O O O B-LOC O O O O O O O O'],
 ['Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'O O O O O O O O O O O O O O O O O O O O O O O O']]

---
#### Prepare data for prediction

In [15]:
# Prediction input is the bare text of each [text, label] pair.
predict_floods = [pair[0] for pair in test_strings]
predict_floods[:3]

['# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
 '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods']

In [16]:
# Prediction input is the bare text of each [text, label] pair.
predict_cyclone = [pair[0] for pair in test_cyclone_strings]
predict_cyclone[:3]

['I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai']

In [17]:
# Prediction input is the bare text of each [text, label] pair.
predict_hurricane = [pair[0] for pair in test_hurricane_strings]
predict_hurricane[:3]

['BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube']

---
#### Auxiliary functions for human evaluation

In [18]:
def sync_label(label, comparison, pad=False):
    """Normalise a tag string so it can be compared with ``comparison``.

    Both arguments are whitespace-separated tag sequences. ``S-LOC`` tags are
    rewritten to ``B-LOC`` and ``E-LOC`` tags to ``I-LOC``. With ``pad=True``
    an ``O`` tag is widened to ``"  O  "`` wherever the tag at the same
    position in ``comparison`` is something other than ``O``.

    Returns the rewritten tags joined by single spaces.
    """
    other_tags = comparison.split()
    renamed = {"S-LOC": "B-LOC", "E-LOC": "I-LOC"}
    synced = []
    for position, tag in enumerate(label.split()):
        if tag == "O":
            # The other sequence is only consulted when padding is requested.
            if pad and other_tags[position] != "O":
                tag = "  O  "
        else:
            tag = renamed.get(tag, tag)
        synced.append(tag)
    return " ".join(synced)

In [19]:
def span_and_avgs(dists):
    """Summarise a list of distances as ``(min, max, mean, median)``.

    An empty (falsy) input yields four ``None`` values. Mean and median are
    computed with NumPy on the sorted values.
    """
    if dists:
        ordered = sorted(dists)
        lowest, highest = ordered[0], ordered[-1]
        return lowest, highest, np.mean(ordered), np.median(ordered)
    return (None,) * 4

In [20]:
def synced_trigger_preds(predict, preds, test_data, pad=False):
    """Combine texts, model output and gold labels into one record per sample.

    ``preds`` is read through its ``"class_preds"``, ``"trigger_preds"`` and
    ``"distance_preds"`` entries, which are walked in parallel with
    ``predict`` and ``test_data``; iteration stops at the shortest of them.
    Predicted and gold tag strings are each passed through ``sync_label``
    against the other, forwarding ``pad``.

    Returns a list of dicts with the keys ``text``, ``pred_label``,
    ``true_label``, ``key`` and ``dist``.
    """
    rows = zip(
        predict,
        preds["class_preds"],
        test_data,
        preds["trigger_preds"],
        preds["distance_preds"],
    )
    records = []
    for text, pred_tags, sample, trig_key, distance in rows:
        records.append({
            "text": text,
            "pred_label": sync_label(pred_tags, sample["label"], pad=pad),
            "true_label": sync_label(sample["label"], pred_tags, pad=pad),
            "key": trig_key,
            "dist": distance,
        })
    return records

In [21]:
def categorize_dists(results):
    """Sort each result's ``dist`` into buckets by how the prediction fared.

    A result is *correct* when its ``pred_label`` string equals its
    ``true_label`` string; every other result is *wrong* and additionally
    lands in exactly one of three buckets, decided by comparing the character
    lengths of the two label strings:

    * prediction shorter than truth -> false negative
    * prediction longer than truth  -> false positive
    * equal length                  -> mixed

    NOTE(review): string length is presumably used as a proxy for the number
    of entity tags; confirm this matches how the labels are produced.

    Returns ``(correct, wrong, false_negative, false_positive, false_mixed)``.
    """
    correct, wrong = [], []
    shorter, longer, same_length = [], [], []
    for res in results:
        pred, true, dist = res["pred_label"], res["true_label"], res["dist"]
        if pred == true:
            correct.append(dist)
            continue
        wrong.append(dist)
        gap = len(pred) - len(true)
        if gap < 0:
            shorter.append(dist)
        elif gap > 0:
            longer.append(dist)
        else:
            same_length.append(dist)
    return correct, wrong, shorter, longer, same_length

In [22]:
def evaluate_chunk(params, instances, threshold, interval, correct):
    """Report one distance interval and have the service evaluate it.

    Prints the interval bounds (``threshold - interval`` to ``threshold``),
    the number of instances and ``correct / len(instances)`` as accuracy, then
    posts ``instances`` with ``params`` to the trigger evaluation endpoint
    and prints the raw response body.
    """
    print(f"Interval: {threshold-interval:.2f}-{threshold:.2f}. Instances: {len(instances)}. Accuracy: {correct / len(instances):.4f}")
    payload = {'params': params, 'eval_data': instances}
    response = requests.post(FAST_API_URL + '/training/trigger/eval/', json=payload)
    print(response.text)

In [23]:
def eval_per_dist_interval(results, interval):
    """Evaluate predictions in consecutive distance intervals.

    Results are walked in ascending ``dist`` order and grouped into the
    intervals ``(0, interval]``, ``(interval, 2*interval]`` and so on. Each
    non-empty group is handed to ``evaluate_chunk`` together with the number
    of exact label matches in it. Empty intervals are skipped.

    The input is sorted here, so callers need not pre-sort it; an unsorted
    list would otherwise be binned into the wrong intervals. Sorting is
    stable, so already-sorted input is processed exactly as before. An empty
    ``results`` list produces no output instead of dividing by zero.

    Args:
        results: dicts with ``text``, ``pred_label``, ``true_label``, ``dist``.
        interval: width of each distance interval.
    """
    threshold = interval
    correct = 0
    instances = []

    for result in sorted(results, key=lambda r: r["dist"]):
        # Advance to the interval containing this result, flushing whatever
        # was collected for the interval being left behind.
        while result["dist"] > threshold:
            if instances:
                evaluate_chunk(params_trigger_eval, instances, threshold, interval, correct)
                instances = []
                correct = 0
            threshold += interval
        instances.append([result["text"], result["true_label"]])
        if result["pred_label"] == result["true_label"]:
            correct += 1

    # Flush the final interval; there is nothing to flush for empty input.
    if instances:
        evaluate_chunk(params_trigger_eval, instances, threshold, interval, correct)

In [24]:
def entityless_predictions_per_dist_interval(results, interval):
    """Count entity-free predictions and gold labels per distance interval.

    Results are sorted by ``dist`` and assigned to intervals of width
    ``interval``. A label string counts as entity-free when it contains no
    ``"B"`` character.

    Returns a dict keyed by ``"<lower> - <upper>"`` (two decimals each) whose
    values hold ``total`` (samples in the interval), ``pred`` (entity-free
    predictions) and ``true`` (entity-free gold labels).
    """
    buckets = {}
    upper = interval
    for res in sorted(results, key=lambda item: item["dist"]):
        while res["dist"] > upper:
            upper += interval
        bucket = buckets.setdefault(
            f"{upper-interval:.2f} - {upper:.2f}",
            {"total": 0, "pred": 0, "true": 0},
        )
        bucket["total"] += 1
        bucket["pred"] += 0 if "B" in res["pred_label"] else 1
        bucket["true"] += 0 if "B" in res["true_label"] else 1
    return buckets

In [25]:
def trigkey_dists(results):
    """Group the ``dist`` values of ``results`` by their ``key``.

    Returns a dict mapping each key to the list of distances seen for it, in
    input order.
    """
    grouped = {}
    for res in results:
        grouped.setdefault(res["key"], []).append(res["dist"])
    return grouped

In [26]:
def evaluate_trigkey(params, trigkey, instances, correct):
    """Report one trigger key and have the service evaluate its instances.

    Prints the key, the number of instances and ``correct / len(instances)``
    as accuracy, then posts ``instances`` with ``params`` to the trigger
    evaluation endpoint and prints the raw response body.
    """
    print(f"Trigkey: {trigkey}. Instances: {len(instances)}. Accuracy: {correct / len(instances):.4f}")
    payload = {'params': params, 'eval_data': instances}
    response = requests.post(FAST_API_URL + '/training/trigger/eval/', json=payload)
    print(response.text)

In [27]:
def eval_per_trigkey(results):
    """Evaluate predictions separately for every trigger key.

    Results are grouped by ``key`` in order of first appearance. For each
    group the ``[text, true_label]`` pairs and the number of exact label
    matches are passed to ``evaluate_trigkey``.
    """
    per_key = {}
    for res in results:
        hit = 1 if res["pred_label"] == res["true_label"] else 0
        # entry layout: [number of exact matches, list of [text, true_label]]
        entry = per_key.setdefault(res["key"], [0, []])
        entry[0] += hit
        entry[1].append([res["text"], res["true_label"]])

    for trigkey, (n_correct, pairs) in per_key.items():
        evaluate_trigkey(params_trigger_eval, trigkey, pairs, n_correct)

In [28]:
def entityless_predictions_per_trigkey(results):
    """Count entity-free predictions and gold labels per trigger key.

    A label string counts as entity-free when it contains no ``"B"``
    character.

    Returns a dict mapping each ``key`` to ``total`` (samples with that key),
    ``pred`` (entity-free predictions) and ``true`` (entity-free gold labels).
    """
    tallies = {}
    for res in results:
        tally = tallies.setdefault(res["key"], {"total": 0, "pred": 0, "true": 0})
        tally["total"] += 1
        tally["pred"] += 0 if "B" in res["pred_label"] else 1
        tally["true"] += 0 if "B" in res["true_label"] else 1
    return tallies

In [29]:
def label_phrases(dataset):
    """Collect the labelled entity phrases of every sample.

    For each sample the whitespace-split ``text`` and ``label`` are walked in
    parallel. Tokens whose tag is not ``O`` are accumulated into the current
    phrase; a ``B-LOC`` tag closes a phrase that is already open and starts a
    new one. An ``O`` tag by itself does not close a phrase.

    Returns the phrases (tokens joined by single spaces) in order of
    appearance across the whole dataset.
    """
    phrases = []
    for sample in dataset:
        words = sample['text'].split()
        tags = sample['label'].split()
        current = []
        for idx in range(len(tags)):
            tag = tags[idx]
            if tag == "O":
                continue
            if tag == "B-LOC" and current:
                phrases.append(" ".join(current))
                current = []
            current.append(words[idx])
        if current:
            phrases.append(" ".join(current))
    return phrases

In [30]:
def trigger_phrases(dataset):
    """Collect the trigger phrases of every sample.

    For each sample the whitespace-split ``text`` and ``explanation`` are
    walked in parallel. Tokens whose tag is not ``O`` are accumulated into the
    current phrase, and a phrase is closed whenever the trigger tag changes
    (e.g. from ``T-0`` to ``T-1``). An ``O`` tag by itself does not close a
    phrase.

    Phrase boundaries are detected by comparing whole tags. The earlier
    approach compared only the last character of the tag against a running
    counter, which split every token of a trigger with index 10 or higher
    and relied on the indices appearing as 0, 1, 2, ... in order.

    Returns the phrases (tokens joined by single spaces) in order of
    appearance across the whole dataset.
    """
    phrases = []
    for sample in dataset:
        tokens = sample['text'].split()
        tags = sample['explanation'].split()
        phrase = []
        current_tag = None
        for i, tag in enumerate(tags):
            if tag == "O":
                continue
            # A different trigger tag starts a new phrase.
            if phrase and tag != current_tag:
                phrases.append(" ".join(phrase))
                phrase = []
            current_tag = tag
            phrase.append(tokens[i])
        if phrase:
            phrases.append(" ".join(phrase))
    return phrases

---
#### Check dataset

In [31]:
# Unique tweets
#
# Event indices into the training list:
#   Sri Lanka   :429
#   Midwest US  429:1595
#   Kerala      1595:2990
#   Maryland    2990:
#
# Keying a dict by tweet text collapses duplicates; its size is the number of
# distinct tweets.
tweets = dict.fromkeys((sample["text"] for sample in train_with_triggers), True)
len(tweets)

2281

In [32]:
# Named Entities
nameds = label_phrases(train_with_triggers)
nameds[:10]

['Sri Lanka',
 'Colombo',
 'Rathmalana',
 'srilanka',
 'srilanka',
 'Bangladesh',
 'Sri Lanka',
 'Gampaha',
 'SriLanka',
 'SriLanka']

In [33]:
len(nameds)

3106

In [34]:
# Tally how often each entity phrase occurs, then rank the (phrase, count)
# pairs by descending count.
phrase_count = {}
for phrase in nameds:
    phrase_count[phrase] = phrase_count.get(phrase, 0) + 1
phrase_count = sorted(phrase_count.items(), key=lambda entry: entry[1], reverse=True)
phrase_count

[('Nebraska', 708),
 ('Kerala', 549),
 ('Maryland', 258),
 ('SriLanka', 108),
 ('Ellicott City', 99),
 ('Sri Lanka', 76),
 ('Iowa', 76),
 ('India', 53),
 ('kerala', 42),
 ('srilanka', 32),
 ('Omaha', 22),
 ('Missouri', 17),
 ('Ernakulam', 17),
 ('Baltimore', 17),
 ('Kansas', 16),
 ('Chengannur', 16),
 ('Kodagu', 13),
 ('South Dakota', 12),
 ('Alappuzha', 12),
 ('UAE', 11),
 ('Karnataka', 11),
 ('Bangladesh', 10),
 ('Israel', 10),
 ('Wisconsin', 10),
 ('Pathanamthitta', 10),
 ('Howard County', 10),
 ('EllicottCity', 10),
 ('NEBRASKA', 9),
 ('Ohio', 9),
 ('Columbus', 9),
 ('Mumbai', 9),
 ('Chennai', 9),
 ('nebraska', 8),
 ('Aluva', 8),
 ('Kochi', 8),
 ('Odisha', 8),
 ('Ellicott', 8),
 ('Patapsco River', 8),
 ('China', 7),
 ('Kalutara', 7),
 ('Pakistan', 7),
 ('Indian', 7),
 ('Texas', 7),
 ('Nebraskas', 7),
 ('USA', 7),
 ('Delhi', 7),
 ('Ratnapura', 6),
 ('Florida', 6),
 ('Puerto Rico', 6),
 ('Fremont', 6),
 ('North Dakota', 6),
 ('Gujarat', 6),
 ('KERALA', 6),
 ('Maharashtra', 6),
 ('Ben

In [35]:
len(phrase_count)

529

In [36]:
# Break every entity phrase into its individual tokens.
named_tokens = [token for phrase in nameds for token in phrase.split()]
named_tokens[:10]

['Sri',
 'Lanka',
 'Colombo',
 'Rathmalana',
 'srilanka',
 'srilanka',
 'Bangladesh',
 'Sri',
 'Lanka',
 'Gampaha']

In [37]:
len(named_tokens)

3475

In [38]:
# Tally how often each entity token occurs, then rank the (token, count)
# pairs by descending count.
token_count = {}
for token in named_tokens:
    token_count[token] = token_count.get(token, 0) + 1
token_count = sorted(token_count.items(), key=lambda entry: entry[1], reverse=True)
token_count

[('Nebraska', 710),
 ('Kerala', 549),
 ('Maryland', 261),
 ('City', 112),
 ('SriLanka', 109),
 ('Ellicott', 107),
 ('Sri', 84),
 ('Iowa', 78),
 ('Lanka', 77),
 ('India', 53),
 ('kerala', 42),
 ('srilanka', 32),
 ('Omaha', 23),
 ('Missouri', 20),
 ('Dakota', 18),
 ('Ernakulam', 18),
 ('Baltimore', 18),
 ('Kansas', 16),
 ('Chengannur', 16),
 ('County', 15),
 ('River', 15),
 ('Kodagu', 13),
 ('South', 12),
 ('Alappuzha', 12),
 ('Howard', 12),
 ('UAE', 11),
 ('Karnataka', 11),
 ('Bangladesh', 10),
 ('Israel', 10),
 ('Wisconsin', 10),
 ('Pathanamthitta', 10),
 ('EllicottCity', 10),
 ('NEBRASKA', 9),
 ('Ohio', 9),
 ('Columbus', 9),
 ('Mumbai', 9),
 ('Chennai', 9),
 ('nebraska', 8),
 ('North', 8),
 ('Aluva', 8),
 ('Kochi', 8),
 ('Odisha', 8),
 ('Patapsco', 8),
 ('Matara', 7),
 ('China', 7),
 ('Kalutara', 7),
 ('Pakistan', 7),
 ('Indian', 7),
 ('Texas', 7),
 ('Fremont', 7),
 ('Nebraskas', 7),
 ('USA', 7),
 ('Delhi', 7),
 ('Virginia', 7),
 ('Ratnapura', 6),
 ('Florida', 6),
 ('Puerto', 6),
 ('R

In [39]:
len(token_count)

591

In [40]:
# Trigger Entities
triggers = trigger_phrases(train_with_triggers)
triggers[:10]

['relief efforts in',
 'conference in',
 'donations to',
 'Air',
 'Force Camp',
 'donates',
 'flood victims',
 'In',
 'camp',
 'travelled from North to South of']

In [41]:
len(triggers)

5061

In [42]:
# Tally how often each trigger phrase occurs, then rank the (phrase, count)
# pairs by descending count.
unique_trigger_entities = {}
for phrase in triggers:
    unique_trigger_entities[phrase] = unique_trigger_entities.get(phrase, 0) + 1
unique_trigger_entities = sorted(unique_trigger_entities.items(), key=lambda entry: entry[1], reverse=True)
unique_trigger_entities

[('in', 304),
 ('Nebraska', 107),
 ('Iowa', 100),
 ('floods', 89),
 ('Maryland', 72),
 ('flooding', 67),
 ('flood victims', 59),
 ('flood', 58),
 ('flooding in', 57),
 ('to', 54),
 ('state', 47),
 ('from', 46),
 ('flood relief', 39),
 ('Flood Relief', 34),
 ('floods in', 31),
 ('people of', 29),
 ('Missouri', 29),
 ('South Dakota', 28),
 ('homes', 28),
 ('roads', 27),
 ('communities', 26),
 ('Kerala', 25),
 ('states', 23),
 ('District', 21),
 ('Wisconsin', 21),
 ('Mississippi', 21),
 ('district', 20),
 ('across', 20),
 ('farmers', 19),
 ('areas', 17),
 ('Floods', 17),
 ('residents', 17),
 ('flood-hit', 16),
 ('at', 16),
 ('Wyoming', 16),
 ('relief camps', 16),
 ('Ellicott City', 16),
 ('city', 15),
 ('Montana', 15),
 ('CM', 15),
 ('Alappuzha', 15),
 ('In', 14),
 ('community', 14),
 ('town', 14),
 ('Illinois', 14),
 ('Ernakulam', 14),
 ('bridges', 13),
 ('livestock', 13),
 ('help', 12),
 ('Kansas', 12),
 ('Midwest', 12),
 ('area', 12),
 ('districts', 11),
 ('Flood', 11),
 ('houses', 11)

In [43]:
len(unique_trigger_entities)

1909

In [44]:
# Break every trigger phrase into its individual tokens.
trigger_tokens = [token for phrase in triggers for token in phrase.split()]
trigger_tokens[:10]

['relief',
 'efforts',
 'in',
 'conference',
 'in',
 'donations',
 'to',
 'Air',
 'Force',
 'Camp']

In [45]:
len(trigger_tokens)

8127

In [46]:
# Tally how often each trigger token occurs, then rank the (token, count)
# pairs by descending count.
unique_trigger_tokens = {}
for token in trigger_tokens:
    unique_trigger_tokens[token] = unique_trigger_tokens.get(token, 0) + 1
unique_trigger_tokens = sorted(unique_trigger_tokens.items(), key=lambda entry: entry[1], reverse=True)
unique_trigger_tokens

[('in', 964),
 ('flood', 277),
 ('flooding', 222),
 ('of', 204),
 ('floods', 179),
 ('to', 156),
 ('Nebraska', 155),
 ('Iowa', 124),
 ('relief', 121),
 ('from', 119),
 ('victims', 93),
 ('Flood', 89),
 ('City', 89),
 ('Maryland', 88),
 ('flash', 80),
 ('state', 74),
 ('Ellicott', 71),
 ('Relief', 57),
 ('across', 57),
 ('areas', 55),
 ('at', 54),
 ('people', 53),
 ('Dakota', 49),
 ('camps', 46),
 ('communities', 43),
 ('South', 37),
 ('homes', 37),
 ('for', 36),
 ('flood-hit', 35),
 ('Floods', 34),
 ('through', 34),
 ('Kerala', 34),
 ('Missouri', 32),
 ('states', 32),
 ('district', 31),
 ('affected', 29),
 ('roads', 29),
 ('North', 28),
 ('area', 27),
 ('District', 26),
 ('districts', 25),
 ('’', 25),
 ('s', 25),
 ('residents', 24),
 ('In', 23),
 ('counties', 23),
 ('Flooding', 23),
 ('State', 22),
 ('Flash', 22),
 ('hit', 21),
 ('Wisconsin', 21),
 ('city', 21),
 ('Mississippi', 21),
 ('Ernakulam', 20),
 ('community', 19),
 ('farmers', 19),
 ('are', 19),
 ('Alappuzha', 19),
 ('by', 17)

In [47]:
len(unique_trigger_tokens)

1409

---
---
# 2. Training

See the [JSON schema](../fast_api/json_schema.py#L516) for a list of all parameters.

See the [defaults module](../model_training/internal_api/defaults.py) for the default values.

## 2.1. Training parameters

### Define training parameters

#### Standard

In [138]:
# Parameters sent as 'params' with the standard (trigger-free) NER training request.
params_standard_training = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_standard",
    # a string name representing the dataset name
    "dataset_name": "idrisi",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # "True" when the data has to be passed with the request,
    # "False" when re-training or the data was processed earlier and can be retrieved
    "build_data": True,
    # number of epochs
    "num_epochs": 10,
    # training batch size
    "batch_size": 10,
    # learning rate
    "learning_rate": 0.01,
    # embedding to be used for training. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
    # random seed
    "seed": 1337,
}

#### TriggerNER

In [139]:
# Parameters sent as 'params' with the TriggerNER training request.
params_trigger_training = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_soft_match",
    # a string name representing the dataset name
    "dataset_name": "idrisi_trigger",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # "True" when the data has to be passed with the request,
    # "False" when re-training or the data was processed earlier and can be retrieved
    "build_data": True,
    # number of epochs
    "num_epochs": 10,
    # number of pre-training epochs
    "pre_train_num_epochs": 20,
    # training batch size
    "batch_size": 10,
    # learning rate
    "learning_rate": 0.01,
    # embedding to be used for training. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
    # random seed
    "seed": 1337,
}

---
## 2.2. Model training

### Run model training

#### Standard

In [145]:
# Start standard NER training on the FastAPI service. Depending on input size
# and computing environment this can take a while; follow the FastAPI logs
# for progress.
response = requests.post(
    url=FAST_API_URL + '/training/standard/ner/api/',
    json={
        'params': params_standard_training,
        'labeled_data': train_without_triggers,
        'dev_data': dev,
        'eval_data': test,
    },
)
# On success the body is JSON containing a "save_path" key.
response.text

'{"save_path":"/home/yoriyari/LEAN-LIFE/model_api/fast_api/../model_training/trigger_ner/utilities/../../generated_data/saved_models/naive_idrisi_glove.6B.100d_1337_-1.0"}'

#### TriggerNER

In [50]:
# Start TriggerNER training on the FastAPI service. Depending on input size
# and computing environment this can take a while; follow the FastAPI logs
# for progress.
response = requests.post(
    url=FAST_API_URL + '/training/trigger/api/',
    json={
        'params': params_trigger_training,
        'explanation_triples': train_with_triggers,
        'dev_data': dev,
        'eval_data': test,
    },
)
# On success the body is JSON containing a "save_path" key.
response.text

'{"save_path":"/home/yoriyari/LEAN-LIFE/model_api/fast_api/../model_training/trigger_ner/utilities/../../generated_data/saved_models/trigger_idrisi_trigger_glove.6B.100d_1337_-1.0"}'

---
---
# 3. Evaluation

## 3.1. Accuracy

### Define evaluation parameters

#### Standard

In [48]:
# Parameters sent as 'params' with the standard-model evaluation requests.
# NOTE(review): presumably these have to match the values used for training
# (see params_standard_training) so the service finds the same model - confirm.
params_standard_eval = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_standard",
    # a string name representing the dataset name
    "dataset_name": "idrisi",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # evaluation batch size
    "batch_size": 10,
    # name of the embedding. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
}

#### TriggerNER

In [49]:
# Parameters sent as 'params' with the TriggerNER evaluation requests; the
# evaluation helper functions earlier in this notebook also read this name.
# NOTE(review): presumably these have to match the values used for training
# (see params_trigger_training) so the service finds the same model - confirm.
params_trigger_eval = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_soft_match",
    # a string name representing the dataset name
    "dataset_name": "idrisi_trigger",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # evaluation batch size
    "batch_size": 10,
    # name of the embedding. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
}

---
### Evaluate Standard performance

#### Floods dev

In [50]:
# Standard model, flood dev split; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/standard/ner/eval/',
    json={'params': params_standard_eval, 'eval_data': dev_strings},
)

response.text

'{"precision":98.75,"recall":69.60352422907489,"f1":81.65374677002585}'

#### Floods test

In [51]:
# Standard model, flood test split; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/standard/ner/eval/',
    json={'params': params_standard_eval, 'eval_data': test_strings},
)

response.text

'{"precision":96.15384615384616,"recall":52.96610169491526,"f1":68.30601092896175}'

#### Cyclone

In [52]:
# Standard model, cyclone test set; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/standard/ner/eval/',
    json={'params': params_standard_eval, 'eval_data': test_cyclone_strings},
)

response.text

'{"precision":91.26984126984127,"recall":8.493353028064993,"f1":15.540540540540542}'

#### Hurricane

In [53]:
# Standard model, hurricane test set; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/standard/ner/eval/',
    json={'params': params_standard_eval, 'eval_data': test_hurricane_strings},
)

response.text

'{"precision":97.43589743589743,"recall":18.682399213372662,"f1":31.35313531353135}'

---
### Evaluate TriggerNER performance

#### Floods dev

In [54]:
# TriggerNER model, flood dev split; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/trigger/eval/',
    json={'params': params_trigger_eval, 'eval_data': dev_strings},
)

response.text

'{"precision":91.82692307692307,"recall":84.14096916299559,"f1":87.81609195402298}'

#### Floods test

In [55]:
# TriggerNER model, flood test split; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/trigger/eval/',
    json={'params': params_trigger_eval, 'eval_data': test_strings},
)

response.text

'{"precision":86.95652173913044,"recall":84.7457627118644,"f1":85.83690987124463}'

#### Cyclone

In [56]:
# TriggerNER model, cyclone test set; the raw response body is displayed.
response = requests.post(
    url=FAST_API_URL + '/training/trigger/eval/',
    json={'params': params_trigger_eval, 'eval_data': test_cyclone_strings},
)

response.text

'{"precision":82.60416666666667,"recall":58.567208271787294,"f1":68.53932584269663}'

#### Hurricane

In [57]:
# Evaluate the TriggerNER model on the hurricane test data
# (`test_hurricane_strings`). The raw reply body is the cell output.
response = requests.post(
    FAST_API_URL + '/training/trigger/eval/',
    json=dict(params=params_trigger_eval, eval_data=test_hurricane_strings),
)

response.text

'{"precision":75.93423019431988,"recall":49.95083579154376,"f1":60.26097271648873}'

---
## 3.2. Predictions

### Define prediction parameters

#### Standard

In [58]:
# Settings sent along with every prediction request for the standard model.
params_standard_prediction = {
    "experiment_name": "idrisi_ner_standard",  # name identifying the model
    "dataset_name": "idrisi",                  # name identifying the dataset
    "task": "ner",                             # "ner" = Named Entity Recognition
    "batch_size": 10,                          # prediction batch size
    "embeddings": "glove.6B.100d",             # word embeddings (usual default: "glove.6B.100d")
    "emb_dim": 100,                            # dimension of the embeddings named above
    "hidden_dim": 200,                         # number of hidden dimensions
}

#### TriggerNER

In [59]:
# Settings sent along with every prediction request for the TriggerNER model.
params_trigger_prediction = {
    "experiment_name": "idrisi_ner_soft_match",  # name identifying the model
    "dataset_name": "idrisi_trigger",            # name identifying the dataset
    "task": "ner",                               # "ner" = Named Entity Recognition
    "batch_size": 10,                            # prediction batch size
    "embeddings": "glove.6B.100d",               # word embeddings (usual default: "glove.6B.100d")
    "emb_dim": 100,                              # dimension of the embeddings named above
    "hidden_dim": 200,                           # number of hidden dimensions
}

---
### Fetch Standard predictions

#### Floods test

In [60]:
# Ask the standard model for predictions on the floods test texts.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_floods,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on "class_preds".
response.raise_for_status()

preds_standard_floods = response.json()

# One record per sentence: the text plus predicted and gold label strings.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
    }
    for text, pred, gold in zip(
        predict_floods, preds_standard_floods["class_preds"], test
    )
]

[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods',
  'pred_label': 'O O 

#### Cyclone

In [61]:
# Ask the standard model for predictions on the cyclone test texts.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_cyclone,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on "class_preds".
response.raise_for_status()

preds_standard_cyclone = response.json()

# One record per sentence: the text plus predicted and gold label strings.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
    }
    for text, pred, gold in zip(
        predict_cyclone, preds_standard_cyclone["class_preds"], test_cyclone
    )
]

[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O   O     O   O   O   O',
  'true_label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'},
 {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O   O     O   O O O O O O O   O     O  ',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'},
 {'text': 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters 

#### Hurricane

In [62]:
# Ask the standard model for predictions on the hurricane test texts.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_hurricane,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on "class_preds".
response.raise_for_status()

preds_standard_hurricane = response.json()

# One record per sentence: the text plus predicted and gold label strings.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
    }
    for text, pred, gold in zip(
        predict_hurricane, preds_standard_hurricane["class_preds"], test_hurricane
    )
]

[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'pred_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'pred_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'true_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O'},
 {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'Hurricane Dori

---
### Fetch TriggerNER predictions

#### Floods test

In [63]:
# Ask the TriggerNER model for predictions on the floods test texts.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_floods,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on the prediction fields.
response.raise_for_status()

preds_trigger_floods = response.json()

# One record per sentence: text, predicted and gold label strings, plus the
# "trigger_preds" entry ("key") and "distance_preds" entry ("dist") returned
# for that sentence.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
        "key": trigger_key,
        "dist": distance,
    }
    for text, pred, gold, trigger_key, distance in zip(
        predict_floods,
        preds_trigger_floods["class_preds"],
        test,
        preds_trigger_floods["trigger_preds"],
        preds_trigger_floods["distance_preds"],
    )
]

[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Peoples of',
  'dist': 0.5495629906654358},
 {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O   O   O O B-LOC O O O O O O',
  'key': 'FL',
  'dist': 0.6133739352226257},
 {'text': '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it

#### Cyclone

In [64]:
# Ask the TriggerNER model for predictions on the cyclone test texts.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_cyclone,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on the prediction fields.
response.raise_for_status()

preds_trigger_cyclone = response.json()

# One record per sentence: text, predicted and gold label strings, plus the
# "trigger_preds" entry ("key") and "distance_preds" entry ("dist") returned
# for that sentence.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
        "key": trigger_key,
        "dist": distance,
    }
    for text, pred, gold, trigger_key, distance in zip(
        predict_cyclone,
        preds_trigger_cyclone["class_preds"],
        test_cyclone,
        preds_trigger_cyclone["trigger_preds"],
        preds_trigger_cyclone["distance_preds"],
    )
]

[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O B-LOC   O   O B-LOC O',
  'true_label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O',
  'key': 'Ngos in',
  'dist': 0.8147938847541809},
 {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC   O  ',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC',
  'key': 'food relief',
  'dist': 0.3597176671028137},
 {'text': 'Thanks to staff and patrons of @ Z

#### Hurricane

In [65]:
# Ask the TriggerNER model for predictions on the hurricane test texts.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_hurricane,
    }
)
# Stop here on an HTTP error status rather than failing later with a
# confusing JSON-decoding error or KeyError on the prediction fields.
response.raise_for_status()

preds_trigger_hurricane = response.json()

# One record per sentence: text, predicted and gold label strings, plus the
# "trigger_preds" entry ("key") and "distance_preds" entry ("dist") returned
# for that sentence.
# NOTE(review): sync_label(..., pad=True) appears to pad the two label strings
# so they line up column-wise (see the rendered output) - confirm in its definition.
[
    {
        "text": text,
        "pred_label": sync_label(pred, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], pred, pad=True),
        "key": trigger_key,
        "dist": distance,
    }
    for text, pred, gold, trigger_key, distance in zip(
        predict_hurricane,
        preds_trigger_hurricane["class_preds"],
        test_hurricane,
        preds_trigger_hurricane["trigger_preds"],
        preds_trigger_hurricane["distance_preds"],
    )
]

[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'pred_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'floods in Ellicott City',
  'dist': 0.3666766285896301},
 {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'pred_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'true_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'key': 'Strong Red Ale',
  'dist': 0.4629823565483093},
 {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'pred_label': 'O O O O O O O O O O 

---
## 3.3. Experiments

### All results

#### Floods test

In [66]:
# TriggerNER records for the floods test set, ordered from the smallest
# "dist" value to the largest.
results_floods = sorted(
    synced_trigger_preds(predict_floods, preds_trigger_floods, test),
    key=lambda record: record["dist"],
)
results_floods

[{'text': 'RT @ CAChirag : Should Central Govt Accept Financial help of 700 Cr from UAE ? # KeralaFloods # UAE # Kerala',
  'pred_label': 'O O O O O O O O O O O O O O B-LOC O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O B-LOC',
  'key': 'relief for',
  'dist': 0.007318167015910149},
 {'text': 'The body of a National Guardsman was found in a Maryland river today , two days after he went missing while attempting to rescue a woman and her cat during devastating flash floods that swept through Ellicott City .',
  'pred_label': 'O O O O O O O O O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O',
  'true_label': 'O O O O O O O O O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O',
  'key': 'scouring',
  'dist': 0.00908314622938633},
 {'text': 'There is a # food requirement for food for 500 people at L.P school Kottappuram , # Aluva . # KeralaFloodRequest # KeralaFloodRelief Tagging @ goonj @ vinayaravind @ KaapiRight 

#### Cyclone

In [67]:
# TriggerNER records for the cyclone test set, ordered from the smallest
# "dist" value to the largest (ascending is sorted()'s default).
results_cyclone = sorted(
    synced_trigger_preds(predict_cyclone, preds_trigger_cyclone, test_cyclone),
    key=lambda record: record["dist"],
)
results_cyclone

[{'text': 'RT @ zenzele : Anyone who has set up drop off points where citizens can drop off clothes , dried food tents etc for # CycloneIdai victims',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Extension',
  'dist': 0.010127319023013115},
 {'text': 'RT @ BigTimmz : 300 bodies of Zim cyclone victims floating in Mozambique .. local government minister July Moyo -',
  'pred_label': 'O O O O O O O O O O O O B-LOC O O O O O O O',
  'true_label': 'O O O O O O O O O O O O B-LOC O O O O O O O',
  'key': 'flood levels',
  'dist': 0.03203997761011124},
 {'text': 'The Chinese community in Zimbabwe donated cash and goods valued at $ 200,000 during a handover ceremony on Wed to help the victims of # CycloneIdai , a tropical storm that wreaked havoc in the country during the past week .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',


#### Hurricane

In [68]:
# TriggerNER records for the hurricane test set, ordered from the smallest
# "dist" value to the largest.
results_hurricane = sorted(
    synced_trigger_preds(predict_hurricane, preds_trigger_hurricane, test_hurricane),
    key=lambda record: record["dist"],
)
results_hurricane

[{'text': 'The governor is requesting the availability of federal resources , should Team South Carolina need them , to assist with hurricane preparation efforts .',
  'pred_label': 'O O O O O O O O O O O O O B-LOC O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O',
  'key': 'rivers of',
  'dist': 0.09380368888378143},
 {'text': 'The increase in powerful lightning is becoming very alarming . Is this a global warming symptom ? People and animals are getting killed and injured much more than weve seen ever .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'southwest Iowa',
  'dist': 0.10237621515989304},
 {'text': 'With Hurricane Dorian threatening much of Florida , the Jewish Volunteer Center needs your help . JVC is seeking compassionate people to join the Jewish Volunteer Emergency Response Team ( JVERT ) to assis

---
### Error Analysis

#### Standard Floods test

In [69]:
# Keep only the floods test sentences where the standard model's (padded)
# predicted label string differs from the (padded) gold label string.
errors_standard_floods = [
    record
    for record in (
        {
            "text": text,
            "pred_label": sync_label(pred, gold["label"], pad=True),
            "true_label": sync_label(gold["label"], pred, pad=True),
        }
        for text, pred, gold in zip(
            predict_floods, preds_standard_floods["class_preds"], test
        )
    )
    if record["pred_label"] != record["true_label"]
]
errors_standard_floods

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': 'FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  'pred_label': 'O O O O   O   O O O   O   O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O',
  'true_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O'},
 {'text': 'Bollywood Khans donated crores to Pakistan floods now silent on Kerala floods . Proud of u Prabhas',
  'pred_label': 'O O O O O   O   O O O O B-LOC O O O O O O',
  'true_label': 'O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': '. @ narendramodi declin

#### TriggerNER Floods test

In [70]:
# Keep only the floods test sentences where TriggerNER's predicted label
# string differs from the gold label string.
errors_trigger_floods = [
    record
    for record in synced_trigger_preds(
        predict_floods, preds_trigger_floods, test, pad=True
    )
    if record["pred_label"] != record["true_label"]
]
errors_trigger_floods

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O   O   O O B-LOC O O O O O O',
  'key': 'FL',
  'dist': 0.6133739352226257},
 {'text': '. @ narendramodi declining UAE monetary aid for # KeralaFloodRelief n releasing insufficient Funds to # RebuildKerala is a case of ◼️CARDINAL SIN ◼️POLITICAL HARAKIRI ◼️ACT OF INHUMANITY ◼️TERRIBLE GOVERNANCE ◼️GOVTs ASSAULT ON CITIZENS ◼️MENTAL BANKRUPTCY OF BJP GOVT',
  'pred_label': 'O O O O   O   O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'landslides hit',
  'dist': 0.5692219138145447},
 {'text': 'Comrade Saji Cherian , our MLA from Chengannur constituency who is participating in 

#### Union of Standard and TriggerNER errors on Floods test

In [71]:
# Merge the two per-model error lists into one list with a single row per
# sentence, carrying the gold labels and both models' labels.
# When a model made no error on a sentence, its column is filled with the gold
# labels, since its prediction matched them.

# First trigger-error record per text (first occurrence wins).
trigger_error_by_text = {}
for t in errors_trigger_floods:
    trigger_error_by_text.setdefault(t["text"], t)

errors = []
# Pass 1: every sentence the standard model got wrong.
for s in errors_standard_floods:
    t = trigger_error_by_text.get(s["text"])
    errors.append({
        "text": s["text"],
        "true_label": s["true_label"],
        "stnd_label": s["pred_label"],
        "trig_label": t["pred_label"] if t is not None else s["true_label"],
    })

# Pass 2: sentences only TriggerNER got wrong. Anything shared with the
# standard errors was added in pass 1, so a set lookup is all that is needed.
seen_texts = {row["text"] for row in errors}
for t in errors_trigger_floods:
    if t["text"] in seen_texts:
        continue
    seen_texts.add(t["text"])
    errors.append({
        "text": t["text"],
        "true_label": t["true_label"],
        "stnd_label": t["true_label"],
        "trig_label": t["pred_label"],
    })
errors

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'true_label': '  O   O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O',
  'stnd_label': 'B-LOC O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'trig_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O'},
 {'text': 'FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  'true_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O',
  'stnd_label': 'O O O O   O   O O O   O   O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O',
  'trig_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O'},
 {'text': 'Bollywood Khans donated crores to Pakistan floods now silent on Kerala flood

#### Entities which were correctly predicted by one model but not the other

In [72]:
# Collect the gold entities that exactly one of the two models labelled
# correctly (B- tag and every following I- tag). Each entry is
# (sentence text, index of the entity's first token, that token).
detected_by_standard = []
detected_by_triggerner = []
# Entity currently being tracked, or None when no entity is open:
# (True if the standard model found it else False, text, token index, token).
hold_for_i_loc = None


def _store_held_entity(held):
    """Append a completed entity to the list of the model that got it right."""
    if held[0]:
        detected_by_standard.append(held[1:])
    else:
        detected_by_triggerner.append(held[1:])


for error in errors:
    tokens = error["text"].split()
    true = error["true_label"].split()
    stnd = error["stnd_label"].split()
    trig = error["trig_label"].split()
    for i, label in enumerate(true):
        if label.startswith("B-"):
            # A new entity begins, so the one tracked so far is complete.
            if hold_for_i_loc:
                _store_held_entity(hold_for_i_loc)
                hold_for_i_loc = None
            # Track the entity only if exactly one model matched its B- tag.
            if label == stnd[i] and label != trig[i]:
                hold_for_i_loc = (True, error["text"], i, tokens[i])
            if label == trig[i] and label != stnd[i]:
                hold_for_i_loc = (False, error["text"], i, tokens[i])
        if label == "O" and hold_for_i_loc:
            # The entity ended on the previous token.
            _store_held_entity(hold_for_i_loc)
            hold_for_i_loc = None
        if label.startswith("I-") and hold_for_i_loc:
            # The model must match every continuation tag as well, otherwise
            # the entity does not count as detected.
            if hold_for_i_loc[0] and label != stnd[i]:
                hold_for_i_loc = None
            elif not hold_for_i_loc[0] and label != trig[i]:
                hold_for_i_loc = None
    # An entity reaching the last token of the sentence is complete too.
    # Without this flush it would leak into the next sentence, and be lost
    # entirely if this is the final sentence.
    if hold_for_i_loc:
        _store_held_entity(hold_for_i_loc)
        hold_for_i_loc = None
print(len(detected_by_standard))
print(len(detected_by_triggerner))

6
85


In [73]:
# Show the (text, token index, token) entries for entities that only the
# standard model labelled correctly.
detected_by_standard

[('# KeralaSOS Kadavantra regional sports club , ( kadavantra ) has excess food so in case some other camps which are in need can contact them Phone no : -9645221111 , 9061110000 # kerala # KeralaFloods # KeralaFloodRelief # KeralaFloodRescue @ CMOKerala # KeralaReliefFund # verified',
  33,
  'kerala'),
 ('The devastating flooding in Maryland over Memorial Day Weekend not only sent a wall of water through downtown Ellicott City , Md. , but reignited a debate over global warming and floods .',
  21,
  'Md.'),
 ('I did my part for our fellow Indians . # Kerala # KeralaFloodRelief # KeralaFloods # donate # whatyoucan',
  10,
  'Kerala'),
 ('Earlier # Apple also donated ₹ 7 Crore for Kerala Flood Relief . # Kerala # KeralaFloodRelief # GoogleForIndia',
  14,
  'Kerala'),
 ('RT @ CAChirag : Should Central Govt Accept Financial help of 700 Cr from UAE ? # KeralaFloods # UAE # Kerala',
  21,
  'Kerala'),
 ('TVM UPDATE : SUPPLIES NEEDED AT KINFRA APPAREL PARK READ TO EAT FOOD PACKETS AND WATE

In [74]:
# Show the (text, token index, token) entries for entities that only
# TriggerNER labelled correctly.
detected_by_triggerner

[('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  11,
  'Kerala'),
 ('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  16,
  'India'),
 ('FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  4,
  'KERALA'),
 ('FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  8,
  'Bangalore'),
 ('Bollywood Khans donated crores to Pakistan floods now silent on Kerala floods . Proud of u Prabhas',
  5,
  'Pakistan'),
 ('More than 220,000 people in Kerala , India have now been displaced by MASSIVE FLOOD

#### False positives

In [75]:
# Collect false positives: tokens whose gold label is "O" but for which a
# model predicted the start of an entity (a "B-" tag). Each entry is
# (sentence text, token index, token).
fps_by_standard = []
fps_by_triggerner = []
hold_for_i_loc = None  # reset as before; not read anywhere in this cell
for error in errors:
    sentence = error["text"]
    tokens = sentence.split()
    true = error["true_label"].split()
    stnd = error["stnd_label"].split()
    trig = error["trig_label"].split()
    for i, label in enumerate(true):
        if label != "O":
            continue
        # Check the standard model first, then TriggerNER.
        for model_tags, bucket in ((stnd, fps_by_standard), (trig, fps_by_triggerner)):
            if model_tags[i].startswith("B-"):
                bucket.append((sentence, i, tokens[i]))
print(len(fps_by_standard))
print(len(fps_by_triggerner))

4
22


In [76]:
# Show the standard model's false positives as (text, token index, token).
fps_by_standard

[('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  0,
  'India'),
 ('RT @ dinesh_rajini : Anyone please help them . # KeralaSOS , # KeralaFloods , # KeralaFloods , # Kerala , # KeralaFloodRelief',
  19,
  'Kerala'),
 ('My brothers , aunt and Grand-Parents are stuck in Moozhikkakadavu_pariyaram , Chalakkudy . Location - 10.308208,76.351140 . Phone - +918075659446 . Please RT so some1 can help them . Grandparents health getting worse . Havent had proper food in 2 days . # KeralaFloods # KeralaSOS',
  9,
  'Moozhikkakadavu_pariyaram'),
 ('2 ) A team of swayamsevaks takes care of providing food and shelter , especially in Kendriya Vidyalayas and other schools where many have taken refuge . # Sewavibhag # KeralaFloodRelief',
  16,
  'Kendriya')]

In [77]:
# Show TriggerNER's false positives as (text, token index, token).
fps_by_triggerner

[('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  0,
  'India'),
 ('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  13,
  'Thailands'),
 ('# BhopalWithKerala Here is the list of various collection centres at Bhopal where you can donate materials for the flood victims of Kerala . The collection center at AIIMS is at SU Office . Contact Sajith ( 9400480126 ) if you wish to drop your donations at AIIMS . # KeralaReliefFund',
  31,
  'SU'),
 ('As a relief measure to the people of severely flood hit Kerala , 9 lakhs litres of filtered drinking water sent to kerala by a water special train from Ratlam to Palghat.All 15 tanks filled with potable water & quality of water of each tank tested # helpinghand # KeralaFloodRelief',
  31,
  'Palghat.All'),
 ('A Family Need urgent Help . contact number:81138

---
### Distance ranges and averages

#### Floods test

In [78]:
# Summarise the trigger distances of the floods test set, overall and per
# outcome group.
# NOTE(review): span_and_avgs looks like it returns (min, max, mean, median),
# judging by the recorded output - confirm in its definition.
correct, wrong, fns, fps, mixed = categorize_dists(results_floods)

print("ALL:", span_and_avgs(preds_trigger_floods["distance_preds"]))
# These two groups are printed together with their sizes.
for _tag, _dists in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_dists), len(_dists))
for _tag, _dists in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_dists))

ALL: (0.007318167015910149, 1.5393142700195312, 0.562642913484187, 0.5306781530380249)
COR: (0.00908314622938633, 1.532187819480896, 0.5672643788986735, 0.5147483944892883) 117
WRO: (0.007318167015910149, 1.5393142700195312, 0.550627103406522, 0.5564592480659485) 45
FNs: (0.03536854684352875, 1.5393142700195312, 0.5632911107756875, 0.5615236163139343)
FPs: (0.024331141263246536, 1.221520185470581, 0.5562866181135178, 0.5145175457000732)
MIX: (0.007318167015910149, 0.6964780688285828, 0.35189811792224646, 0.35189811792224646)


#### Cyclone

In [79]:
# Summarise the trigger distances of the cyclone test set, overall and per
# outcome group.
# NOTE(review): span_and_avgs looks like it returns (min, max, mean, median),
# judging by the recorded output - confirm in its definition.
correct, wrong, fns, fps, mixed = categorize_dists(results_cyclone)

print("ALL:", span_and_avgs(preds_trigger_cyclone["distance_preds"]))
# These two groups are printed together with their sizes.
for _tag, _dists in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_dists), len(_dists))
for _tag, _dists in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_dists))

ALL: (0.010127319023013115, 1.9497393369674683, 0.582924752248548, 0.5292729139328003)
COR: (0.010127319023013115, 1.5650776624679565, 0.5855863977587407, 0.5325711369514465) 553
WRO: (0.08894459903240204, 1.9497393369674683, 0.5798899275740398, 0.5202752351760864) 485
FNs: (0.08894459903240204, 1.9497393369674683, 0.5736643144289653, 0.5171010494232178)
FPs: (0.1969078779220581, 1.8872004747390747, 0.6167959782141673, 0.5134379863739014)
MIX: (0.1928565800189972, 1.5596479177474976, 0.5611488607621962, 0.5384873151779175)


#### Hurricane

In [80]:
# Summarise the trigger distances of the hurricane test set, overall and per
# outcome group.
# NOTE(review): span_and_avgs looks like it returns (min, max, mean, median),
# judging by the recorded output - confirm in its definition.
correct, wrong, fns, fps, mixed = categorize_dists(results_hurricane)

print("ALL:", span_and_avgs(preds_trigger_hurricane["distance_preds"]))
# These two groups are printed together with their sizes.
for _tag, _dists in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_dists), len(_dists))
for _tag, _dists in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_dists))

ALL: (0.09380368888378143, 1.577452301979065, 0.5984589654040704, 0.5394184291362762)
COR: (0.10237621515989304, 1.577452301979065, 0.5997721154074837, 0.5397631525993347) 599
WRO: (0.09380368888378143, 1.5160787105560303, 0.5966672185884793, 0.5390737056732178) 439
FNs: (0.09380368888378143, 1.487603783607483, 0.5858266129028423, 0.5319508910179138)
FPs: (0.20924046635627747, 1.4783200025558472, 0.631042383722405, 0.615945041179657)
MIX: (0.23599475622177124, 1.5160787105560303, 0.6681870534306481, 0.5284000039100647)


---
### Distance ranges per trigger key

#### All test datasets

In [81]:
# Group the distances per trigger key over all three test sets combined.
trigkey_dists_all = trigkey_dists(
    [*results_floods, *results_cyclone, *results_hurricane]
)

In [82]:
# Rank the trigger keys by how many distances each collected (most first;
# ties keep their original order), then print each key's count and its
# span_and_avgs summary.
trigkey_dists_all_sorted = sorted(
    trigkey_dists_all.items(),
    key=lambda item: -len(item[1]),
)
for key, value in trigkey_dists_all_sorted:
    print(f"{key} ({len(value)} instances)")
    print(span_and_avgs(value))

aid for (62 instances)
(0.3565983176231384, 0.6640754342079163, 0.4974628773427779, 0.49907051026821136)
Center (51 instances)
(0.2686313986778259, 0.7943455576896667, 0.517889106390523, 0.5126776695251465)
surrounding (47 instances)
(0.1928565800189972, 0.8392031192779541, 0.4840906297272824, 0.4756549894809723)
has been send off to (35 instances)
(0.3808169364929199, 0.8775812387466431, 0.5031196406909397, 0.4841640591621399)
destroyed state (29 instances)
(0.31480714678764343, 0.6141843199729919, 0.4854974232870957, 0.5217439532279968)
flooding in parts of eastern (25 instances)
(0.20889054238796234, 0.7124873399734497, 0.44010600984096526, 0.4087555706501007)
ChildFund (23 instances)
(0.1307573914527893, 0.8186234831809998, 0.4128246896940729, 0.3879714012145996)
in flood affected regions of (23 instances)
(0.47596850991249084, 1.007007360458374, 0.5964240807553997, 0.5369107723236084)
HADR operations in (20 instances)
(0.12022939324378967, 0.5354222655296326, 0.3651120975613594, 0

#### Floods test

In [83]:
# Group the floods test distances per trigger key.
# NOTE(review): presumably a mapping of trigger key -> collection of distances;
# the following cell iterates it with .items() and len() - confirm in trigkey_dists.
trigkey_dists_floods = trigkey_dists(results_floods)

In [84]:
# Rank the floods trigger keys by how many distances each collected (most
# first; ties keep their original order), then print each key's count and its
# span_and_avgs summary.
trigkey_dists_floods_sorted = sorted(
    trigkey_dists_floods.items(),
    key=lambda item: -len(item[1]),
)
for key, value in trigkey_dists_floods_sorted:
    print(f"{key} ({len(value)} instances)")
    print(span_and_avgs(value))

Peoples of (4 instances)
(0.3373427987098694, 0.637721061706543, 0.476185567677021, 0.4648392051458359)
in Vypeen (2 instances)
(0.024331141263246536, 0.3470855951309204, 0.18570836819708347, 0.18570836819708347)
HADR operations in (2 instances)
(0.12022939324378967, 0.5354222655296326, 0.3278258293867111, 0.3278258293867111)
will donate (2 instances)
(0.16911208629608154, 0.2378338873386383, 0.20347298681735992, 0.20347298681735992)
sends medical assistance (2 instances)
(0.21371586620807648, 0.5519764423370361, 0.3828461542725563, 0.3828461542725563)
Sri . (2 instances)
(0.22623126208782196, 0.34657222032546997, 0.28640174120664597, 0.28640174120664597)
Highway 00 near Bellevue (2 instances)
(0.27785244584083557, 0.3999151587486267, 0.33888380229473114, 0.33888380229473114)
Kottayam district of (2 instances)
(0.29781386256217957, 0.731507420539856, 0.5146606415510178, 0.5146606415510178)
people from (2 instances)
(0.30389153957366943, 0.3078947961330414, 0.3058931678533554, 0.3058931

#### Cyclone

In [85]:
# Group the cyclone test distances per trigger key.
# NOTE(review): presumably a mapping of trigger key -> collection of distances;
# the following cell iterates it with .items() and len() - confirm in trigkey_dists.
trigkey_dists_cyclone = trigkey_dists(results_cyclone)

In [86]:
# Rank the cyclone trigger keys by how many distances each collected (most
# first; ties keep their original order), then print each key's count and its
# span_and_avgs summary.
trigkey_dists_cyclone_sorted = sorted(
    trigkey_dists_cyclone.items(),
    key=lambda item: -len(item[1]),
)
for key, value in trigkey_dists_cyclone_sorted:
    print(f"{key} ({len(value)} instances)")
    print(span_and_avgs(value))

surrounding (31 instances)
(0.1928565800189972, 0.8392031192779541, 0.4606323295062588, 0.4517415463924408)
destroyed state (25 instances)
(0.31480714678764343, 0.6141843199729919, 0.4877553606033325, 0.5335791110992432)
aid for (22 instances)
(0.3881165385246277, 0.6367505788803101, 0.5115425126119093, 0.5105254948139191)
ChildFund (21 instances)
(0.1307573914527893, 0.7726932764053345, 0.3980543975319181, 0.3879714012145996)
in flood affected regions of (20 instances)
(0.47596850991249084, 0.7853813767433167, 0.560721592605114, 0.5368008613586426)
Center (18 instances)
(0.2686313986778259, 0.7943455576896667, 0.5189210457934273, 0.494525209069252)
area surroundings of (13 instances)
(0.32943853735923767, 0.5695584416389465, 0.4218709147893466, 0.4203476309776306)
Missing from (12 instances)
(0.3081904947757721, 0.5464975833892822, 0.4218696281313896, 0.40770086646080017)
group (11 instances)
(0.3554522693157196, 0.7152473330497742, 0.4753641757098111, 0.44837117195129395)
Aryanadu pr

#### Hurricane

In [87]:
# Group the hurricane results per trigger key. The following cell treats the result as a
# mapping (it calls .items()) whose values are sized collections.
# NOTE(review): the values are presumably matching distances — confirm against trigkey_dists.
trigkey_dists_hurricane = trigkey_dists(results_hurricane)

In [88]:
# Order the hurricane trigger keys by how many values each one holds, largest group first.
trigkey_dists_hurricane_sorted = sorted(
    trigkey_dists_hurricane.items(),
    key=lambda pair: len(pair[1]),
    reverse=True,
)

# For every trigger key: a header line with its instance count, then the summary
# returned by span_and_avgs for that key's values.
# (Loop variable names are kept as-is because they live in the notebook's global namespace.)
for key, value in trigkey_dists_hurricane_sorted:
    print(f"{key} ({len(value)} instances)")
    print(span_and_avgs(value))

aid for (38 instances)
(0.3565983176231384, 0.6640754342079163, 0.4904068590779054, 0.4923182278871536)
Center (33 instances)
(0.3028624355792999, 0.7061203718185425, 0.5173262303525751, 0.5153437852859497)
has been send off to (25 instances)
(0.3808169364929199, 0.8775812387466431, 0.5115613758563995, 0.4841640591621399)
flooding in parts of eastern (19 instances)
(0.31886687874794006, 0.7124873399734497, 0.44069521834975794, 0.4078202247619629)
surrounding (16 instances)
(0.2814282178878784, 0.8168511986732483, 0.5295410864055157, 0.533111184835434)
People (14 instances)
(0.4073941111564636, 0.8640113472938538, 0.556374151791845, 0.5603800415992737)
Maldives (10 instances)
(0.2995936870574951, 0.5694178342819214, 0.37046225368976593, 0.3593597859144211)
HADR operations in (9 instances)
(0.287171870470047, 0.4643925428390503, 0.3705599341127608, 0.37912577390670776)
Attorney General (9 instances)
(0.4013945758342743, 0.8690780997276306, 0.6196030080318451, 0.6690441966056824)
Air Nati

(0.23794059455394745, 0.6004983186721802, 0.4192194566130638, 0.4192194566130638)
provide help in (2 instances)
(0.24570849537849426, 0.34429043531417847, 0.29499946534633636, 0.29499946534633636)
Extension (2 instances)
(0.2702694535255432, 0.7187539935112, 0.4945117235183716, 0.4945117235183716)
Welcome (2 instances)
(0.2704755663871765, 0.35555121302604675, 0.31301338970661163, 0.31301338970661163)
districts in (2 instances)
(0.2719154357910156, 0.42459991574287415, 0.3482576757669449, 0.3482576757669449)
in flood-ravaged (2 instances)
(0.2727722227573395, 0.3115840256214142, 0.29217812418937683, 0.29217812418937683)
Nebraska City to (2 instances)
(0.2747078537940979, 0.3673626780509949, 0.3210352659225464, 0.3210352659225464)
Flood Relief Program (2 instances)
(0.2780955135822296, 0.3104138672351837, 0.29425469040870667, 0.29425469040870667)
Kottayam district of (2 instances)
(0.29937291145324707, 0.32538318634033203, 0.31237804889678955, 0.31237804889678955)
Seen In (2 instances)


---
### Performance per trigger key

#### All test datasets

In [89]:
# Per-trigger-key evaluation over the three test result sets combined (joined with `+`).
# The output lists, for each trigger key, its instance count and accuracy, followed by a
# precision/recall/f1 record.
eval_per_trigkey(results_floods+results_cyclone+results_hurricane)

Trigkey: relief for. Instances: 3. Accuracy: 0.6667
{"precision":66.66666666666666,"recall":66.66666666666666,"f1":66.66666666666666}
Trigkey: scouring. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: in Vypeen. Instances: 11. Accuracy: 0.3636
{"precision":73.33333333333333,"recall":52.38095238095239,"f1":61.11111111111111}
Trigkey: trip to. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: situation in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Department of Health and Human Services. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: is facing. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: one. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: flood relief work. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: HADR operatio

{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: camp at. Instances: 11. Accuracy: 0.5455
{"precision":90.9090909090909,"recall":71.42857142857143,"f1":80.00000000000001}
Trigkey: hands over. Instances: 15. Accuracy: 0.6667
{"precision":100.0,"recall":72.22222222222221,"f1":83.87096774193547}
Trigkey: in the district. Instances: 4. Accuracy: 0.2500
{"precision":75.0,"recall":60.0,"f1":66.66666666666667}
Trigkey: visited flood-affected areas. Instances: 9. Accuracy: 0.5556
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: at EDAYARANMULA. Instances: 2. Accuracy: 0.5000
{"precision":66.66666666666666,"recall":66.66666666666666,"f1":66.66666666666666}
Trigkey: parts of flood affected areas in. Instances: 16. Accuracy: 0.7500
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Aid. Instances: 5. Accuracy: 0.4000
{"precision":100.0,"recall":55.55555555555556,"f1":71.42857142857143}
Trigkey: Flash Floods Surge Through Town in. Instances: 1. Accuracy: 0.0000
{

{"precision":63.63636363636363,"recall":63.63636363636363,"f1":63.63636363636363}
Trigkey: surrounding. Instances: 47. Accuracy: 0.4043
{"precision":72.91666666666666,"recall":53.84615384615385,"f1":61.94690265486726}
Trigkey: headed to Fullerton. Instances: 8. Accuracy: 0.8750
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Aryanadu province of. Instances: 13. Accuracy: 0.5385
{"precision":71.42857142857143,"recall":76.92307692307693,"f1":74.07407407407409}
Trigkey: Sappers. Instances: 3. Accuracy: 0.3333
{"precision":83.33333333333334,"recall":83.33333333333334,"f1":83.33333333333334}
Trigkey: landslides in State Of. Instances: 4. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: military ship. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: villages. Instances: 8. Accuracy: 0.6250
{"precision":83.33333333333334,"recall":71.42857142857143,"f1":76.92307692307693}
Trigkey: map. Instances: 6. Accuracy: 0.5000
{"precision":100.0,"rec

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Panama City. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: working in. Instances: 13. Accuracy: 0.0769
{"precision":57.14285714285714,"recall":30.76923076923077,"f1":40.0}
Trigkey: American Red Cross. Instances: 3. Accuracy: 0.3333
{"precision":60.0,"recall":42.857142857142854,"f1":49.99999999999999}
Trigkey: under water. Instances: 8. Accuracy: 0.6250
{"precision":66.66666666666666,"recall":80.0,"f1":72.72727272727272}
Trigkey: despatched to. Instances: 9. Accuracy: 0.5556
{"precision":100.0,"recall":28.57142857142857,"f1":44.44444444444444}
Trigkey: evacuation plan. Instances: 5. Accuracy: 0.2000
{"precision":75.0,"recall":50.0,"f1":60.0}
Trigkey: nuclear power plant. Instances: 2. Accuracy: 0.5000
{"precision":50.0,"recall":66.66666666666666,"f1":57.14285714285714}
Trigkey: rescue. Instances: 3. Accuracy: 0.3333
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: am from East. Inst

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Haj Committee. Instances: 4. Accuracy: 0.5000
{"precision":83.33333333333334,"recall":71.42857142857143,"f1":76.92307692307693}
Trigkey: Base outside of Omaha. Instances: 5. Accuracy: 0.2000
{"precision":80.0,"recall":44.44444444444444,"f1":57.142857142857146}
Trigkey: will be going to. Instances: 4. Accuracy: 0.5000
{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: Attorney General. Instances: 12. Accuracy: 0.7500
{"precision":80.0,"recall":66.66666666666666,"f1":72.72727272727272}
Trigkey: Air Force. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: rose in. Instances: 4. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: has been asked. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: rice. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Office. Instances: 1. Accuracy: 1.0000
{"

{"precision":75.0,"recall":60.0,"f1":66.66666666666667}
Trigkey: flood affected Pandalam. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: community in. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: farmer. Instances: 9. Accuracy: 0.6667
{"precision":83.33333333333334,"recall":62.5,"f1":71.42857142857143}
Trigkey: headquarters in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Floods affected people. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: ’ s radio. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: made in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: contribute. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: state of emergency was. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}

{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: Raising Canes. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: Districts in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Locals in. Instances: 2. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: goods in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Cattlemens. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: ’ s coffee output. Instances: 2. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: blizzard in western. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flash floods in Ellicott City. Instances: 3. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Southern. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: state electricity board officer. Instances

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Health Minister. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Correctional Youth Institute. Instances: 1. Accuracy: 0.0000
{"precision":66.66666666666666,"recall":100.0,"f1":80.0}
Trigkey: is flood hit. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: area of. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: searching. Instances: 2. Accuracy: 0.5000
{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: reservation. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: victim.all the way from. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: FLOOD WATERS in. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: floods struck. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood affected area. Instances: 1. Ac

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: leaving from. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Adamannu. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}


#### Floods test

In [90]:
# Per-trigger-key evaluation (instance count, accuracy, precision/recall/f1) for the
# floods results only.
eval_per_trigkey(results_floods)

Trigkey: relief for. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: scouring. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: in Vypeen. Instances: 2. Accuracy: 0.5000
{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: trip to. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: situation in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Department of Health and Human Services. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: is facing. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: one. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: flood relief work. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: HADR operations in. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0

{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Flash Floods Surge Through Town in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: floods across. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: FL. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":100.0,"f1":66.66666666666667}
Trigkey: its downtown. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Medicine Flood Relief Donation Management Center. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: needed.Drop. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: house is in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: omaha. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: families in flood hit. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":10

#### Cyclone

In [91]:
# Per-trigger-key evaluation (instance count, accuracy, precision/recall/f1) for the
# cyclone results only.
eval_per_trigkey(results_cyclone)

Trigkey: Extension. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood levels. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: eastern. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: displaced. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: other countries. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: southwest Iowa. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: ChildFund. Instances: 21. Accuracy: 0.6190
{"precision":73.33333333333333,"recall":61.111111111111114,"f1":66.66666666666667}
Trigkey: Farm Hotline. Instances: 7. Accuracy: 0.5714
{"precision":72.72727272727273,"recall":61.53846153846154,"f1":66.66666666666667}
Trigkey: county seat of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: HADR operations in. Instances: 9. Ac

{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: omaha. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: development. Instances: 10. Accuracy: 0.4000
{"precision":90.9090909090909,"recall":58.82352941176471,"f1":71.42857142857143}
Trigkey: Victims in. Instances: 4. Accuracy: 0.5000
{"precision":50.0,"recall":20.0,"f1":28.571428571428573}
Trigkey: Residents of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: CPIM of. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: hospitals. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: flood disaster in. Instances: 4. Accuracy: 0.5000
{"precision":100.0,"recall":25.0,"f1":40.0}
Trigkey: death toll in. Instances: 7. Accuracy: 0.4286
{"precision":75.0,"recall":23.076923076923077,"f1":35.29411764705882}
Trigkey: in Howard County. Instances: 2. Accuracy: 0.0000
{"precision":33.3333333333333

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: From. Instances: 4. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: ’ s Rural America Relief. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Forecast. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood in Kerala. Instances: 9. Accuracy: 0.5556
{"precision":80.0,"recall":50.0,"f1":61.53846153846154}
Trigkey: historic flood in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Medicine Flood Relief Donation Management Center. Instances: 4. Accuracy: 0.2500
{"precision":100.0,"recall":28.57142857142857,"f1":44.44444444444444}
Trigkey: Air National Guard. Instances: 5. Accuracy: 0.4000
{"precision":87.5,"recall":58.333333333333336,"f1":70.0}
Trigkey: Flash floods struck. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: flood crisis in. Instances: 1. Accuracy: 1.0000
{"precis

{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Trigkey: RELIEF. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: washed out parts of. Instances: 3. Accuracy: 0.3333
{"precision":75.0,"recall":100.0,"f1":85.71428571428571}
Trigkey: flash flooding sweeps through Ellicott City. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: donation site at. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Flooding Slams. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":80.0,"f1":88.88888888888889}
Trigkey: operations in. Instances: 1. Accuracy: 0.0000
{"precision":66.66666666666666,"recall":66.66666666666666,"f1":66.66666666666666}
Trigkey: who. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: attack where. Instances: 4. Accuracy: 0.0000
{"precision":40.0,"recall":50.0,"f1":44.44444444444444}
Trigkey: Controlled Burn Season. Instances: 1. Accuracy: 

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: coming in from. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: team in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: its Main Street. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: tv stations. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Seen In. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: financial aid to. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: at EDAYARANMULA. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: sounds alert. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: headed out to. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: relief centers allover. Instances: 1. Accuracy: 0.0000
{"precisio

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: blizzard in western. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flash floods in Ellicott City. Instances: 3. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Southern. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: state electricity board officer. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: coasts. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Meet me in. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: disasters. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Missing in. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}


#### Hurricane

In [92]:
# Per-trigger-key evaluation (instance count, accuracy, precision/recall/f1) for the
# hurricane results only.
eval_per_trigkey(results_hurricane)

Trigkey: rivers of. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: southwest Iowa. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Homes. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: foreign aid. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: From. Instances: 5. Accuracy: 0.2000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: landslides in State Of. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: lived in. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: hands over. Instances: 8. Accuracy: 0.6250
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: Farm Hotline. Instances: 5. Accuracy: 0.2000
{"precision":100.0,"recall":25.0,"f1":40.0}
Trigkey: CPIM of. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Flash Flood i

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: NGO in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: hospital in Kearney. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: set out from. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Ministers. Instances: 3. Accuracy: 0.3333
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: visited flood-affected areas. Instances: 6. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: From Lincoln. Instances: 7. Accuracy: 0.4286
{"precision":83.33333333333334,"recall":45.45454545454545,"f1":58.823529411764696}
Trigkey: Missing from. Instances: 8. Accuracy: 0.5000
{"precision":60.0,"recall":37.5,"f1":46.15384615384615}
Trigkey: times in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: signs. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: ai

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Manitoba to. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Victims in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: cleaned in. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":100.0,"f1":66.66666666666667}
Trigkey: house in. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: Veterans. Instances: 4. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Dakotas. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: folks of. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: populations. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: taken. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Peoples. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":20.0

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: another country. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: agencies. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":25.0,"f1":40.0}
Trigkey: ’ s Lobby Day For Planned Parenthood. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Synod Disaster relief. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Health Minister. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: set up in. Instances: 7. Accuracy: 0.7143
{"precision":100.0,"recall":62.5,"f1":76.92307692307692}
Trigkey: Correctional Youth Institute. Instances: 1. Accuracy: 0.0000
{"precision":66.66666666666666,"recall":100.0,"f1":80.0}
Trigkey: landed at. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: is flood hit. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: country of. Inst

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: store in Hays. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood-ravaged Valley. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Farm. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: sounds alert. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: torrents devastate. Instances: 3. Accuracy: 0.3333
{"precision":66.66666666666666,"recall":40.0,"f1":49.99999999999999}
Trigkey: ground. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: has extended. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: other countries. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: tornado drill day for Iowa. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: States. Instances: 1. Accuracy: 0.0000
{"precision":

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: declared in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: leaving from. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Southern. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Adamannu. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}


---
### Performance per distance interval

#### Floods test

In [93]:
# Evaluate the floods results bucketed by distance interval. The second argument (0.1)
# matches the width of the printed intervals (0.00-0.10, 0.10-0.20, ...).
eval_per_dist_interval(results_floods, 0.1)

Interval: 0.00-0.10. Instances: 9. Accuracy: 0.5556
{"precision":76.92307692307693,"recall":76.92307692307693,"f1":76.92307692307693}
Interval: 0.10-0.20. Instances: 6. Accuracy: 0.6667
{"precision":85.71428571428571,"recall":75.0,"f1":80.0}
Interval: 0.20-0.30. Instances: 11. Accuracy: 0.6364
{"precision":82.35294117647058,"recall":82.35294117647058,"f1":82.35294117647058}
Interval: 0.30-0.40. Instances: 23. Accuracy: 0.9130
{"precision":95.45454545454545,"recall":91.30434782608695,"f1":93.33333333333334}
Interval: 0.40-0.50. Instances: 22. Accuracy: 0.7273
{"precision":86.20689655172413,"recall":83.33333333333334,"f1":84.7457627118644}
Interval: 0.50-0.60. Instances: 24. Accuracy: 0.6250
{"precision":86.20689655172413,"recall":75.75757575757575,"f1":80.64516129032258}
Interval: 0.60-0.70. Instances: 24. Accuracy: 0.7083
{"precision":82.05128205128204,"recall":88.88888888888889,"f1":85.33333333333333}
Interval: 0.70-0.80. Instances: 12. Accuracy: 0.5833
{"precision":92.85714285714286,

#### Cyclone

In [94]:
# Evaluate the cyclone results bucketed by distance interval. The second argument (0.1)
# matches the width of the printed intervals (0.00-0.10, 0.10-0.20, ...).
eval_per_dist_interval(results_cyclone, 0.1)

Interval: 0.00-0.10. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Interval: 0.10-0.20. Instances: 24. Accuracy: 0.7500
{"precision":75.0,"recall":60.0,"f1":66.66666666666667}
Interval: 0.20-0.30. Instances: 59. Accuracy: 0.5932
{"precision":85.71428571428571,"recall":65.06024096385542,"f1":73.97260273972603}
Interval: 0.30-0.40. Instances: 138. Accuracy: 0.5507
{"precision":86.0655737704918,"recall":60.69364161849711,"f1":71.18644067796609}
Interval: 0.40-0.50. Instances: 221. Accuracy: 0.4389
{"precision":80.60606060606061,"recall":48.717948717948715,"f1":60.730593607305934}
Interval: 0.50-0.60. Instances: 240. Accuracy: 0.5417
{"precision":80.82191780821918,"recall":57.096774193548384,"f1":66.91871455576559}
Interval: 0.60-0.70. Instances: 110. Accuracy: 0.5455
{"precision":84.12698412698413,"recall":63.85542168674698,"f1":72.60273972602738}
Interval: 0.70-0.80. Instances: 74. Accuracy: 0.5270
{"precision":85.71428571428571,"recall":60.0,"f1

#### Hurricane

In [95]:
# Evaluate the hurricane results bucketed by distance interval. The second argument (0.1)
# matches the width of the printed intervals (0.00-0.10, 0.10-0.20, ...).
eval_per_dist_interval(results_hurricane, 0.1)

Interval: 0.00-0.10. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Interval: 0.10-0.20. Instances: 16. Accuracy: 0.5000
{"precision":100.0,"recall":28.57142857142857,"f1":44.44444444444444}
Interval: 0.20-0.30. Instances: 62. Accuracy: 0.6452
{"precision":75.55555555555556,"recall":59.64912280701754,"f1":66.66666666666666}
Interval: 0.30-0.40. Instances: 142. Accuracy: 0.5915
{"precision":74.52830188679245,"recall":57.66423357664233,"f1":65.02057613168724}
Interval: 0.40-0.50. Instances: 214. Accuracy: 0.5327
{"precision":81.03448275862068,"recall":43.51851851851852,"f1":56.626506024096386}
Interval: 0.50-0.60. Instances: 204. Accuracy: 0.5980
{"precision":75.0,"recall":49.23076923076923,"f1":59.44272445820434}
Interval: 0.60-0.70. Instances: 119. Accuracy: 0.5042
{"precision":67.3913043478261,"recall":48.4375,"f1":56.36363636363637}
Interval: 0.70-0.80. Instances: 81. Accuracy: 0.6543
{"precision":78.33333333333333,"recall":60.256410256410255,"f1":68.115942028

---
### Mann-Whitney U Test

#### Between datasets' overall distances

In [96]:
# Pull the "distance_preds" entry out of each dataset's preds_trigger_* object,
# in floods / cyclone / hurricane order.
dists_floods, dists_cyclone, dists_hurricane = (
    preds["distance_preds"]
    for preds in (preds_trigger_floods, preds_trigger_cyclone, preds_trigger_hurricane)
)

In [97]:
# Mann-Whitney U test between the floods and cyclone distance samples.
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(dists_floods, dists_cyclone, alternative="two-sided")

MannwhitneyuResult(statistic=81875.0, pvalue=0.5913491178016943)

In [98]:
# Mann-Whitney U test between the floods and hurricane distance samples.
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(dists_floods, dists_hurricane, alternative="two-sided")

MannwhitneyuResult(statistic=78934.0, pvalue=0.20992162197619058)

In [99]:
# Mann-Whitney U test between the cyclone and hurricane distance samples.
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(dists_cyclone, dists_hurricane, alternative="two-sided")

MannwhitneyuResult(statistic=519941.5, pvalue=0.16906479084288906)

#### Between distances for correct and incorrect predictions within a dataset

In [100]:
# Split each dataset's results with categorize_dists, which is unpacked here as five values.
# Only the first two are kept (named as the distances for correct and for wrong
# predictions); the remaining three are discarded.
# NOTE(review): binding `_` in a notebook shadows IPython's "last output" variable for the
# rest of the session — consider a different throwaway name if `_` is needed interactively.
correct_dists_floods, wrong_dists_floods, _, _, _ = categorize_dists(results_floods)
correct_dists_cyclone, wrong_dists_cyclone, _, _, _ = categorize_dists(results_cyclone)
correct_dists_hurricane, wrong_dists_hurricane, _, _, _ = categorize_dists(results_hurricane)

In [101]:
# Mann-Whitney U test between the distances of correct and of wrong predictions (floods).
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(correct_dists_floods, wrong_dists_floods, alternative="two-sided")

MannwhitneyuResult(statistic=2666.0, pvalue=0.9017911640574201)

In [102]:
# Mann-Whitney U test between the distances of correct and of wrong predictions (cyclone).
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(correct_dists_cyclone, wrong_dists_cyclone, alternative="two-sided")

MannwhitneyuResult(statistic=136149.0, pvalue=0.6711460750243489)

In [103]:
# Mann-Whitney U test between the distances of correct and of wrong predictions (hurricane).
# `alternative` is spelled out because SciPy's default has changed across releases
# (older versions defaulted to a deprecated mode reporting half the two-sided p-value),
# so the p-value shown here means the same thing on any installed SciPy.
stats.mannwhitneyu(correct_dists_hurricane, wrong_dists_hurricane, alternative="two-sided")

MannwhitneyuResult(statistic=132755.0, pvalue=0.7894713647523119)

---
### Trigger keys and tendency to predict no named entities (NEs)

#### All test datasets

In [104]:
# Per-trigger-key statistics over the three test result sets combined (joined with `+`).
# The cell displays the number of entries in the result.
# NOTE(review): going by the helper's name these are counts of predictions without any
# entity — confirm against entityless_predictions_per_trigkey.
trigkeys_all = entityless_predictions_per_trigkey(results_floods+results_cyclone+results_hurricane)
len(trigkeys_all)

613

In [105]:
# Rank the entries by their "total" count, largest first; the bare expression is the
# cell's displayed output.
sorted(
    trigkeys_all.items(),
    key=lambda entry: entry[1]["total"],
    reverse=True,
)

[('aid for', {'total': 62, 'pred': 39, 'true': 19}),
 ('Center', {'total': 51, 'pred': 20, 'true': 12}),
 ('surrounding', {'total': 47, 'pred': 19, 'true': 12}),
 ('has been send off to', {'total': 35, 'pred': 19, 'true': 13}),
 ('destroyed state', {'total': 29, 'pred': 17, 'true': 14}),
 ('flooding in parts of eastern', {'total': 25, 'pred': 14, 'true': 8}),
 ('ChildFund', {'total': 23, 'pred': 12, 'true': 8}),
 ('in flood affected regions of', {'total': 23, 'pred': 11, 'true': 6}),
 ('HADR operations in', {'total': 20, 'pred': 9, 'true': 6}),
 ('Maldives', {'total': 20, 'pred': 15, 'true': 15}),
 ('Missing from', {'total': 20, 'pred': 13, 'true': 6}),
 ('People', {'total': 18, 'pred': 7, 'true': 3}),
 ('Kodagu flood victims', {'total': 17, 'pred': 5, 'true': 4}),
 ('Air National Guard', {'total': 16, 'pred': 9, 'true': 7}),
 ('parts of flood affected areas in', {'total': 16, 'pred': 12, 'true': 9}),
 ('group', {'total': 15, 'pred': 6, 'true': 1}),
 ('is committed to support', {'total

#### Floods test

In [106]:
# Per-trigger-key statistics for the floods results; the cell displays the number of
# entries in the result.
# NOTE(review): going by the helper's name these are counts of predictions without any
# entity — confirm against entityless_predictions_per_trigkey.
trigkeys_floods = entityless_predictions_per_trigkey(results_floods)
len(trigkeys_floods)

134

In [107]:
# Rank the floods entries by their "total" count, largest first; the bare expression is
# the cell's displayed output.
sorted(
    trigkeys_floods.items(),
    key=lambda entry: entry[1]["total"],
    reverse=True,
)

[('Peoples of', {'total': 4, 'pred': 2, 'true': 2}),
 ('in Vypeen', {'total': 2, 'pred': 1, 'true': 1}),
 ('HADR operations in', {'total': 2, 'pred': 0, 'true': 0}),
 ('will donate', {'total': 2, 'pred': 0, 'true': 0}),
 ('sends medical assistance', {'total': 2, 'pred': 0, 'true': 0}),
 ('Sri .', {'total': 2, 'pred': 0, 'true': 0}),
 ('Highway 00 near Bellevue', {'total': 2, 'pred': 2, 'true': 2}),
 ('Kottayam district of', {'total': 2, 'pred': 1, 'true': 0}),
 ('people from', {'total': 2, 'pred': 1, 'true': 1}),
 ('ChildFund', {'total': 2, 'pred': 2, 'true': 2}),
 ('Sunrise hospital', {'total': 2, 'pred': 1, 'true': 1}),
 ('aid for', {'total': 2, 'pred': 1, 'true': 0}),
 ('Extension', {'total': 2, 'pred': 1, 'true': 1}),
 ('has been send off to', {'total': 2, 'pred': 1, 'true': 2}),
 ('Air National Guard', {'total': 2, 'pred': 1, 'true': 1}),
 ('airlifted to', {'total': 2, 'pred': 0, 'true': 0}),
 ('’ s Lobby Day For Planned Parenthood', {'total': 2, 'pred': 2, 'true': 2}),
 ('landsli

#### Cyclone

In [108]:
# Per-trigger-key statistics for the cyclone results; the cell displays the number of
# entries in the result.
# NOTE(review): going by the helper's name these are counts of predictions without any
# entity — confirm against entityless_predictions_per_trigkey.
trigkeys_cyclone = entityless_predictions_per_trigkey(results_cyclone)
len(trigkeys_cyclone)

398

In [109]:
# Rank the cyclone entries by their "total" count, largest first; the bare expression is
# the cell's displayed output.
sorted(
    trigkeys_cyclone.items(),
    key=lambda entry: entry[1]["total"],
    reverse=True,
)

[('surrounding', {'total': 31, 'pred': 11, 'true': 6}),
 ('destroyed state', {'total': 25, 'pred': 15, 'true': 11}),
 ('aid for', {'total': 22, 'pred': 15, 'true': 6}),
 ('ChildFund', {'total': 21, 'pred': 10, 'true': 6}),
 ('in flood affected regions of', {'total': 20, 'pred': 9, 'true': 4}),
 ('Center', {'total': 18, 'pred': 7, 'true': 5}),
 ('area surroundings of', {'total': 13, 'pred': 6, 'true': 6}),
 ('Missing from', {'total': 12, 'pred': 7, 'true': 2}),
 ('group', {'total': 11, 'pred': 5, 'true': 0}),
 ('Aryanadu province of', {'total': 10, 'pred': 4, 'true': 3}),
 ('development', {'total': 10, 'pred': 5, 'true': 3}),
 ('Ramthakur College', {'total': 10, 'pred': 5, 'true': 4}),
 ('fund Relief', {'total': 10, 'pred': 6, 'true': 2}),
 ('HADR operations in', {'total': 9, 'pred': 4, 'true': 4}),
 ('Maldives', {'total': 9, 'pred': 7, 'true': 7}),
 ('in Vypeen', {'total': 9, 'pred': 2, 'true': 1}),
 ('Kodagu flood victims', {'total': 9, 'pred': 5, 'true': 3}),
 ('flood in Kerala', {'t

#### Hurricane

In [110]:
# Aggregate the hurricane test results per trigger key. The result supports len()
# and .items(); each value is a dict with 'total', 'pred' and 'true' counts.
# NOTE(review): presumably 'pred'/'true' count samples whose predicted/gold labels
# contain no entity - confirm against entityless_predictions_per_trigkey.
trigkeys_hurricane = entityless_predictions_per_trigkey(results_hurricane)
len(trigkeys_hurricane)

403

In [111]:
# Rank the hurricane trigger keys by their 'total' count, largest first.
sorted(
    trigkeys_hurricane.items(),
    key=lambda key_and_counts: key_and_counts[1]["total"],
    reverse=True,
)

[('aid for', {'total': 38, 'pred': 23, 'true': 13}),
 ('Center', {'total': 33, 'pred': 13, 'true': 7}),
 ('has been send off to', {'total': 25, 'pred': 14, 'true': 8}),
 ('flooding in parts of eastern', {'total': 19, 'pred': 9, 'true': 5}),
 ('surrounding', {'total': 16, 'pred': 8, 'true': 6}),
 ('People', {'total': 14, 'pred': 6, 'true': 3}),
 ('Maldives', {'total': 10, 'pred': 8, 'true': 8}),
 ('HADR operations in', {'total': 9, 'pred': 5, 'true': 2}),
 ('Attorney General', {'total': 9, 'pred': 9, 'true': 8}),
 ('Air National Guard', {'total': 9, 'pred': 7, 'true': 6}),
 ('hands over', {'total': 8, 'pred': 4, 'true': 2}),
 ('Ngos in', {'total': 8, 'pred': 3, 'true': 3}),
 ('HC', {'total': 8, 'pred': 7, 'true': 4}),
 ('Missing from', {'total': 8, 'pred': 6, 'true': 4}),
 ('is committed to support', {'total': 8, 'pred': 5, 'true': 5}),
 ('Flood Relief Operations', {'total': 8, 'pred': 6, 'true': 2}),
 ('flood relief work in', {'total': 7, 'pred': 3, 'true': 4}),
 ('remote parts', {'tot

---
### Distances and tendency to predict no NEs

#### All test datasets

In [112]:
# Pool the three test result lists into one and summarise them per distance interval.
# The second argument (0.1) matches the width of the intervals used as output keys.
entityless_predictions_per_dist_interval(results_floods+results_cyclone+results_hurricane, 0.1)

{'0.00 - 0.10': {'total': 13, 'pred': 3, 'true': 3},
 '0.10 - 0.20': {'total': 46, 'pred': 25, 'true': 16},
 '0.20 - 0.30': {'total': 132, 'pred': 60, 'true': 44},
 '0.30 - 0.40': {'total': 303, 'pred': 147, 'true': 103},
 '0.40 - 0.50': {'total': 457, 'pred': 247, 'true': 152},
 '0.50 - 0.60': {'total': 468, 'pred': 222, 'true': 147},
 '0.60 - 0.70': {'total': 253, 'pred': 97, 'true': 70},
 '0.70 - 0.80': {'total': 167, 'pred': 80, 'true': 48},
 '0.80 - 0.90': {'total': 123, 'pred': 63, 'true': 50},
 '0.90 - 1.00': {'total': 84, 'pred': 43, 'true': 30},
 '1.00 - 1.10': {'total': 52, 'pred': 25, 'true': 18},
 '1.10 - 1.20': {'total': 51, 'pred': 24, 'true': 13},
 '1.20 - 1.30': {'total': 40, 'pred': 15, 'true': 12},
 '1.30 - 1.40': {'total': 22, 'pred': 14, 'true': 11},
 '1.40 - 1.50': {'total': 15, 'pred': 12, 'true': 10},
 '1.50 - 1.60': {'total': 9, 'pred': 3, 'true': 3},
 '1.60 - 1.70': {'total': 1, 'pred': 1, 'true': 0},
 '1.80 - 1.90': {'total': 1, 'pred': 0, 'true': 0},
 '1.90 -

#### Floods test

In [113]:
# Floods test results only, summarised per distance interval.
# The second argument (0.1) matches the width of the intervals used as output keys.
entityless_predictions_per_dist_interval(results_floods, 0.1)

{'0.00 - 0.10': {'total': 9, 'pred': 1, 'true': 2},
 '0.10 - 0.20': {'total': 6, 'pred': 0, 'true': 0},
 '0.20 - 0.30': {'total': 11, 'pred': 1, 'true': 2},
 '0.30 - 0.40': {'total': 23, 'pred': 10, 'true': 9},
 '0.40 - 0.50': {'total': 22, 'pred': 6, 'true': 7},
 '0.50 - 0.60': {'total': 24, 'pred': 5, 'true': 5},
 '0.60 - 0.70': {'total': 24, 'pred': 5, 'true': 4},
 '0.70 - 0.80': {'total': 12, 'pred': 3, 'true': 1},
 '0.80 - 0.90': {'total': 14, 'pred': 2, 'true': 2},
 '0.90 - 1.00': {'total': 5, 'pred': 0, 'true': 0},
 '1.00 - 1.10': {'total': 1, 'pred': 0, 'true': 0},
 '1.10 - 1.20': {'total': 5, 'pred': 0, 'true': 0},
 '1.20 - 1.30': {'total': 3, 'pred': 1, 'true': 1},
 '1.50 - 1.60': {'total': 3, 'pred': 1, 'true': 1}}

#### Cyclone

In [114]:
# Cyclone test results only, summarised per distance interval.
# The second argument (0.1) matches the width of the intervals used as output keys.
entityless_predictions_per_dist_interval(results_cyclone, 0.1)

{'0.00 - 0.10': {'total': 3, 'pred': 2, 'true': 1},
 '0.10 - 0.20': {'total': 24, 'pred': 13, 'true': 10},
 '0.20 - 0.30': {'total': 59, 'pred': 26, 'true': 16},
 '0.30 - 0.40': {'total': 138, 'pred': 62, 'true': 43},
 '0.40 - 0.50': {'total': 221, 'pred': 114, 'true': 67},
 '0.50 - 0.60': {'total': 240, 'pred': 108, 'true': 71},
 '0.60 - 0.70': {'total': 110, 'pred': 36, 'true': 26},
 '0.70 - 0.80': {'total': 74, 'pred': 28, 'true': 13},
 '0.80 - 0.90': {'total': 54, 'pred': 23, 'true': 21},
 '0.90 - 1.00': {'total': 30, 'pred': 12, 'true': 8},
 '1.00 - 1.10': {'total': 24, 'pred': 7, 'true': 8},
 '1.10 - 1.20': {'total': 18, 'pred': 8, 'true': 8},
 '1.20 - 1.30': {'total': 18, 'pred': 5, 'true': 4},
 '1.30 - 1.40': {'total': 11, 'pred': 7, 'true': 6},
 '1.40 - 1.50': {'total': 7, 'pred': 5, 'true': 5},
 '1.50 - 1.60': {'total': 4, 'pred': 1, 'true': 1},
 '1.60 - 1.70': {'total': 1, 'pred': 1, 'true': 0},
 '1.80 - 1.90': {'total': 1, 'pred': 0, 'true': 0},
 '1.90 - 2.00': {'total': 1,

#### Hurricane

In [115]:
# Hurricane test results only, summarised per distance interval.
# The second argument (0.1) matches the width of the intervals used as output keys.
entityless_predictions_per_dist_interval(results_hurricane, 0.1)

{'0.00 - 0.10': {'total': 1, 'pred': 0, 'true': 0},
 '0.10 - 0.20': {'total': 16, 'pred': 12, 'true': 6},
 '0.20 - 0.30': {'total': 62, 'pred': 33, 'true': 26},
 '0.30 - 0.40': {'total': 142, 'pred': 75, 'true': 51},
 '0.40 - 0.50': {'total': 214, 'pred': 127, 'true': 78},
 '0.50 - 0.60': {'total': 204, 'pred': 109, 'true': 71},
 '0.60 - 0.70': {'total': 119, 'pred': 56, 'true': 40},
 '0.70 - 0.80': {'total': 81, 'pred': 49, 'true': 34},
 '0.80 - 0.90': {'total': 55, 'pred': 38, 'true': 27},
 '0.90 - 1.00': {'total': 49, 'pred': 31, 'true': 22},
 '1.00 - 1.10': {'total': 27, 'pred': 18, 'true': 10},
 '1.10 - 1.20': {'total': 28, 'pred': 16, 'true': 5},
 '1.20 - 1.30': {'total': 19, 'pred': 9, 'true': 7},
 '1.30 - 1.40': {'total': 11, 'pred': 7, 'true': 5},
 '1.40 - 1.50': {'total': 8, 'pred': 7, 'true': 5},
 '1.50 - 1.60': {'total': 2, 'pred': 1, 'true': 1}}

---
### Difference between trigger entities and trigger keys

In [116]:
# Keep only the first field of every unique trigger entity (the entity string),
# then report how many there are and show the first three.
trigger_entity_strings = [entity_entry[0] for entity_entry in unique_trigger_entities]
print(len(trigger_entity_strings), trigger_entity_strings[:3], sep="\n")

1909
['in', 'Nebraska', 'Iowa']


In [117]:
trigger_keys = ['flood affected regions', 'Pathanamthita', 'Aid', 'Falls City', 'in Hays', 'Frederick County in', 'Southern', 'washed out', 'kitchen', 'another country', 'flooding in eastern', 'between Iowa', 'Church', 'flood-hit areas', 'Ladies of', 'donate to', 'operations in', 'give aid to flood-hit', 'south and north', 'around', 'in Lynch', 'TRAIN BETWEEN', 'it recovers from', 'in Cochin', 'Metro in', 'People from South India', 'Pakistan', 'Maryland flooding', 'public', 'raised in', 'landslide relief', 'state of emergency for', 'flood relief campaign', 'its flood', 'Hindon to', 'Spent', 'residents of Pierce', 'rain-soaked', 'deployment to', 'flash flood emergency has been issued for', 'lives', 'governor of', 'east of', 'city where roads', 'States', 'stations', 'at EDAYARANMULA', 'Kuthiyathodu', 'nations', 'ESUs', 'WJZ', 'eastern part of', 'flood relief account', 'Farmers Union Foundation', 'in relief camps across', 'its surroundings', 'Near malakara temple aranmula', 'Train to', 'flash flooding hits', 'flash floods swept through', 'will be send', 'waters in', 'CMs Flood relief fund', 'convoy from', 'Terriers', 'Honerable District Collector', 'for parts of', 'flood relief material', 'donating', 'is on site in', 'delivering', 'plants in', 'move over', 'remote parts', 'Nagarkovil', 'sets up', 'bank in', 'areas across southern', 'Richardson Counties', 'United Way', 'floods in central', 'to Aranmula in', 'storm', 'Calicut', 'in pongalakariyil colony of', 'its Main Street', 'Pumphouse at', 'University of', 'bases in Nebraska', 'relief for', 'flash flooding ripped through', 'Flood-Hit', 'required at Rajiv Gandhi stadium', 'agencies', 'Government of', 'farm acres', 'FLOODING IN MARYLAND', 'girl', 'in Boyd County', 'Residents', 'State Patrol', 'attack where', 'State Disaster Management Authority', 'refugees in', 'Bihar', 'populations', 'Pathanamthitta district', 'From', 'Corp', 'Ambassador to', 'Lower 00 In Nebraska', 'are stationed in', 'Farm', 'Gov .', 'Breweries 
throughout', 'Washington counties in', 'Nebraska City to', 'Madapura in', 'Football Team', 'forest', 'flood zone', 'city hit by', 'Took', 'rescue efforts across', 'Pala to', 'will be going to', 'Force Camp', 'haul to', 'Air', 'CM relief fund', 'Medical Center', 'Spencer', 'Erie', 'flood-ravaged Valley', 'taken', 'airlifted to', 'here in Nebraska', 'mishap', 'despatched to', 'collect', 'country of', 'city neighborhood', 'store in Hays', 'pradesh mahila congress', 'Red Cross of Nebraska', 'floodwaters in Maryland ’ s', 'govt announces', 'reported in', 'from trivandrum', 'room', 'Medicine Flood Relief Donation Management Center', 'lottery', 'flood relief fund for', 'Ratmalana', 'roads in Eastern', 'Town Hit by', 'flash flood emergency has been issued for Ellicott City in', 'river in Nebraska', 'distributed at', 'floods in rural', 'stuck at', 'in Fremont', 'landslides in State Of Kerala of', 'HC', 'flooding from Ellicott City', 'grapples to recover', 'landslides to', 'Flooding Destroys', 'coming in from', 'rails', 'visited', 'tributaries into', 'cattle', 'is committed to support', 'in SW', 'flash flood emergency has been issued for Ellicott City in Howard County', 'Serves', 'relief aid from', 'flood relief camps', 'its downtown', 'Flooding submerged parts of', 'state of emergency', 'Little Patuxtent in Laurel', 'Tekamah south to', 'set out from', 'Marylands', 'village of Idukki district', '’ s Floods', 'Hotel Livia Tower', 'needed.Drop', 'Attorney General', 'Flood-Affected', 'Flood Relief Operations', 'Red Cross of', 'Tamil Nadu', 'Nationals in', 'headed from', 'in Ellicot City', 'doctors', 'work in', 'distributed at UC College', 'was discovered in', 'land in', 'flood-affected areas of', 'camps across flood-affected', 'in kochi', 'omaha', 'hit by', 'displaced', 'is flood hit', 'flood hits Ellicott City', 'cm', 'areas across', 'area surroundings of', 'downtown', 'near Alangad', 'Region', 'push toward', 'Times of', 'American Red Cross', 'Congress Office', 'Gujarat', 'NE 
cut off from', 'Captain & crew of', 'Haripad', 'smashed into Bangladesh', 'Kumbalam', 'Malayali Association', 'Chenganoor', 'flash floods rage through Ellicott City', 'drinking water', 'Indoor Stadium', 'rescue camps', 'arrived', 'Group in', 'house is in', 'AOL', 'nebraska', 'Collection centres in Chennai', 'taking place in', 'Across parts of', 'Aranmula', 'parishes', 'flood hits', 'main street in Ellicott City', 'rushed through downtown Ellicott City', 'BRANCH', 'Catonsville', 'who', 'Car street', 'donation to', 'city is ravaged by flood waters', 'Offutt in', 'outside Baltimore', 'branch', 'will leave for', 'missing', 'FL', 'Dam', 'Volunteers', 'SriLankas', 'portions of', 'landed at', 'in Florida', 'LasVegas', 'discovered along', 'from Northwest', 'competitors', 'of India', 'in Chalakudy', 'Flash Flood in Elliot City', 'NE Office', 'site', 'along', 'Air Force', 'camps across', 'bases in', 'flooding swept through Ellicott City', 'flood relief in Kerala', 'Diocese of', 'Sri .', 'citizens in', 'Apt', 'Shelter in', 'flood-ravaged', 'area of', 'flood-hit state', 'heading to flood-affected', 'students from', 'in Preston', '’ s Lobby Day For Planned Parenthood', 'downtown of', 'donation', 'FLOOD RELIEF', 'group', 'Its', 'Guard', 'missing amid', 'was found', 'CMs', 'above', 'Bhubaneswar', 'between Texas', 'flooding throughout central', 'Visited', 'farmer', 'move', 'HADR operations in', 'post', 'located on HWY 00', 'farmlands', 'Forecast', 'chapter', 'Flooding Slams', 'rescue work at', 'shipping', 'provide help in', 'Chief Ministers', 'remote', 'University', 'Shopping Center', 'Army National Guard', 'police', 'radio', '’ s radio', 'it was hit with', 'northwest', 'District Committee', 'surged through', 'flood rescue in', 'under water', 'heading', 'Center', 'issued for', 'Locals in', 'disappeared amid', 'devastated by', 'citizen of', 'other state', 'Flooding Ravages', 'Strong Red Ale', 'Niobrara', 'have sent', 'landslides hit', 'Woman', 'KODAGU', 'tornado drill day for 
Iowa', 'are stuck at', 'flash floods devastated Maryland ’ s', 'have been evacuated', 'faces', 'flooding from', 'AmeriCorps', 'smashed into', 'communities around', 'from Laurel', 'donations for', 'agricultural relief efforts', 'evacuated in', 'FLOODING devastates Ellicott City', 'Universitys', 'history', 'provide relief to', 'scouring', 'populace', 'Karnataka flood', 'my mother Indias land', 'Flooding In', 'suburb', 'was recovered in', 'Welcome', 'strikes Bangladesh', 'Preservation', 'lanes on US-00 at Kenilworth Avenue', 'Diego-Based', 'Flood Relief Program', 'allapuzha', 'Thrissur district', 'carrying', 'coverage', 'Companies in', 'serving in', 'Veterans', 'centres', 'KANSAS', 'flash flooding devastate Ellicott City', 'collected in', 'transport to', 'friends', 'flood event', 'Gujarat floods', 'Fireforce', 'Nebraska flooding victims', 'flood Relief', 'brought', 'rice production in', 'in flood affected Palakkads', '0000+relief camps', 'on-ground', 'Salvation Army', 'Mangalam Dam village', 'home state of', 'Farmers Union', '’ s districts', 'one', 'state electricity board officer', 'Strike', 'in Marylands', 'Town', 'in Panchayat Community Hall', 'tv stations', 'from Lillington', 'in MT School', 'will donate', 'flood-affected', 'Kerala Samajam', 'PARTS OF SOUTHERN NEBRASKA', 'Auto Center Stewartville', 'flood-hit state of', 'juvenile treatment center', 'finance minister of', 'at Thiruvalla', 'Floods affected people', 'Kodagu Flood Relief', 'Kunnamkulam', 'Air Force Base in Florida', 'MLAs', 'North Dakota headed for', 'frm whr', 'People from South', 'parks', 'women in', 'Flash flood smashes into Ellicott City', 'Karnatakas flood-hit', 'its cattle', 'it ’ s', 'CNTRL/E', 'Nebraska workers', 'Sappers', 'health camps at', 'mill town', 'flooding of', 'The University of', 'families in flood hit', 'SouthEast', 'in effect for portions of', 'from where', '’ s southern state', 'landslides in State Of', 'water in Cheverly', 'made in', 'RELIEF', 'disasters', 'flash flood slammed 
into', 'Base near', 'floating down', 'has extended', 'heading from', 'Panchayat office', 'Govt of', 'situation in', 'chengannur', 'rain pounding', 'journalist', 'leaving from', 'from Karunagapally', 'NGO in', 'dispatching', 'Controlled Burn Season', 'travelled from North to South of', '’ s agricultural sector', 'taking place in Yatagampitiya', 'Marathahalli', 'Fremont residents', 'nearby Camps', 'amid', 'Animal Shelter', 'Catch us', 'Ellicott City flooding in', 'Flash Floods Rip Through Ellicott City', 'truck driver', 'flash flooding sweeps through Ellicott City', 'town of', 'reliefcamps', 'folks of', 'Air Base in Nebraska', 'soldiers of', 'Oil Marketing', 'Distress Relief Fund Of', 'Air Force Base in', 'Kodagu flood victims', 'Flood Relief Trip', 'Scouts', 'villages across', 'Flash Flood Tears Through', 'rural', 'flooding devastates Ellicott City', 'FREMONT', 'Farm Hotline', 'in India ’ s southern state of', 'area Tharangam rescue centre', 'nurse', 'Palakkad', 'Hospice and Palliative', 'Tirupati Railway Station', 'flood crisis in', 'Dakotas', 'Base outside of Omaha', 'praying for', 'Flash floods struck', 'located', 'Odisha to', 'Is Coming To', 'Bridges washed away in', 'days in', 'in flood-devastated', 'north of Preston', 'flash flooding has devastated', 'Headed to', 'volunteers of', 'Valiyakadu village', 'rainfalls in Texas', 'in parts of', 'INTO', 'govt . 
hospitals', 'flooding devastates', 'forecasts', 'DELAWARE', 'ChildFund', 'be sent to Ernakulam', 'Panhandle Research Center', 'community kitchen in', 'victim.all the way from', 'in Nebraska and', 'concert', 'mission in', 'here', 'have been sent', 'flash floods swept through Ellicott City', 'in Pitabaddra', 'from Muthoot hospital in', 'departed to', 'in Thrissur district', 'heroes of', 'towards flood affected', 'U of', 'rescue camps near by', 'school', 'bridge in', 'mosque housing', 'extends assistance', 'Community', 'rehabilitation', 'Officers of IOCL', 'floods in Ellicott City', 'arranged by NPOL', 'distribution in', 'authorities',
                'flash flooding rips through central', 'Post', 'Health Minister', 'organization in', 'has released', 'Govts', 'in Northeast', 'rains across', 'Floods/disaster', 'operations from', 'wells', 'hands over', 'families near', 'relief activities in', 'western Charles counties in', 'trails', 'flash floods in Ellicott City', 'searching', 'appreciates', 'communities from', 'camp in', 'started for', 'Stewartville Auto Center Stewartville', 'Flood Damage', '’ s response', 'Citizens', 'Secretary of Agriculture', 'reaches flood-hit', 'FLOOD WATERS in', 'have been sent from', 'TRAIN BETWEEN BHUBANESWAR', 'different districts of', 'Flood Warning in west', 'Pallipad village of', 'businesses in', 'canvass', 'conference in', 'Nebraska to', 'people for', 'carried', 'communities across', 'Bhubaneswar via', 'is from', 'in northern', 'reports from', 'Collection centres in', 'flooding relief', 'Hauling', 'Mumbai to', 'in Vypeen', 'street poles on', 'President of', 'origin in', 'parts in', 'Came frm', 'Colorado', 'county seat of Howard County', 'people from', 'has been asked', 'signs', 'Pampampallam in', 'work going on in', 'trauma relief camps', 'in flood affected areas across', 'BOONE', 'home town in', 'fishermen', 'North Central', 'peeps', 'In flood-ravaged', 'nuclear power plant', 'NU has', 'Mobile homes', 'flooding in areas of', 'flash flooding sweeps through', 'Water level has receded in Pandalam', 'flood affected area', 'disaster relief', 'floods hitting', 'family in', 'Hit By', 'financial aid to', 'house in', 'show', 'Bar Exchange', 'medical relief camp', 'Andaman', 'Samajam', 'dist .', 'flood waters covered', 'overtook', 'Lakeview FFA Chapter in Columbus', 'happening in', 'Operations in Flood Stricken', 'aid from', '’ s Ambassador', 'to flood affected', 'rain storm', 'From Lincoln', 'headed out to', 'representatives from', 'Bank on', 'citizens of', 'Efforts in', 'flood affected Pandalam', 'Auburn', 'Association', 'Kottayam district of', 'Resident commissioner of', 
'reservation', 'northern', 'Double up Food Bucks program', 'tribal colony', 'sell in', 'was glowing', 'Laurel', 'floods rip through Ellicott City', 'Indiana', 'stayed in', 'flash flooding devastates Ellicott City', 'traveling in and', 'flood victim', 'aroiund', 'is devastated by', 'Kushalnagar in', 'Baltimore suburb', 'Fire Service', 'Bureau', 'flood victims from', 'neighborhood', 'SDM Offices in', 'Amb', 'rivers of', 'Vegas', 'goods in', 'SDRF of', 'floodwaters surge through', 'in flood affected regions of', 'Synod Disaster relief', 'state of emergency was', 'E-CNTRL', 'rose in', 'flash flooding devastate', 'Gurgaon', 'USA', 'are stuck at Paravoor', 'Little Patuxtent in', 'Food Bank', 'Rehabilitation', 'Dundalk', 'into flood affected areas of', 'NEBRASKA', 'county seat of', 'flood-battered', 'report from', 'facing', 'Khalsa Aid', 'rain flooded', 'flash flooding swept through', 'forestland', 'County Executive', 'other countries', 'declared in', 'Department of Health and Human Services', 'workers', 'Flash Floods Surge Through Town in', 'relief camps of', 'FLOODING devastates', 'west of Baltimore', 'heatwave', 'floods across', 'health camp', 'floods hit', 'home in flood-struck', 'Roads in Virginia', 'Lakeview FFA Chapter in', 'place', 'Jharkhand', 'Pathadippalam', 'flood affected people', 'Foreign Minister', 'Transco Limited', 'Homes', 'PMC', 'relief centers allover', 'rice', 'Rain', 'Flash floods ripped through', 'Iowa flooding victims', 'Tyndall', 'southwest Iowa', 'are going to', 'Build', 'blizzard in western', 'rehabilitation work in', 'take place in Iowa', 'Beatrice areas', 'gave', 'torrents devastate', 'relief work in', 'miles across', 'LINCOLN', 'rushed through downtown', 'villages in', 'Bengaluru friends', 'parishioners', 'Embassy', 'kallishery kollattu', 'Sent', 'heads for', 'place in', 'Nagarjuna', 'items from', 'coasts', 'premises', 'Colonies in', 'providing', 'evacuees from', 'quake relief', 'e-Health center', 'Refinery', 'farmland in', 'of Bihar', 
'Landslide in', '’ s floodwaters', 'livestock across', 'evacuation plan', 'Lillington', 'TV stations', 'Rajiv gandhi indoor stadium', 'South', 'Kumbalam in', 'is in', 'Adamannu', 'Residents of', 'Alapuzha', 'side', 'team in', 'American Red Cross of Nebraska', 'missing from Ellicott City', 'Hit by', 'sends medical assistance', 'Peoples of', 'flooding event hit', 'Relif Camps', 'Community Action', 'hit', 'Achankovil rivers are flooded', 'Weather', 'rescue centres in flood hit', 'destroyed state', 'stand with', 'trip to', 'flood victim of Nebraska', 'is facing', 'was airlifted', 'working in', 'Valley', '’ s coffee output', 'faced', 'UAE', 'Flood Waters', 'flood stricken', 'take place in', 'Iowa residents', 'victims of flood in', 'Districts in', 'families living in', 'in Ellicott', 'Custer County', 'food truck owner', 'camp at', 'Gov of', 'hit Ellicott City', 'flood warning', 'CivilAirPatrol', 'despatched', 'landed in', 'deployed at Kalutara', 'flash flood in Ellicott City', 'came', 'strike in', 'flight from', 'flood relief work in', 'Financial Corporation', 'of southern', 'floods in Marylands', 'towns in', 'areas near', 'China', 'USA where', 'NISA', 'transported', 'CycloneMora in', 'flash floods devastated', 'trapped in flood hit', 'sites in', 'Flash Floods Rip Through', 'settled in', 'counties from across', 'rescue', 'United States', 'President of Maldives', 'state lines', 'Air National Guard', 'Pradesh Congress Committee', 'flood disaster in', 'headquarters in', 'off red alert', 'Neyyar Dam', 'Ernakulam from', 'Kollupitiya stations', 'Highway 00 near Bellevue', 'flood-affected areas', 'Western Iowa', 'Peroorkada Area Committee', 'Sri Lankan', 'Disaster Relief Fund', 'heads to', 'they', 'which ravaged', 'waters have receded in', 'its residents', 'earthquake of', 'disbursed in', 'Specialized Equipment Program', 'Flash floods', 'travel to', 'officials of', 'near villages', 'Cmp', 'Flood/Landslide', 'food relief', 'At this', 'Infrastructure of state', 'take', 'flash 
floods devastate', 'proceed to', 'flash flooding strikes Ellicott City', 'is undergoing', 'community in', 'church in', 'suburbs', 'visited flood-affected areas', 'homes/farms in', 'CISF Unit ASG', 'cattleman', 'Correctional Youth Institute', 'released', 'floods sink', 'Arattupuzha', 'Aluva region', 'Matara District', 'Roads in', 'other foreign', 'Karnataka coasts', 'ground', 'pray for', 'North Carolina to Omaha', 'counties in Maryland', 'Maine to', 'shops', 'In Maryland', 'distribute in flood effected area', 'surrounding', 'iowa', 'cut off from', 'gets stuck in', 'flood relief work', 'Uni', 'Nation of', 'drinking water in Iowa', 'flash floods rage through', 'missing in Ellicott City', 'State of Emergency in', 'storm victims', 'be sent to', 'Family', 'Foreign Aid Received by', 'centre at', 'peroorkada', 'potholes', 'Peoples', 'volunteers', 'chengannur kerala', 'Haryana', 'brought flash flooding to', 'province of Gujarath', 'Main', 'Chief', '’ s governor', 'in South', 'floods ravaged', 'Vellappara village of', 'to flood-ravaged', 'flood in Kerala', 'Runza', 'donate for', 'drought relief in', 'aid by', 'tornado drill day for', 'in Manchester', 'sound in', 'infrastructure in flood-hit', 'flooding rips through', 'issued for Iowa', 'Malampuzhas', 'Chengannur in', 'trip to Iowa', 'ministers', 'GECBH', 'To', 'flash flooding strikes', 'Nationals in Texas', 'contribute', 'power cuts', 'historic flood in', 'Mosques in', 'collecting', 'at Cessna Business park', 'help for', 'within', 'headed to Nebraska', 'donation site at', 'has been send off to', 'Company', 'flash floods struck', 'toll plazas in', 'across Nebraska', 'Dept . 
of Ag', 'reaches', 'Palakkad district', 'parts of flood affected areas in', 'in Omaha', 'Flash floods in', 'farm', 'naval base relief camp', 'crop', 'attack', 'Maldives', 'Metro Area', 'Amala', 'on way to', 'washed out Ellicott City', 'bring', 'rivers are flooded', 'rescue operations in', 'was airlifted to', 'brought flash flooding to Ellicott City', 'Sivananda', 'in Jind', 'Reservation in', 'in Lourdes matha hospital', 'Missing from', 'Israel ’ s Ambassador', 'county in', 'distribution', 'map', 'Pahiyangala', 'FM', 'Ngos in', 'Victims in', 'working in flood-stricken areas of SW', 'save', 'am from East', 'Fields', 'Hurricane Season for', 'flood devastation', 'Medical Camp', 'in flood-ravaged', 'regions', 'rain forecast', 'disaster of', 'sounds alert', 'water line', 'off to', 'SE', 'Iowa ,', 'Minister for Public Works', 'jodupala in', 'northeast', 'flooding across Nebraska', 'floods struck', 'flood hit', 'in southeast', 'hospital in Kearney', 'Deputy', 'Florida', 'Panama City', 'KERALA', 'required at', 'traveling through', 'flood hit state', 'aranmula', 'Divine Care Centre', 'team', 'flying', 'Medicine', 'Ministers', 'colony in', 'State of emergency declared in', 'Office', 'PENNSYLVANIA', 'times in', 'landscape', 'Pampa', 'flood victim of', 'victims in dnkoluwaththa in', 'medical camps in', 'island in', 'NE State', 'to Flood-Stricken', 'drinking water in', 'reservations in', 'flood affected regions of', 'highway', 'Raising Canes', 'on site in', 'girls', 'Districts', 'flood areas around', 'in Imperial', 'Folks in', 'Briarcroft Lane', 'areas surrounding', 'hospitals', 'washed out parts of', 'flooding in parts of eastern', 'Ministry officials', 'Flooding Slams Ellicott City', 'Aid from', 'Flash floods in Maryland', 'Missing in', 'community in Lincoln', 'Air Base in', 'Flood in', 'ranchers in', 'flooding in areas of Missouri', 'in Flood-Hit', 'Deseeya Seva Bharati', 'take to', 'on', 'International', 'went', 'Mangalore', 'in flood-affected areas in', 'rescue plans for', 
'Welcome to', 'legislators', 'Rains', 'announces', 'GrandIsland', 'Cattlemens Disaster Relief Fund', 'leaving', 'shelter at', 'foreign aid', 'recovers from floods', 'Hyderabad', 'NE College of Technical Ag', 'head to', 'development', 'swimming pool', 'in Ashland', 'people in flood-hit', 'Ramthakur College', 'Maryland state', 'County official', 'head west', 'aerial survey of',
                'UCCollege', 'river in', 'Department of Education', 'People', 'ERNAKULAM', 'floods swept through', 'flooding impacting', 'travel from', 'hectares of paddy', 'flood waters in Ellicott City', 'fellow state', 'receeed in', 'at Bambalapitiya', 'Begumpet airport', 'Relief Commissioner', 'petrol pumps in', 'water in', 'section of', 'relief aid to', 'lived in', 'flooding throughout', 'floods affect', 'Flood-hit', 'death toll in', 'Health Department', 'our', 'activities at', 'from Western', 'FLOODING', 'indoor stadium', 'should be delivered at', 'Disaster Resource Center in', 'register', 'Near malakara temple', 'Flash Flood Tears Through Ellicott City', 'in Kerala', 'Domestic', 'donations center in', 'across Central', 'was in', 'in eastern', 'information', 'happening', 'india', 'Meet me in', 'PARTS OF SOUTHERN', 'military ship', 'finance minister', 'toll plazas at Paliekkara in', 'going to', 'Ship from', 'which has been ravaged by', 'have impacted', 'thanks', 'Water level has receded in', 'shore county', 'tour', 'districts in', 'missing persons', 'submerges', 'DONATES', 'in Ratnapura', 'Nettor stores', 'Norfolk', 'communities from Montana', 'CPIM of', 'missing from', 'levee systems in', 'south of', 'flash flooding devastates', 'in Glenwood', 'Impact', 'where churches', 'stands', 'stations in', 'flood areas around chengannur', 'surrounding area', 'Railroad', 'rainfalls in', 'Ratings and Research', 'floodwaters in', 'rip through', 'Cattlemens', 'Save', 'In flood-hit', 'in Howard County', 'school communities', 'swept through', 'western parts of', 'Tax Commissioner', 'accept financial aid', 'levee', 'CMDRF', 'its main street', 'Broadcasters', 'Manitoba to', 'out of Marshall', 'is sinking', 'Allepy', 'flooding victims in', 'to Eastern', 'Bangladesh', 'set up in', 'Haskell Ag Lab', 'Haj Committee', 'flooding around', 'are here from', 'Islamic Foundation', 'Nebraska/Southwest', 'Fishermans', 'eastern', 'leaving for', 'toll plazas at Paliekkara', 'coal workers', 
'Sunrise hospital', 'flooding in Nebraska', 'Flash flooding in', 'part of', 'Extension', 'SPCA', 'aid for', 'Aryanadu province of', 'partner', 'hav 0NationPark', 'have died', 'Millaniya', 'Relief for', 'Flash floods ripped through Ellicott City', 'citizens', 'Lower 00 In', 'flood levels', '’ s Rural America Relief', 'Financial Assistance to', 'in flood-affected', 'Distribution centre', 'villages', 'Battallion of', 'area of Columbus', 'lives there', 'Bulathsinhala', 'in the district', 'headed to Fullerton', 'Home Minister of', 'cleaned in', 'needed', 'are missing in', 'called', 'Seen In', 'stood with', 'fund Relief', 'them', 'Air Force Base', 'Flash Flood Smashes Into', 'Flash flood smashes into', 'in that flood', 'Iowa flood relief', 'deployed at', 'Feeding', 'Coimbatore', 'Arattupuzha area Tharangam rescue centre', 'hit by floods', 'took place in']
# Sanity check on the hard-coded list: its size and its first three entries.
print(len(trigger_keys), trigger_keys[:3], sep="\n")

1289
['flood affected regions', 'Pathanamthita', 'Aid']


In [118]:
# Compare the trigger-entity strings with the trigger keys:
#   intersection      - entities that also occur as a trigger key
#   disjunct_entities - entities that are not a trigger key
#   disjunct_keys     - trigger keys not matched by any entity
# Membership is tested against sets instead of rescanning a list for every item.
# Iteration still follows the original list order, so the three result lists
# (including any duplicates) are the same as with list-based lookups.
trigger_key_lookup = set(trigger_keys)

intersection = []
disjunct_entities = []
for entity in trigger_entity_strings:
    if entity in trigger_key_lookup:
        intersection.append(entity)
    else:
        disjunct_entities.append(entity)

matched_lookup = set(intersection)
disjunct_keys = [trigkey for trigkey in trigger_keys if trigkey not in matched_lookup]

print(len(intersection))
print(len(disjunct_keys))
print(len(disjunct_entities))

1277
12
632


In [119]:
# Entities from trigger_entity_strings that also appear in trigger_keys.
intersection

['conference in',
 'Air',
 'Force Camp',
 'travelled from North to South of',
 'rice production in',
 'flying',
 'visited',
 'Ministry officials',
 'in the district',
 'remote',
 'evacuees from',
 'President of Maldives',
 'President of',
 'Financial Assistance to',
 'volunteers of',
 'branch',
 'cleaned in',
 'Captain & crew of',
 'Erie',
 'Cmp',
 'thanks',
 'in Ratnapura',
 'BRANCH',
 'Group in',
 'relief aid from',
 'flood levels',
 'floods hitting',
 'landslide relief',
 'Shopping Center',
 'Kollupitiya stations',
 'at Bambalapitiya',
 'stations',
 'appreciates',
 'Flood/Landslide',
 'military ship',
 'arrived',
 'was glowing',
 'Scouts',
 'off to',
 'goods in',
 'mishap',
 'strikes Bangladesh',
 'on way to',
 'relief aid to',
 'announces',
 'women in',
 'Millaniya',
 'Matara District',
 'SriLankas',
 'deployed at',
 'Ratmalana',
 'deployed at Kalutara',
 'operations in',
 'Israel ’ s Ambassador',
 'to Flood-Stricken',
 '’ s Ambassador',
 'Ambassador to',
 'districts in',
 'authori

In [120]:
# Trigger keys that are not among the matched entities in `intersection`.
disjunct_keys

['Lower 00 In Nebraska',
 'American Red Cross',
 'Indoor Stadium',
 'located on HWY 00',
 'lanes on US-00 at Kenilworth Avenue',
 'flood Relief',
 '0000+relief camps',
 'trauma relief camps',
 'Highway 00 near Bellevue',
 'homes/farms in',
 'hav 0NationPark',
 'Lower 00 In']

In [121]:
# Entities from trigger_entity_strings that do not appear in trigger_keys.
disjunct_entities

['in',
 'Nebraska',
 'Iowa',
 'floods',
 'Maryland',
 'flooding',
 'flood victims',
 'flood',
 'flooding in',
 'to',
 'state',
 'from',
 'flood relief',
 'Flood Relief',
 'floods in',
 'people of',
 'Missouri',
 'South Dakota',
 'homes',
 'roads',
 'communities',
 'Kerala',
 'states',
 'District',
 'Wisconsin',
 'Mississippi',
 'district',
 'across',
 'farmers',
 'areas',
 'Floods',
 'residents',
 'flood-hit',
 'at',
 'Wyoming',
 'relief camps',
 'Ellicott City',
 'city',
 'Montana',
 'CM',
 'Alappuzha',
 'In',
 'community',
 'town',
 'Illinois',
 'Ernakulam',
 'bridges',
 'livestock',
 'help',
 'Kansas',
 'Midwest',
 'area',
 'districts',
 'Flood',
 'houses',
 'Flood Victims',
 'Flooding',
 'Alaska',
 'Utah',
 'Idaho',
 'North Dakota',
 'Maine',
 'Vermont',
 'Delaware',
 'Hawaii',
 'country',
 'flash flooding',
 'Farm Bureau',
 'Louisiana',
 'Minnesota',
 'flooding in Arkansas',
 'City',
 'Chengannur',
 'rain',
 'flooding in Ellicott City',
 'flash floods',
 'where',
 'people in',
 'h

---
### Is distance a measure of familiarity?

#### Test data built from trigger-annotated training data

In [122]:
# Collect every distinct training sentence that carries at least one trigger
# annotation, keeping the order of first appearance.
test_from_training = []
# Mirrors the contents of test_from_training so the duplicate check is a
# constant-time set lookup instead of a scan over the growing list.
seen_texts = set()

for sample in train_with_triggers:
    text = sample["text"]
    if text in seen_texts:
        continue
    # Trigger tokens are tagged "T-<n>" in the explanation string; all other
    # tokens are "O", so a "T" anywhere means the sample has a trigger.
    if "T" in sample["explanation"]:
        seen_texts.add(text)
        test_from_training.append(text)

len(test_from_training)

1742

In [123]:
# Request trigger-model predictions for the sentences in test_from_training.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': test_from_training,
    }
)
# Fail in this cell, with the HTTP status in the error, instead of letting a
# later cell trip over a missing key in response.json().
response.raise_for_status()

In [124]:
# Summarise the "distance_preds" field of the most recent prediction response.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.0016128562856465578,
 1.7994288206100464,
 0.3228389412644116,
 0.019577819854021072)

In [125]:
# Decode the response body once and pull out the three parallel prediction lists.
payload = response.json()
c = payload["class_preds"]
t = payload["trigger_preds"]
d = payload["distance_preds"]

# One record per input sentence, pairing it with its predictions by position.
results_training = [
    {"text": text, "pred_label": label, "key": key, "dist": dist}
    for text, label, key, dist in zip(test_from_training, c, t, d)
]
# Order the records by ascending "dist".
results_training.sort(key=lambda record: record["dist"])
results_training

[{'text': 'Marylands governor declared a state of emergency as flash flooding has devastated the town of Ellicott City # tictocnews',
  'pred_label': 'O O O O O O O O O O O O O O O B-LOC E-LOC O O',
  'key': 'flash flooding has devastated',
  'dist': 0.0016128562856465578},
 {'text': 'NDCS is # NebraskaStrong . Our NDCS teammates are helping flood victims across Nebraska and Iowa . These photos capture our assistant warden at the Nebraska Correctional Youth Institute sandbagging and helping others get to a safer place out of the flooding . # NebraskaFlood2019',
  'pred_label': 'O O O O O O O O O O O O O S-LOC O S-LOC O O O O O O O O O S-LOC O O O O O O O O O O O O O O O O O O O',
  'key': 'Correctional Youth Institute',
  'dist': 0.0017634712858125567},
 {'text': 'Were getting ready to take calls and donations on air ! We are raising funds to help cover our costs to travel to Nebraska for a Service Learning project to help Drew Wolfe clean up his farm from the floods . Please call 228-

#### Test data built from unique trigger entities

In [126]:
# Size of the trigger-entity test set that is sent for prediction in the next cell.
len(trigger_entity_strings)

1909

In [127]:
# Request trigger-model predictions for the strings in trigger_entity_strings.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': trigger_entity_strings,
    }
)
# Fail in this cell, with the HTTP status in the error, instead of letting a
# later cell trip over a missing key in response.json().
response.raise_for_status()

In [128]:
# Summarise the "distance_preds" field of the most recent prediction response.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.18904532492160797,
 1.9221434593200684,
 0.9324059118738719,
 0.9363695979118347)

In [129]:
# Decode the response body once and pull out the three parallel prediction lists.
payload = response.json()
c = payload["class_preds"]
t = payload["trigger_preds"]
d = payload["distance_preds"]

# One record per trigger-entity string, pairing it with its predictions by position.
results_triggers = [
    {"text": text, "pred_label": label, "key": key, "dist": dist}
    for text, label, key, dist in zip(trigger_entity_strings, c, t, d)
]
# Order the records by ascending "dist".
results_triggers.sort(key=lambda record: record["dist"])
results_triggers

[{'text': 'victims',
  'pred_label': 'O',
  'key': 'donating',
  'dist': 0.18904532492160797},
 {'text': 'washed out parts of',
  'pred_label': 'O O O O',
  'key': 'floodwaters in Maryland ’ s',
  'dist': 0.2196042388677597},
 {'text': 'flooding in Maryland',
  'pred_label': 'O O S-LOC',
  'key': 'torrents devastate',
  'dist': 0.22062736749649048},
 {'text': 'on the way to',
  'pred_label': 'O O O O',
  'key': 'Association',
  'dist': 0.23210249841213226},
 {'text': 'on way to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.2326667308807373},
 {'text': 'Karnataka flood',
  'pred_label': 'S-LOC O',
  'key': 'Pumphouse at',
  'dist': 0.24749211966991425},
 {'text': 'EllicottCity',
  'pred_label': 'O',
  'key': 'Forecast',
  'dist': 0.29843321442604065},
 {'text': 'Farm Hotline',
  'pred_label': 'O O',
  'key': 'taking place in',
  'dist': 0.299810528755188},
 {'text': 'flood-hit state of',
  'pred_label': 'O O O',
  'key': 'donation site at',
  'dist': 0.3364724814891815}

#### Test data built from unique trigger keys

In [130]:
# Size of the trigger-key test set that is sent for prediction in the next cell.
len(trigger_keys)

1289

In [131]:
# Request trigger-model predictions for the strings in trigger_keys.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': trigger_keys,
    }
)
# Fail in this cell, with the HTTP status in the error, instead of letting a
# later cell trip over a missing key in response.json().
response.raise_for_status()

In [132]:
# Summarise the "distance_preds" field of the most recent prediction response.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.2196042388677597,
 1.8392953872680664,
 0.9165785686973457,
 0.9285454154014587)

In [133]:
# Decode the response body once and pull out the three parallel prediction lists.
payload = response.json()
c = payload["class_preds"]
t = payload["trigger_preds"]
d = payload["distance_preds"]

# One record per trigger-key string, pairing it with its predictions by position.
results_trigkeys = [
    {"text": text, "pred_label": label, "key": key, "dist": dist}
    for text, label, key, dist in zip(trigger_keys, c, t, d)
]
# Order the records by ascending "dist".
results_trigkeys.sort(key=lambda record: record["dist"])
results_trigkeys

[{'text': 'washed out parts of',
  'pred_label': 'O O O O',
  'key': 'floodwaters in Maryland ’ s',
  'dist': 0.2196042388677597},
 {'text': 'on way to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.2326667457818985},
 {'text': 'Karnataka flood',
  'pred_label': 'S-LOC O',
  'key': 'Pumphouse at',
  'dist': 0.24749216437339783},
 {'text': 'Farm Hotline',
  'pred_label': 'O O',
  'key': 'taking place in',
  'dist': 0.2998104989528656},
 {'text': 'flood-hit state of',
  'pred_label': 'O O O',
  'key': 'donation site at',
  'dist': 0.3364725410938263},
 {'text': 'Kodagu flood victims',
  'pred_label': 'S-LOC O O',
  'key': 'Strong Red Ale',
  'dist': 0.35525813698768616},
 {'text': 'CPIM of',
  'pred_label': 'O O',
  'key': 'donation site at',
  'dist': 0.35589686036109924},
 {'text': '’ s agricultural sector',
  'pred_label': 'O O O O',
  'key': 'Flood-Affected',
  'dist': 0.35799670219421387},
 {'text': '’ s response',
  'pred_label': 'O O O',
  'key': 'Flood-Affected',


#### Difference between predictions for trigger entities and trigger keys

In [134]:
# Records from the trigger-entity run that have no exactly equal record in the
# trigger-key run. Equality is whole-dict: text, pred_label, key and dist.
# NOTE(review): "dist" is a float, so a record whose distance differs only in
# its trailing digits between the two requests is still reported as unique.
unique_trigger_results = list(filter(
    lambda candidate: candidate not in results_trigkeys,
    results_triggers,
))
print(len(unique_trigger_results))
unique_trigger_results

1013


[{'text': 'victims',
  'pred_label': 'O',
  'key': 'donating',
  'dist': 0.18904532492160797},
 {'text': 'flooding in Maryland',
  'pred_label': 'O O S-LOC',
  'key': 'torrents devastate',
  'dist': 0.22062736749649048},
 {'text': 'on the way to',
  'pred_label': 'O O O O',
  'key': 'Association',
  'dist': 0.23210249841213226},
 {'text': 'on way to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.2326667308807373},
 {'text': 'Karnataka flood',
  'pred_label': 'S-LOC O',
  'key': 'Pumphouse at',
  'dist': 0.24749211966991425},
 {'text': 'EllicottCity',
  'pred_label': 'O',
  'key': 'Forecast',
  'dist': 0.29843321442604065},
 {'text': 'Farm Hotline',
  'pred_label': 'O O',
  'key': 'taking place in',
  'dist': 0.299810528755188},
 {'text': 'flood-hit state of',
  'pred_label': 'O O O',
  'key': 'donation site at',
  'dist': 0.3364724814891815},
 {'text': '’ s agricultural sector',
  'pred_label': 'O O O O',
  'key': 'Flood-Affected',
  'dist': 0.35799673199653625},
 {'tex

In [135]:
# Records from the trigger-key run that have no exactly equal record in the
# trigger-entity run. Equality is whole-dict: text, pred_label, key and dist.
# NOTE(review): "dist" is a float, so a record whose distance differs only in
# its trailing digits between the two requests is still reported as unique.
unique_trigkey_results = list(filter(
    lambda candidate: candidate not in results_triggers,
    results_trigkeys,
))
print(len(unique_trigkey_results))
unique_trigkey_results

393


[{'text': 'on way to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.2326667457818985},
 {'text': 'Karnataka flood',
  'pred_label': 'S-LOC O',
  'key': 'Pumphouse at',
  'dist': 0.24749216437339783},
 {'text': 'Farm Hotline',
  'pred_label': 'O O',
  'key': 'taking place in',
  'dist': 0.2998104989528656},
 {'text': 'flood-hit state of',
  'pred_label': 'O O O',
  'key': 'donation site at',
  'dist': 0.3364725410938263},
 {'text': '’ s agricultural sector',
  'pred_label': 'O O O O',
  'key': 'Flood-Affected',
  'dist': 0.35799670219421387},
 {'text': 'near Alangad',
  'pred_label': 'O S-LOC',
  'key': 'is sinking',
  'dist': 0.3692305386066437},
 {'text': 'Tekamah south to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.37374138832092285},
 {'text': 'between Iowa',
  'pred_label': 'O S-LOC',
  'key': 'in Preston',
  'dist': 0.39391741156578064},
 {'text': 'flood-affected areas of',
  'pred_label': 'O O O',
  'key': 'donation site at',
  'dist': 0.3973977

In [136]:
# Pair up records that share the same input text across the two "unique" lists
# and put both runs' label, key and distance side by side for comparison.
differing_predictions = []
for result in unique_trigger_results:
    for comparison in unique_trigkey_results:
        if result["text"] != comparison["text"]:
            continue
        diff = dict(
            text=result["text"],
            trigger_pred=result["pred_label"],
            trigkey_pred=comparison["pred_label"],
            trigger_key=result["key"],
            trigkey_key=comparison["key"],
            trigger_dist=result["dist"],
            trigkey_dist=comparison["dist"],
        )
        differing_predictions.append(diff)
print(len(differing_predictions))

381


In [137]:
# Display every paired record: same text, with the label, key and distance
# from the trigger-entity run next to those from the trigger-key run.
differing_predictions

[{'text': 'on way to',
  'trigger_pred': 'O O O',
  'trigkey_pred': 'O O O',
  'trigger_key': 'Association',
  'trigkey_key': 'Association',
  'trigger_dist': 0.2326667308807373,
  'trigkey_dist': 0.2326667457818985},
 {'text': 'Karnataka flood',
  'trigger_pred': 'S-LOC O',
  'trigkey_pred': 'S-LOC O',
  'trigger_key': 'Pumphouse at',
  'trigkey_key': 'Pumphouse at',
  'trigger_dist': 0.24749211966991425,
  'trigkey_dist': 0.24749216437339783},
 {'text': 'Farm Hotline',
  'trigger_pred': 'O O',
  'trigkey_pred': 'O O',
  'trigger_key': 'taking place in',
  'trigkey_key': 'taking place in',
  'trigger_dist': 0.299810528755188,
  'trigkey_dist': 0.2998104989528656},
 {'text': 'flood-hit state of',
  'trigger_pred': 'O O O',
  'trigkey_pred': 'O O O',
  'trigger_key': 'donation site at',
  'trigkey_key': 'donation site at',
  'trigger_dist': 0.3364724814891815,
  'trigkey_dist': 0.3364725410938263},
 {'text': '’ s agricultural sector',
  'trigger_pred': 'O O O O',
  'trigkey_pred': 'O O 

#### Test data built from gibberish

In [138]:
# Seeded generator so the gibberish test set is the same on every run.
gen = np.random.default_rng(seed=1337)
# Pool of characters the random tokens are drawn from: A-Z, a-z, 0-9.
gibberish_alphabet = list(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
)

# 2000 samples; each has 1-39 space-separated tokens of 1-19 random characters.
# The generator is consumed in a fixed order: token count first, then for each
# token its length followed by its characters.
test_gibberish = []
for sample_index in range(2000):
    sample_tokens = []
    for token_index in range(gen.integers(1, 40)):
        token_length = gen.integers(1, 20)
        sample_tokens.append("".join(gen.choice(gibberish_alphabet, token_length)))
    test_gibberish.append(" ".join(sample_tokens))
test_gibberish[:3]

['tLh5Y6R2cHDMKVRej OAgO3xIZUFvYdVQBD3 BYz4c3CAy wcslEs 79yvVNcFE8Wt 8jwwIsHSNZ 9rBvYZUVm3cF MoUByOci0N4tR7Tn 6IL cPqFu27SryaDHqJE bwXfPw OOTsHI moJK7UkMfmmTZsuc goJIqurfs nYji Bq4ehRSenQBoKq 0IP8B4CYMI76 fWpjheRXRJZwP fPRigdL O9sYlv1rbclN8OlOEh 59gVd0BTdXqGT z7AJJ8Pw1GvD',
 'Br0ymF1U TOK26vdxeFQV i5ipo3jRtmsOLDS veD86DUkz1teGyDauf',
 'yFN8EtTWfuVCmbtB1Ou 8wQG8WOlFBW Cs0uGvzLkSv4JACxg EsR5y6dLH']

In [139]:
# Request trigger-model predictions for the random strings in test_gibberish.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': test_gibberish,
    }
)
# Fail in this cell, with the HTTP status in the error, instead of letting a
# later cell trip over a missing key in response.json().
response.raise_for_status()

In [140]:
# Summarise the "distance_preds" field of the most recent prediction response.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.20258137583732605, 1.59308660030365, 0.536500399261713, 0.5144401490688324)

In [141]:
# Decode the response body once and pull out the three parallel prediction lists.
payload = response.json()
c = payload["class_preds"]
t = payload["trigger_preds"]
d = payload["distance_preds"]

# One record per gibberish string, pairing it with its predictions by position.
results_gibberish = [
    {"text": text, "pred_label": label, "key": key, "dist": dist}
    for text, label, key, dist in zip(test_gibberish, c, t, d)
]
# Order the records by ascending "dist".
results_gibberish.sort(key=lambda record: record["dist"])
results_gibberish

[{'text': '7gNdx9Xnx8qSU ukqLI SGeP ttYzfBIukGj iANjMH8VwaMDW ekeXCPyByrgG0O 3Hu1UGh3sCC7GA FCluOJMcCO8e2z JHH9n XsrjL9tUVMLqPbBED D0EtiVGc TWrddc9RWp OcyXTdviwCC ASD9QznbwzQCqWw q7k9YTq7 xoblrvEsSNYenpyu T6PVP2H29cN1XJdha KIqRuMJA8BByZm9nh1d ELw5xLeYKFaki coo31 mXEOmOcVdY6 sdDoQR51UiGPbPOWVz',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'aid for',
  'dist': 0.20258137583732605},
 {'text': '7Ji NaUa kp41D5ielTtUC APBP 9C DlpWiC t5zA AqnxiX8dqd80Qi7fX7D Uie46JgsCigcWnDFR7 ZX2kGaeoRam gK y0gazs4f2TX9z 58Ouy3WZ45SQSeXOxZj CdEB1Lg 47JdAOKhLBCtkR 2p895SNvqi QPjJDWEfp4O2qKjz7Kr GE5BBZAF0 xdZ5nPjp 6erPEm64K98K XylvnHmYFLRKdr8YN GX19g0iuLRYQsoX0g zahY8LNxZ j 23xeZIWtKsBW c45ZCjNtN 5GQFnnfWnTwY tCIK m1ZbMtExw Evf6q 8LS1ldMqvVQjRUcI EbSKf sM9N5jscHz 8jQQOSYxJoxiWb owYoLzCf3r RKjxf83Nf7LngN9Fu sFiQLqy7jRGinIiZaT WlUSWPp',
  'pred_label': 'O S-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'who',
  'dist': 0.23996005952358246},
 {

#### Test data built from previous datasets with prefixed/suffixed tokens

In [142]:
# The seven test sets built earlier in the notebook, in the order they are perturbed.
datasets_for_period_test = [
    test, test_cyclone, test_hurricane, test_from_training,
    trigger_keys, trigger_entity_strings, test_gibberish,
]

# Fourteen lists in total: first each dataset with " ." appended to every text,
# then each dataset again with every text wrapped as ". <text> .".
test_prefix_suffix_period = (
    [[f"{text} ." for text in tests] for tests in datasets_for_period_test]
    + [[f". {text} ." for text in tests] for tests in datasets_for_period_test]
)

In [143]:
# Request predictions for every period-perturbed dataset, one request per
# dataset, keeping the responses in the same order as test_prefix_suffix_period.
responses_periods = []
for tests in test_prefix_suffix_period:
    response = requests.post(
        FAST_API_URL + '/training/trigger/predict/',
        json={
            'params': params_trigger_prediction,
            'prediction_data': tests,
        }
    )
    # Stop at the first failed request so a server error is reported here
    # rather than when a later cell indexes into response.json().
    response.raise_for_status()
    responses_periods.append(response)

In [144]:
# Print one distance summary per period-perturbed dataset, in request order.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
for response in responses_periods:
    dists = response.json()["distance_preds"]
    print(span_and_avgs(dists))

(0.5403556823730469, 0.6217598915100098, 0.5840626112472864, 0.5841069519519806)
(0.5354064106941223, 0.6445638537406921, 0.5840141722117775, 0.5861979722976685)
(0.5385076999664307, 0.6463330388069153, 0.5825868634819296, 0.5838336050510406)
(0.0038506474811583757, 1.413163185119629, 0.5266770775464388, 0.4957013875246048)
(0.039727918803691864, 1.1330493688583374, 0.5215540817245853, 0.5218175053596497)
(0.03972790390253067, 1.2662866115570068, 0.5317265791706292, 0.531076192855835)
(0.1740882694721222, 1.0298818349838257, 0.4643789376914501, 0.4571429342031479)
(0.3797118067741394, 0.5301669239997864, 0.5024271141967656, 0.505545049905777)
(0.37650859355926514, 0.5361681580543518, 0.4995198408303233, 0.5072194039821625)
(0.3769838213920593, 0.533667802810669, 0.5012729660049798, 0.5035249590873718)
(0.11149566620588303, 0.9806789755821228, 0.49339874045566223, 0.4820280075073242)
(0.15646801888942719, 0.9206334948539734, 0.4941589112680397, 0.47761356830596924)
(0.15646801888942719,

In [145]:
# Same statements as the preceding cell: prints the distance summaries for
# responses_periods a second time, so the output is identical.
for response in responses_periods:
    dists = response.json()["distance_preds"]
    print(span_and_avgs(dists))

(0.5403556823730469, 0.6217598915100098, 0.5840626112472864, 0.5841069519519806)
(0.5354064106941223, 0.6445638537406921, 0.5840141722117775, 0.5861979722976685)
(0.5385076999664307, 0.6463330388069153, 0.5825868634819296, 0.5838336050510406)
(0.0038506474811583757, 1.413163185119629, 0.5266770775464388, 0.4957013875246048)
(0.039727918803691864, 1.1330493688583374, 0.5215540817245853, 0.5218175053596497)
(0.03972790390253067, 1.2662866115570068, 0.5317265791706292, 0.531076192855835)
(0.1740882694721222, 1.0298818349838257, 0.4643789376914501, 0.4571429342031479)
(0.3797118067741394, 0.5301669239997864, 0.5024271141967656, 0.505545049905777)
(0.37650859355926514, 0.5361681580543518, 0.4995198408303233, 0.5072194039821625)
(0.3769838213920593, 0.533667802810669, 0.5012729660049798, 0.5035249590873718)
(0.11149566620588303, 0.9806789755821228, 0.49339874045566223, 0.4820280075073242)
(0.15646801888942719, 0.9206334948539734, 0.4941589112680397, 0.47761356830596924)
(0.15646801888942719,

In [146]:
# The seven test sets built earlier in the notebook, in the order they are perturbed.
datasets_for_affix_test = [
    test, test_cyclone, test_hurricane, test_from_training,
    trigger_keys, trigger_entity_strings, test_gibberish,
]

# Fourteen lists in total: first each dataset with every text wrapped in
# "Marylands ... tictocnews", then each dataset again wrapped in "Flooding ... >".
test_prefix_suffix = (
    [[f"Marylands {text} tictocnews" for text in tests] for tests in datasets_for_affix_test]
    + [[f"Flooding {text} >" for text in tests] for tests in datasets_for_affix_test]
)

In [147]:
# Request predictions for every word-affixed dataset, one request per dataset,
# keeping the responses in the same order as test_prefix_suffix.
responses = []
for tests in test_prefix_suffix:
    response = requests.post(
        FAST_API_URL + '/training/trigger/predict/',
        json={
            'params': params_trigger_prediction,
            'prediction_data': tests,
        }
    )
    # Stop at the first failed request so a server error is reported here
    # rather than when a later cell indexes into response.json().
    response.raise_for_status()
    responses.append(response)

In [148]:
# Print one distance summary per word-affixed dataset, in request order.
# NOTE(review): span_and_avgs is defined outside this section; its 4-tuple
# output looks like (min, max, mean, median) — confirm against its definition.
for response in responses:
    dists = response.json()["distance_preds"]
    print(span_and_avgs(dists))

(0.037885867059230804, 0.08386216312646866, 0.05571355574108936, 0.05476929619908333)
(0.03525048494338989, 0.11563178896903992, 0.058898158931318736, 0.05833583325147629)
(0.035254333168268204, 0.10640809684991837, 0.05691398871140214, 0.05588447116315365)
(0.018689358606934547, 0.27150586247444153, 0.06487864622125566, 0.05536979250609875)
(0.019429374486207962, 0.11245529353618622, 0.04310422337233234, 0.040940236300230026)
(0.019429387524724007, 0.12159772217273712, 0.044219437602179536, 0.042692236602306366)
(0.03063548542559147, 0.16578158736228943, 0.04669772004242986, 0.0455461572855711)
(1.6572110652923584, 1.777621865272522, 1.6803632069517065, 1.679309368133545)
(1.6451066732406616, 1.7801575660705566, 1.6816010742747944, 1.6758934259414673)
(1.651989459991455, 1.7794760465621948, 1.6810647625913968, 1.6794974207878113)
(0.8370346426963806, 2.041158437728882, 1.8137348799289437, 1.867862343788147)
(0.8136669993400574, 2.0300819873809814, 1.670717253344664, 1.7344484329223633