# Named Entity Recognition

Notebook based on the work of Lee et al. ("LEAN-LIFE: A Label-Efficient Annotation Framework Towards Learning from Explanation," 2020).

Before running this notebook, make sure the FastAPI server is up and running (see the [setup instructions](../fast_api/readme.md)).

# Table of contents

1. Data prep
2. Training
3. Evaluation

 ---
 # 1. Data prep

In [1]:
# imports
import requests
import json
import numpy as np
from scipy import stats

In [2]:
FAST_API_URL = "http://localhost:9000"

---
#### Prepare data for Trigger training

In [3]:
# Load the flood training samples (each carries 'text', 'label' and
# 'explanation' tag strings). The context manager closes the file handle as
# soon as parsing is done instead of leaving it to the garbage collector.
with open('explanation_IDRISI-RE-flood_tokenized.json') as train_file:
    train_with_triggers = json.load(train_file)
train_with_triggers[:3]

[{'text': 'Korean actress Lee Young-ae know as ‘ Changumi ’ made a contribution of USD 50,000 to support flood relief efforts in Sri Lanka',
  'label': 'O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC',
  'explanation': 'O O O O O O O O O O O O O O O O O O T-0 T-0 T-0 O O'},
 {'text': 'RT @ Vidiyallk : Government seeks # world aid for # FloodRelief ; 4 ministers appointed to work out plans for donor conference in # Colombo short',
  'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O',
  'explanation': 'O O O O O O O O O O O O O O O O O O O O O O T-0 T-0 O O O'},
 {'text': 'First phase of the @ IMCD_officials # FloodRelief program is over . Please direct dry ration donations to Rathmalana Air Force Camp . # FloodSL',
  'label': 'O O O O O O O O O O O O O O O O O O B-LOC O O O O O O',
  'explanation': 'O O O O O O O O O O O O O O O O T-0 T-0 O T-1 T-2 T-2 O O O'}]

In [4]:
len(train_with_triggers)

3484

---
#### Prepare data for Standard training

In [5]:
def remove_triggers(dataset):
    """Return new sample dicts that keep only the 'text' and 'label' fields.

    Every other key (e.g. 'explanation') is dropped; the input samples are
    not modified.
    """
    return [
        {'text': sample['text'], 'label': sample['label']}
        for sample in dataset
    ]

train_without_triggers = remove_triggers(train_with_triggers)
train_without_triggers[:3]

[{'text': 'Korean actress Lee Young-ae know as ‘ Changumi ’ made a contribution of USD 50,000 to support flood relief efforts in Sri Lanka',
  'label': 'O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC'},
 {'text': 'RT @ Vidiyallk : Government seeks # world aid for # FloodRelief ; 4 ministers appointed to work out plans for donor conference in # Colombo short',
  'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O'},
 {'text': 'First phase of the @ IMCD_officials # FloodRelief program is over . Please direct dry ration donations to Rathmalana Air Force Camp . # FloodSL',
  'label': 'O O O O O O O O O O O O O O O O O O B-LOC O O O O O O'}]

In [6]:
len(train_without_triggers)

3484

---
#### Prepare data for evaluation

In [7]:
# Load the held-out flood samples that are later split into dev and test.
# The context manager guarantees the file handle is closed after parsing.
with open('dev_IDRISI-RE-flood.json') as dev_file:
    dev_n_test = json.load(dev_file)
print(len(dev_n_test))
print(dev_n_test[:3])

323
[{'text': 'Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'label': 'O O O B-LOC O O O O O O O O O O'}, {'text': 'Maldives offers financial assistance to flood hit Sri Lanka', 'label': 'O O O O O O O B-LOC I-LOC'}, {'text': '@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [8]:
# Halve the held-out flood data in file order (no shuffling): the leading
# half becomes the dev split, the remainder the test split.
size = len(dev_n_test)
cut = int(size*0.5)
dev = list(dev_n_test[:cut])
test = list(dev_n_test[cut:])

# Sanity output: total size, split sizes and a peek at each split.
for shown in (len(dev)+len(test), len(dev), len(test), dev[:3], test[:3]):
    print(shown)

323
161
162
[{'text': 'Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'label': 'O O O B-LOC O O O O O O O O O O'}, {'text': 'Maldives offers financial assistance to flood hit Sri Lanka', 'label': 'O O O O O O O B-LOC I-LOC'}, {'text': '@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O'}]
[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'}, {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .', 'label': 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'}, {'text': '@ narendr

In [9]:
# Flatten each flood test sample into a [text, label] pair.
test_strings = [[sample['text'], sample['label']] for sample in test]
print(len(test_strings))
print(test_strings[:3])

162
[['# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala', 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'], ['India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .', 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'], ['@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods', 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O']]


In [10]:
# Flatten each flood dev sample into a [text, label] pair.
dev_strings = [[sample['text'], sample['label']] for sample in dev]
print(len(dev_strings))
print(dev_strings[:3])

161
[['Medical camp at Mathugama today . # FloodSL @ ippfsar # srhr # volunteers', 'O O O B-LOC O O O O O O O O O O'], ['Maldives offers financial assistance to flood hit Sri Lanka', 'O O O O O O O B-LOC I-LOC'], ['@ AnoopCilantro @ CANSouthAsia The # FloodSL death toll now officially passed 200 . Several people missing . Relief operations delayed with intermittent heavy showers', 'O O O O O O O O O O O O O O O O O O O O O O O O O']]


In [11]:
# Load the cyclone test samples; the context manager closes the file handle
# once parsing is done.
with open('test_IDRISI-RE-cyclone.json') as cyclone_file:
    test_cyclone = json.load(cyclone_file)

print(len(test_cyclone))
print(test_cyclone[:3])

1038
[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .', 'label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'}, {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'}, {'text': 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai', 'label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [12]:
# Flatten each cyclone test sample into a [text, label] pair.
test_cyclone_strings = [[sample['text'], sample['label']] for sample in test_cyclone]
test_cyclone_strings[:3]

[['I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'],
 ['Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'],
 ['Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai',
  'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O']]

In [13]:
# Load the hurricane test samples; the context manager closes the file handle
# once parsing is done.
with open('test_IDRISI-RE-hurricane.json') as hurricane_file:
    test_hurricane = json.load(hurricane_file)

print(len(test_hurricane))
print(test_hurricane[:3])

1038
[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .', 'label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'}, {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed', 'label': 'B-LOC O O O O O O O B-LOC O O O O O O O O'}, {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube', 'label': 'O O O O O O O O O O O O O O O O O O O O O O O O'}]


In [14]:
# Flatten each hurricane test sample into a [text, label] pair.
test_hurricane_strings = [[sample['text'], sample['label']] for sample in test_hurricane]
test_hurricane_strings[:3]

[['BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'],
 ['Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'B-LOC O O O O O O O B-LOC O O O O O O O O'],
 ['Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'O O O O O O O O O O O O O O O O O O O O O O O O']]

---
#### Prepare data for prediction

In [15]:
# Keep only the raw text (first element) of every flood test pair.
predict_floods = [pair[0] for pair in test_strings]
predict_floods[:3]

['# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
 '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods']

In [16]:
# Keep only the raw text (first element) of every cyclone test pair.
predict_cyclone = [pair[0] for pair in test_cyclone_strings]
predict_cyclone[:3]

['I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters affected by Cyclone Idai @ UNZimbabwe @ HigherLifeFDN @ WFP @ ChengetoAfrica @ IRCEurope # CycloneIdai']

In [17]:
# Keep only the raw text (first element) of every hurricane test pair.
predict_hurricane = [pair[0] for pair in test_hurricane_strings]
predict_hurricane[:3]

['BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube']

---
#### Auxiliary functions for human evaluation

In [18]:
def sync_label(label, comparison, pad=False):
    """Normalise a space-separated tag string for side-by-side comparison.

    * "S-LOC" becomes "B-LOC" and "E-LOC" becomes "I-LOC".
    * With ``pad=True``, an "O" whose counterpart at the same position in
      ``comparison`` is not "O" is widened to "  O  " (five characters, the
      width of "B-LOC"/"I-LOC") so the two strings line up when printed.

    Returns the re-joined tag string. ``comparison`` is only consulted for
    padding, and only at positions where ``label`` has an "O".
    """
    renamed = {"S-LOC": "B-LOC", "E-LOC": "I-LOC"}
    other_tags = comparison.split()
    synced = []
    for position, tag in enumerate(label.split()):
        if tag == "O":
            if pad and other_tags[position] != "O":
                tag = "  O  "
        else:
            tag = renamed.get(tag, tag)
        synced.append(tag)
    return " ".join(synced)

In [19]:
def span_and_avgs(dists):
    """Summarise a collection of distances as (min, max, mean, median).

    Returns a tuple of four ``None`` values when ``dists`` is empty/falsy.
    """
    if dists:
        ordered = sorted(dists)
        lowest, highest = ordered[0], ordered[-1]
        return lowest, highest, np.mean(ordered), np.median(ordered)
    return None, None, None, None

In [20]:
def synced_trigger_preds(predict, preds, test_data, pad=False):
    """Merge texts, trigger-model predictions and gold samples into result dicts.

    The inputs are walked in parallel (stopping at the shortest):
    ``predict`` (texts), ``preds["class_preds"]``, ``test_data`` (samples with
    a "label" key), ``preds["trigger_preds"]`` and ``preds["distance_preds"]``.

    Each result dict has the keys "text", "pred_label", "true_label", "key"
    and "dist"; both label strings are normalised with ``sync_label`` against
    each other, forwarding ``pad``.
    """
    rows = zip(
        predict,
        preds["class_preds"],
        test_data,
        preds["trigger_preds"],
        preds["distance_preds"],
    )
    synced = []
    for text, pred_tags, sample, trigkey, dist in rows:
        synced.append({
            "text": text,
            "pred_label": sync_label(pred_tags, sample["label"], pad=pad),
            "true_label": sync_label(sample["label"], pred_tags, pad=pad),
            "key": trigkey,
            "dist": dist,
        })
    return synced

In [21]:
def categorize_dists(results):
    """Sort each result's "dist" into outcome buckets.

    Returns five lists, in this order:
      1. correct        - "pred_label" equals "true_label"
      2. wrong          - every result that is not correct
      3. false negative - wrong, and the predicted label *string* is shorter
      4. false positive - wrong, and the predicted label *string* is longer
      5. mixed          - wrong, and both label strings have equal length

    The comparison is on raw string length, not on tag counts.
    NOTE(review): presumably this relies on unpadded labels, where "O" is
    shorter than "B-LOC"/"I-LOC" - confirm that callers pass pad=False data.
    """
    correct, wrong = [], []
    false_negative, false_positive, false_mixed = [], [], []
    for result in results:
        predicted = result["pred_label"]
        expected = result["true_label"]
        dist = result["dist"]
        if predicted == expected:
            correct.append(dist)
        else:
            wrong.append(dist)
            if len(predicted) < len(expected):
                false_negative.append(dist)
            elif len(predicted) > len(expected):
                false_positive.append(dist)
            else:
                false_mixed.append(dist)
    return correct, wrong, false_negative, false_positive, false_mixed

In [22]:
def evaluate_chunk(params, instances, threshold, interval, correct):
    """Report and remotely evaluate one distance bucket.

    Prints the bucket bounds (``threshold - interval`` to ``threshold``), the
    number of instances and ``correct / len(instances)``, then POSTs
    ``instances`` together with ``params`` to the trigger eval endpoint under
    ``FAST_API_URL`` and prints the raw response text.

    An empty ``instances`` list is reported as such and skipped: there is
    nothing to score, and dividing by its length would raise
    ZeroDivisionError.
    """
    if not instances:
        print(f"Interval: {threshold-interval:.2f}-{threshold:.2f}. Instances: 0. Accuracy: n/a")
        return
    print(f"Interval: {threshold-interval:.2f}-{threshold:.2f}. Instances: {len(instances)}. Accuracy: {correct / len(instances):.4f}")
    response = requests.post(
        FAST_API_URL + '/training/trigger/eval/',
        json={
            'params': params,
            'eval_data': instances,
        }
    )
    print(response.text)

In [23]:
def eval_per_dist_interval(results, interval):
    """Evaluate predictions in consecutive distance buckets of width ``interval``.

    Walks ``results`` once, collecting ``[text, true_label]`` pairs and a
    count of exact label matches. Whenever a result's "dist" exceeds the
    current threshold, the collected bucket (if non-empty) is handed to
    ``evaluate_chunk`` and the threshold advances by ``interval``. The
    notebook-level ``params_trigger_eval`` is used as request parameters.

    NOTE(review): the threshold only ever grows, so ``results`` is presumably
    expected to arrive sorted by ascending "dist" - confirm against callers.
    """
    threshold = interval
    correct = 0
    instances = []

    for result in results:
        while result["dist"] > threshold:
            if instances:
                evaluate_chunk(params_trigger_eval, instances, threshold, interval, correct)
                instances = []
                correct = 0
            threshold += interval
        instances.append([result["text"], result["true_label"]])
        if result["pred_label"] == result["true_label"]:
            correct += 1
    # Flush the last bucket only if it holds something; with empty `results`
    # an unconditional call would divide by zero inside evaluate_chunk.
    if instances:
        evaluate_chunk(params_trigger_eval, instances, threshold, interval, correct)

In [24]:
def trigkey_dists(results):
    """Group the "dist" values of ``results`` by their "key".

    Returns a dict mapping each key to its distances, in encounter order.
    """
    grouped = {}
    for result in results:
        trigkey = result["key"]
        dist = result["dist"]
        grouped.setdefault(trigkey, []).append(dist)
    return grouped

In [25]:
def evaluate_trigkey(params, trigkey, instances, correct):
    """Report and remotely evaluate the instances of a single trigger key.

    Prints the key, the instance count and ``correct / len(instances)``, then
    POSTs ``instances`` with ``params`` to the trigger eval endpoint under
    ``FAST_API_URL`` and prints the raw response text.
    """
    accuracy = correct / len(instances)
    print(f"Trigkey: {trigkey}. Instances: {len(instances)}. Accuracy: {accuracy:.4f}")
    payload = {'params': params, 'eval_data': instances}
    response = requests.post(FAST_API_URL + '/training/trigger/eval/', json=payload)
    print(response.text)

In [26]:
def eval_per_trigkey(results):
    """Evaluate predictions separately for every trigger key.

    Groups ``results`` by "key", tracking per key the number of exact label
    matches and the ``[text, true_label]`` pairs, then calls
    ``evaluate_trigkey`` once per key (in first-seen order) with the
    notebook-level ``params_trigger_eval``.
    """
    per_key = {}

    for result in results:
        trigkey = result["key"]
        pair = [result["text"], result["true_label"]]
        hit = 1 if result["pred_label"] == result["true_label"] else 0
        if trigkey not in per_key:
            # [number of correct predictions, collected instances]
            per_key[trigkey] = [0, []]
        per_key[trigkey][0] += hit
        per_key[trigkey][1].append(pair)

    for trigkey, (n_correct, pairs) in per_key.items():
        evaluate_trigkey(params_trigger_eval, trigkey, pairs, n_correct)

In [27]:
def entityless_predictions_per_trigkey(results):
    """Count, per trigger key, how many label strings contain no "B".

    Returns ``{key: {"total": n, "pred": p, "true": t}}`` where ``n`` is the
    number of results for that key, ``p`` how many of their "pred_label"
    strings lack the character "B", and ``t`` the same for "true_label".
    """
    tallies = {}
    for result in results:
        trigkey = result["key"]
        pred_empty = int("B" not in result["pred_label"])
        true_empty = int("B" not in result["true_label"])
        entry = tallies.get(trigkey)
        if entry is None:
            tallies[trigkey] = {"total": 1, "pred": pred_empty, "true": true_empty}
        else:
            entry["total"] += 1
            entry["pred"] += pred_empty
            entry["true"] += true_empty
    return tallies

In [28]:
def label_phrases(dataset):
    """Collect the labelled phrases of every sample, in order of appearance.

    For each sample, tokens whose tag is not "O" are accumulated into the
    current phrase; a "B-LOC" tag closes a non-empty phrase and starts a new
    one, and any phrase still open at the end of the sample is closed too.
    Phrases are returned as space-joined strings.
    """
    phrases = []
    for sample in dataset:
        tokens = sample['text'].split()
        current = []
        for position, tag in enumerate(sample['label'].split()):
            if tag == "O":
                continue
            if tag == "B-LOC" and current:
                phrases.append(" ".join(current))
                current = []
            current.append(tokens[position])
        if current:
            phrases.append(" ".join(current))
    return phrases

In [29]:
def trigger_phrases(dataset):
    """Collect the trigger phrases of every sample, in order of appearance.

    For each sample, tokens whose 'explanation' tag is not "O" are
    accumulated into the current phrase; the phrase is closed whenever the
    tag differs from the previous non-"O" tag, and at the end of the sample.
    Phrases are returned as space-joined strings.

    Boundaries are detected by comparing the full tag with the previous one,
    not the tag's last character with a running counter. This keeps phrases
    intact when a sample's first trigger id is not 0, when ids are skipped,
    or when ids have more than one digit (e.g. "T-10").
    """
    phrases = []
    for sample in dataset:
        tokens = sample['text'].split()
        tags = sample['explanation'].split()
        phrase = []
        current_tag = None
        for i, tag in enumerate(tags):
            if tag == "O":
                continue
            if phrase and tag != current_tag:
                phrases.append(" ".join(phrase))
                phrase = []
            current_tag = tag
            phrase.append(tokens[i])
        if phrase:
            phrases.append(" ".join(phrase))
    return phrases

---
#### Check dataset

In [30]:
# Unique Tweets

#--Event indices--
# Sri Lanka :429
#Midwest US 429:1595
#    Kerala 1595:2990
#  Maryland 2990:
# Deduplicate on tweet text: the dict keys are the distinct texts.
tweets = {}
for sample in train_with_triggers:
    tweets.setdefault(sample["text"], True)
len(tweets)

2281

In [31]:
# Named Entities
nameds = label_phrases(train_with_triggers)
nameds[:10]

['Sri Lanka',
 'Colombo',
 'Rathmalana',
 'srilanka',
 'srilanka',
 'Bangladesh',
 'Sri Lanka',
 'Gampaha',
 'SriLanka',
 'SriLanka']

In [32]:
len(nameds)

3106

In [33]:
# Tally how often each named-entity phrase occurs, then list the
# (phrase, count) pairs from most to least frequent.
phrase_count = {}
for phrase in nameds:
    phrase_count[phrase] = phrase_count.get(phrase, 0) + 1
phrase_count = sorted(phrase_count.items(), key=lambda pair: pair[1], reverse=True)
phrase_count

[('Nebraska', 708),
 ('Kerala', 549),
 ('Maryland', 258),
 ('SriLanka', 108),
 ('Ellicott City', 99),
 ('Sri Lanka', 76),
 ('Iowa', 76),
 ('India', 53),
 ('kerala', 42),
 ('srilanka', 32),
 ('Omaha', 22),
 ('Missouri', 17),
 ('Ernakulam', 17),
 ('Baltimore', 17),
 ('Kansas', 16),
 ('Chengannur', 16),
 ('Kodagu', 13),
 ('South Dakota', 12),
 ('Alappuzha', 12),
 ('UAE', 11),
 ('Karnataka', 11),
 ('Bangladesh', 10),
 ('Israel', 10),
 ('Wisconsin', 10),
 ('Pathanamthitta', 10),
 ('Howard County', 10),
 ('EllicottCity', 10),
 ('NEBRASKA', 9),
 ('Ohio', 9),
 ('Columbus', 9),
 ('Mumbai', 9),
 ('Chennai', 9),
 ('nebraska', 8),
 ('Aluva', 8),
 ('Kochi', 8),
 ('Odisha', 8),
 ('Ellicott', 8),
 ('Patapsco River', 8),
 ('China', 7),
 ('Kalutara', 7),
 ('Pakistan', 7),
 ('Indian', 7),
 ('Texas', 7),
 ('Nebraskas', 7),
 ('USA', 7),
 ('Delhi', 7),
 ('Ratnapura', 6),
 ('Florida', 6),
 ('Puerto Rico', 6),
 ('Fremont', 6),
 ('North Dakota', 6),
 ('Gujarat', 6),
 ('KERALA', 6),
 ('Maharashtra', 6),
 ('Ben

In [34]:
len(phrase_count)

529

In [35]:
named_tokens = " ".join(nameds).split()
named_tokens[:10]

['Sri',
 'Lanka',
 'Colombo',
 'Rathmalana',
 'srilanka',
 'srilanka',
 'Bangladesh',
 'Sri',
 'Lanka',
 'Gampaha']

In [36]:
len(named_tokens)

3475

In [37]:
# Tally how often each named-entity token occurs, then list the
# (token, count) pairs from most to least frequent.
token_count = {}
for token in named_tokens:
    token_count[token] = token_count.get(token, 0) + 1
token_count = sorted(token_count.items(), key=lambda pair: pair[1], reverse=True)
token_count

[('Nebraska', 710),
 ('Kerala', 549),
 ('Maryland', 261),
 ('City', 112),
 ('SriLanka', 109),
 ('Ellicott', 107),
 ('Sri', 84),
 ('Iowa', 78),
 ('Lanka', 77),
 ('India', 53),
 ('kerala', 42),
 ('srilanka', 32),
 ('Omaha', 23),
 ('Missouri', 20),
 ('Dakota', 18),
 ('Ernakulam', 18),
 ('Baltimore', 18),
 ('Kansas', 16),
 ('Chengannur', 16),
 ('County', 15),
 ('River', 15),
 ('Kodagu', 13),
 ('South', 12),
 ('Alappuzha', 12),
 ('Howard', 12),
 ('UAE', 11),
 ('Karnataka', 11),
 ('Bangladesh', 10),
 ('Israel', 10),
 ('Wisconsin', 10),
 ('Pathanamthitta', 10),
 ('EllicottCity', 10),
 ('NEBRASKA', 9),
 ('Ohio', 9),
 ('Columbus', 9),
 ('Mumbai', 9),
 ('Chennai', 9),
 ('nebraska', 8),
 ('North', 8),
 ('Aluva', 8),
 ('Kochi', 8),
 ('Odisha', 8),
 ('Patapsco', 8),
 ('Matara', 7),
 ('China', 7),
 ('Kalutara', 7),
 ('Pakistan', 7),
 ('Indian', 7),
 ('Texas', 7),
 ('Fremont', 7),
 ('Nebraskas', 7),
 ('USA', 7),
 ('Delhi', 7),
 ('Virginia', 7),
 ('Ratnapura', 6),
 ('Florida', 6),
 ('Puerto', 6),
 ('R

In [38]:
len(token_count)

591

In [39]:
# Trigger Entities
triggers = trigger_phrases(train_with_triggers)
triggers[:10]

['relief efforts in',
 'conference in',
 'donations to',
 'Air',
 'Force Camp',
 'donates',
 'flood victims',
 'In',
 'camp',
 'travelled from North to South of']

In [40]:
len(triggers)

5061

In [41]:
# Tally how often each trigger phrase occurs, then list the
# (phrase, count) pairs from most to least frequent.
unique_trigger_entities = {}
for phrase in triggers:
    unique_trigger_entities[phrase] = unique_trigger_entities.get(phrase, 0) + 1
unique_trigger_entities = sorted(unique_trigger_entities.items(), key=lambda pair: pair[1], reverse=True)
unique_trigger_entities

[('in', 304),
 ('Nebraska', 107),
 ('Iowa', 100),
 ('floods', 89),
 ('Maryland', 72),
 ('flooding', 67),
 ('flood victims', 59),
 ('flood', 58),
 ('flooding in', 57),
 ('to', 54),
 ('state', 47),
 ('from', 46),
 ('flood relief', 39),
 ('Flood Relief', 34),
 ('floods in', 31),
 ('people of', 29),
 ('Missouri', 29),
 ('South Dakota', 28),
 ('homes', 28),
 ('roads', 27),
 ('communities', 26),
 ('Kerala', 25),
 ('states', 23),
 ('District', 21),
 ('Wisconsin', 21),
 ('Mississippi', 21),
 ('district', 20),
 ('across', 20),
 ('farmers', 19),
 ('areas', 17),
 ('Floods', 17),
 ('residents', 17),
 ('flood-hit', 16),
 ('at', 16),
 ('Wyoming', 16),
 ('relief camps', 16),
 ('Ellicott City', 16),
 ('city', 15),
 ('Montana', 15),
 ('CM', 15),
 ('Alappuzha', 15),
 ('In', 14),
 ('community', 14),
 ('town', 14),
 ('Illinois', 14),
 ('Ernakulam', 14),
 ('bridges', 13),
 ('livestock', 13),
 ('help', 12),
 ('Kansas', 12),
 ('Midwest', 12),
 ('area', 12),
 ('districts', 11),
 ('Flood', 11),
 ('houses', 11)

In [42]:
len(unique_trigger_entities)

1909

In [43]:
trigger_tokens = " ".join(triggers).split()
trigger_tokens[:10]

['relief',
 'efforts',
 'in',
 'conference',
 'in',
 'donations',
 'to',
 'Air',
 'Force',
 'Camp']

In [44]:
len(trigger_tokens)

8127

In [45]:
# Tally how often each trigger token occurs, then list the
# (token, count) pairs from most to least frequent.
unique_trigger_tokens = {}
for token in trigger_tokens:
    unique_trigger_tokens[token] = unique_trigger_tokens.get(token, 0) + 1
unique_trigger_tokens = sorted(unique_trigger_tokens.items(), key=lambda pair: pair[1], reverse=True)
unique_trigger_tokens

[('in', 964),
 ('flood', 277),
 ('flooding', 222),
 ('of', 204),
 ('floods', 179),
 ('to', 156),
 ('Nebraska', 155),
 ('Iowa', 124),
 ('relief', 121),
 ('from', 119),
 ('victims', 93),
 ('Flood', 89),
 ('City', 89),
 ('Maryland', 88),
 ('flash', 80),
 ('state', 74),
 ('Ellicott', 71),
 ('Relief', 57),
 ('across', 57),
 ('areas', 55),
 ('at', 54),
 ('people', 53),
 ('Dakota', 49),
 ('camps', 46),
 ('communities', 43),
 ('South', 37),
 ('homes', 37),
 ('for', 36),
 ('flood-hit', 35),
 ('Floods', 34),
 ('through', 34),
 ('Kerala', 34),
 ('Missouri', 32),
 ('states', 32),
 ('district', 31),
 ('affected', 29),
 ('roads', 29),
 ('North', 28),
 ('area', 27),
 ('District', 26),
 ('districts', 25),
 ('’', 25),
 ('s', 25),
 ('residents', 24),
 ('In', 23),
 ('counties', 23),
 ('Flooding', 23),
 ('State', 22),
 ('Flash', 22),
 ('hit', 21),
 ('Wisconsin', 21),
 ('city', 21),
 ('Mississippi', 21),
 ('Ernakulam', 20),
 ('community', 19),
 ('farmers', 19),
 ('are', 19),
 ('Alappuzha', 19),
 ('by', 17)

In [46]:
len(unique_trigger_tokens)

1409

---
---
# 2. Training

See [this JSON schema](../fast_api/json_schema.py#L516) for a list of all parameters.

Check [this](../model_training/internal_api/defaults.py) for default values.

## 2.1. Training parameters (Standard and TriggerNER)

### Define training parameters

#### Standard

In [47]:
# Request parameters sent with the standard NER training call.
params_standard_training = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_standard",
    # a string name representing the dataset name
    "dataset_name": "idrisi",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # "True" when data has to be passed with the request,
    # "False" when re-training or the data was processed earlier and can be retrieved
    "build_data": True,
    # number of training epochs
    "num_epochs": 10,
    # training batch size
    "batch_size": 10,
    # learning rate
    "learning_rate": 0.01,
    # embedding to be used for training. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
    # random seed
    "seed": 1337,
}

#### TriggerNER

In [48]:
# Request parameters sent with the TriggerNER training call.
params_trigger_training = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_soft_match",
    # a string name representing the dataset name
    "dataset_name": "idrisi_trigger",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # "True" when data has to be passed with the request,
    # "False" when re-training or the data was processed earlier and can be retrieved
    "build_data": True,
    # number of training epochs
    "num_epochs": 10,
    # number of pre-training epochs
    "pre_train_num_epochs": 20,
    # training batch size
    "batch_size": 10,
    # learning rate
    "learning_rate": 0.01,
    # embedding to be used for training. usual default: "glove.6B.100d"
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
    # random seed
    "seed": 1337,
}

---
## 2.2. Model training (Standard and TriggerNER)

### Run model training

#### Standard

In [49]:
# Start standard NER training through the FastAPI backend.
# Depending on input size and computing environment the request may take a
# long time to return; check the FastAPI logs for progress.
response = requests.post(
    f"{FAST_API_URL}/training/standard/ner/api/",
    json=dict(
        params=params_standard_training,
        labeled_data=train_without_triggers,
        dev_data=dev,
        eval_data=test,
    ),
)
# On success the body is JSON containing a "save_path" key.
response.text

'{"save_path":"/home/yoriyari/LEAN-LIFE/model_api/fast_api/../model_training/trigger_ner/utilities/../../generated_data/saved_models/naive_idrisi_glove.6B.100d_1337_-1.0"}'

#### TriggerNER

In [50]:
# Start TriggerNER training through the FastAPI backend.
# Depending on input size and computing environment the request may take a
# long time to return; check the FastAPI logs for progress.
response = requests.post(
    f"{FAST_API_URL}/training/trigger/api/",
    json=dict(
        params=params_trigger_training,
        explanation_triples=train_with_triggers,
        dev_data=dev,
        eval_data=test,
    ),
)
# On success the body is JSON containing a "save_path" key.
response.text

'{"save_path":"/home/yoriyari/LEAN-LIFE/model_api/fast_api/../model_training/trigger_ner/utilities/../../generated_data/saved_models/trigger_idrisi_trigger_glove.6B.100d_1337_-1.0"}'

---
---
# 3. Evaluation

## 3.1. Precision, recall and F1

### Define evaluation parameters

#### Standard

In [51]:
# Request parameters sent with the standard NER evaluation calls.
# The values mirror those in params_standard_training.
params_standard_eval = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_standard",
    # a string name representing the dataset name
    "dataset_name": "idrisi",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # evaluation batch size
    "batch_size": 10,
    # embedding name; same value as in the training parameters
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
}

#### TriggerNER

In [52]:
# Request parameters sent with the TriggerNER evaluation calls; also read as a
# global by eval_per_dist_interval and eval_per_trigkey.
# The values mirror those in params_trigger_training.
params_trigger_eval = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_soft_match",
    # a string name representing the dataset name
    "dataset_name": "idrisi_trigger",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # evaluation batch size
    "batch_size": 10,
    # embedding name; same value as in the training parameters
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
}

---
### Evaluate Standard performance

#### Floods dev

In [53]:
# Score the standard model on the flood dev pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/standard/ner/eval/",
    json=dict(params=params_standard_eval, eval_data=dev_strings),
)

response.text

'{"precision":98.85057471264368,"recall":75.77092511013215,"f1":85.785536159601}'

#### Floods test

In [54]:
# Score the standard model on the flood test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/standard/ner/eval/",
    json=dict(params=params_standard_eval, eval_data=test_strings),
)

response.text

'{"precision":94.44444444444444,"recall":57.6271186440678,"f1":71.57894736842105}'

#### Cyclone

In [55]:
# Score the standard model on the cyclone test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/standard/ner/eval/",
    json=dict(params=params_standard_eval, eval_data=test_cyclone_strings),
)

response.text

'{"precision":86.66666666666667,"recall":2.880354505169867,"f1":5.575411007862759}'

#### Hurricane

In [56]:
# Score the standard model on the hurricane test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/standard/ner/eval/",
    json=dict(params=params_standard_eval, eval_data=test_hurricane_strings),
)

response.text

'{"precision":96.25468164794007,"recall":25.27040314650934,"f1":40.03115264797508}'

---
### Evaluate TriggerNER performance

#### Floods dev

In [57]:
# Score the TriggerNER model on the flood dev pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/trigger/eval/",
    json=dict(params=params_trigger_eval, eval_data=dev_strings),
)

response.text

'{"precision":89.90825688073394,"recall":86.34361233480176,"f1":88.08988764044945}'

#### Floods test

In [58]:
# Score the TriggerNER model on the flood test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/trigger/eval/",
    json=dict(params=params_trigger_eval, eval_data=test_strings),
)

response.text

'{"precision":88.53211009174312,"recall":81.77966101694916,"f1":85.02202643171807}'

#### Cyclone

In [59]:
# Score the TriggerNER model on the cyclone test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/trigger/eval/",
    json=dict(params=params_trigger_eval, eval_data=test_cyclone_strings),
)

response.text

'{"precision":86.65919282511211,"recall":57.09010339734121,"f1":68.83348174532503}'

#### Hurricane

In [60]:
# Score the TriggerNER model on the hurricane test pairs; the reply text is JSON.
response = requests.post(
    f"{FAST_API_URL}/training/trigger/eval/",
    json=dict(params=params_trigger_eval, eval_data=test_hurricane_strings),
)

response.text

'{"precision":81.58379373848987,"recall":43.55948869223206,"f1":56.794871794871796}'

---
## 3.2. Predictions

### Define prediction parameters

#### Standard

In [61]:
# Request parameters sent with the standard NER prediction calls.
# The values mirror those in params_standard_training.
params_standard_prediction = {
    # a string name representing the model name
    "experiment_name": "idrisi_ner_standard",
    # a string name representing the dataset name
    "dataset_name": "idrisi",
    # task type - "ner" for Named Entity Recognition
    "task": "ner",
    # prediction batch size
    "batch_size": 10,
    # embedding name; same value as in the training parameters
    "embeddings": "glove.6B.100d",
    # embedding dimension of the "embeddings" provided
    "emb_dim": 100,
    # number of hidden dimensions
    "hidden_dim": 200,
}

#### TriggerNER

In [62]:
# Settings sent with every TriggerNER prediction request.
params_trigger_prediction = dict(
    experiment_name="idrisi_ner_soft_match",  # name identifying the model
    dataset_name="idrisi_trigger",            # name identifying the dataset
    task="ner",                               # "ner" = Named Entity Recognition
    batch_size=10,                            # prediction batch size
    embeddings="glove.6B.100d",               # embedding set (usual default: "glove.6B.100d")
    emb_dim=100,                              # dimension of the embeddings named above
    hidden_dim=200,                           # number of hidden dimensions
)

---
### Fetch Standard predictions

#### Floods test

In [63]:
# Request Standard-model predictions for the Floods test sentences.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_floods,
    }
)
# Fail here on an HTTP error status instead of later on a missing "class_preds" key.
response.raise_for_status()

preds_standard_floods = response.json()

# Show each sentence next to its predicted and gold label strings.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
    }
    for sentence, predicted, gold in zip(
        predict_floods, preds_standard_floods["class_preds"], test
    )
]

[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'O O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500 crores today that makes it 920 crores against the demand of 1000 crores . # KeralaFloodRelief # KeralaFloods',
  'pred_label': 'O O O O O O 

#### Cyclone

In [64]:
# Request Standard-model predictions for the Cyclone test sentences.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_cyclone,
    }
)
# Fail here on an HTTP error status instead of later on a missing "class_preds" key.
response.raise_for_status()

preds_standard_cyclone = response.json()

# Show each sentence next to its predicted and gold label strings.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
    }
    for sentence, predicted, gold in zip(
        predict_cyclone, preds_standard_cyclone["class_preds"], test_cyclone
    )
]

[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O   O     O   O   O   O',
  'true_label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O'},
 {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O   O     O   O O O O O O O   O     O  ',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC'},
 {'text': 'Thanks to staff and patrons of @ ZimLibrary_zw and citizens of Zimre Park for the generous clothing donations to our brothers and sisters 

#### Hurricane

In [65]:
# Request Standard-model predictions for the Hurricane test sentences.
response = requests.post(
    FAST_API_URL + '/training/standard/ner/predict/',
    json={
        'params': params_standard_prediction,
        'prediction_data': predict_hurricane,
    }
)
# Fail here on an HTTP error status instead of later on a missing "class_preds" key.
response.raise_for_status()

preds_standard_hurricane = response.json()

# Show each sentence next to its predicted and gold label strings.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
    }
    for sentence, predicted, gold in zip(
        predict_hurricane, preds_standard_hurricane["class_preds"], test_hurricane
    )
]

[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'pred_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'pred_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'true_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O'},
 {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O'},
 {'text': 'Hurricane Dori

---
### Fetch TriggerNER predictions

#### Floods test

In [66]:
# Request TriggerNER predictions for the Floods test sentences.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_floods,
    }
)
# Fail here on an HTTP error status instead of later on a missing response key.
response.raise_for_status()

preds_trigger_floods = response.json()

# Show each sentence with its predicted/gold label strings plus the matched
# trigger ("key") and its distance ("dist") as returned by the service.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
        "key": trigger,
        "dist": distance,
    }
    for sentence, predicted, gold, trigger, distance in zip(
        predict_floods,
        preds_trigger_floods["class_preds"],
        test,
        preds_trigger_floods["trigger_preds"],
        preds_trigger_floods["distance_preds"],
    )
]

[{'text': '# KeralaFloods : 21-Year-Old College Student Hanan Hamid , Trolled For Selling Fish , Donates Rs 1.5 lakh to CM ’ s Relief Fund . Hats Off to U # Hanan ὄF # Meem4Kerala',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Floods/disaster',
  'dist': 0.08039283007383347},
 {'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O   O   O O B-LOC O O O O O O',
  'key': 'North Dakota headed for',
  'dist': 0.4281405508518219},
 {'text': '@ narendramodi Your prompt response is highly appreciable PM Modi ! HM @ rajnathsingh sanctioned 100 crore immidiate relief then released another 320 crores & now youve released another 500

#### Cyclone

In [67]:
# Request TriggerNER predictions for the Cyclone test sentences.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_cyclone,
    }
)
# Fail here on an HTTP error status instead of later on a missing response key.
response.raise_for_status()

preds_trigger_cyclone = response.json()

# Show each sentence with its predicted/gold label strings plus the matched
# trigger ("key") and its distance ("dist") as returned by the service.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
        "key": trigger,
        "dist": distance,
    }
    for sentence, predicted, gold, trigger, distance in zip(
        predict_cyclone,
        preds_trigger_cyclone["class_preds"],
        test_cyclone,
        preds_trigger_cyclone["trigger_preds"],
        preds_trigger_cyclone["distance_preds"],
    )
]

[{'text': 'I fear that the emergency situation caused by # cycloneidai is distracting us from the escalating insurgency in Cabo Delgado # Mozambique .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O B-LOC   O   O B-LOC O',
  'true_label': 'O O O O O O O O O O O O O O O O O O B-LOC I-LOC O B-LOC O',
  'key': 'Flood Relief Program',
  'dist': 0.7129563689231873},
 {'text': 'Last Thursday police officer Constable Edward Dhumukwa ( 32 ) stationed at the Silver Stream command centre was arrested and appeared in court for alleged looting of donations valued at tens of thousands of United States dollars earmarked for # cycloneIdai victims in Chipinge .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC   O  ',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC I-LOC O O O O O O O B-LOC I-LOC',
  'key': 'food relief',
  'dist': 0.02692124806344509},
 {'text': 'Thanks to staff and 

#### Hurricane

In [68]:
# Request TriggerNER predictions for the Hurricane test sentences.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': predict_hurricane,
    }
)
# Fail here on an HTTP error status instead of later on a missing response key.
response.raise_for_status()

preds_trigger_hurricane = response.json()

# Show each sentence with its predicted/gold label strings plus the matched
# trigger ("key") and its distance ("dist") as returned by the service.
# sync_label is applied in both directions with pad=True
# (presumably to align the two strings column-wise; see its definition).
[
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
        "key": trigger,
        "dist": distance,
    }
    for sentence, predicted, gold, trigger, distance in zip(
        predict_hurricane,
        preds_trigger_hurricane["class_preds"],
        test_hurricane,
        preds_trigger_hurricane["trigger_preds"],
        preds_trigger_hurricane["distance_preds"],
    )
]

[{'text': 'BREAKING : Governor McMaster has declared a State of Emergency in South Carolina ahead of Hurricane Dorian . Given the strength and unpredictability of the storm , we must prepare for every possible scenario , ” he said .',
  'pred_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC I-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'floods in Ellicott City',
  'dist': 0.015230085700750351},
 {'text': 'Alabama National Guard Ready to Send Support to Florida to Assist with Hurricane Dorian Relief If Needed',
  'pred_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'true_label': 'B-LOC O O O O O O O B-LOC O O O O O O O O',
  'key': '’ s Rural America Relief',
  'dist': 0.7963294982910156},
 {'text': 'Ongoing damage proving to be greater than expected . Local 10s Jenise Fernandez reports live from the eye of Hurricane Dorian via @ YouTube',
  'pred_label': 'O O O O 

---
## 3.3. Experiments

### All results

#### Floods test

In [69]:
# Floods-test TriggerNER results, ordered by ascending "dist".
results_floods = sorted(
    synced_trigger_preds(predict_floods, preds_trigger_floods, test),
    key=lambda row: row["dist"],
)
results_floods

[{'text': 'Kerala editions of @ NewIndianXpress today . It could nit be distributed in many flood-hit areas but then reading the newspapers definitely not the priority in those areas . Our reporters are also doing their bit to bring the relief teams to those stranded in the marooned towns .',
  'pred_label': 'B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Residents of',
  'dist': 0.004569615703076124},
 {'text': 'RT @ _RKSumit : @ PerennialCold @ AgentSaffron Kerala govt donated 5 crores to Pakistan for flood relief in 2010 .',
  'pred_label': 'O O O O O O O O B-LOC O O O O O B-LOC O O O O O O',
  'true_label': 'O O O O O O O O B-LOC O O O O O B-LOC O O O O O O',
  'key': 'in flood affected areas across',
  'dist': 0.00531335175037384},
 {'text': 'The devastating flooding in Maryland over Memorial Day

#### Cyclone

In [70]:
# Cyclone TriggerNER results, ordered by ascending "dist".
results_cyclone = sorted(
    synced_trigger_preds(predict_cyclone, preds_trigger_cyclone, test_cyclone),
    key=lambda row: row["dist"],
)
results_cyclone

[{'text': 'My father died my brother was injured we have no house to stay but I am happy to be assisting my community that was affected . Despite suffering tragedy and loss as a result of # CycloneIdai , Wadzanai is volunteering to help others impacted by the storm in # Zimbabwe .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O',
  'true_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-LOC O',
  'key': 'rain forecast',
  'dist': 0.003959770314395428},
 {'text': 'As much as 90 % of # Beira , the fourth largest city in # Mozambique was destroyed by # CycloneIdai . The president of Mozambique has called this a real humanitarian disaster of large proportions . People need help now , please donate at .',
  'pred_label': 'O O O O O O O O O O O O O O O B-LOC O O O O O O O O O B-LOC O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O O O O 

#### Hurricane

In [71]:
# Hurricane TriggerNER results, ordered by ascending "dist".
results_hurricane = sorted(
    synced_trigger_preds(predict_hurricane, preds_trigger_hurricane, test_hurricane),
    key=lambda row: row["dist"],
)
results_hurricane

[{'text': 'The folks in both Carolinas should start prepping and it looks like there may not be much left of the Bahamas when this storm is done with the area . That sucks so bad for the people living there . I hope they get luckier than it appears they will .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O B-LOC O O O O O O O O O O O O O O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Department of Health and Human Services',
  'dist': 0.0041004749946296215},
 {'text': 'As # HurricaneDorian continues to threaten the southeast , Best Friends and partners have been transporting animals out of harms way , but we can use the communitys help ! If youre in the southeast and able to transport animals to safety , sign up through .',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': '

---
### Error Analysis

#### Standard Floods test

In [72]:
# Rebuild the (text, predicted, gold) rows for the Standard model on Floods test ...
errors_standard_floods = [
    {
        "text": sentence,
        "pred_label": sync_label(predicted, gold["label"], pad=True),
        "true_label": sync_label(gold["label"], predicted, pad=True),
    }
    for sentence, predicted, gold in zip(
        predict_floods, preds_standard_floods["class_preds"], test
    )
]
# ... and keep only the rows whose synced label strings differ.
errors_standard_floods = [
    row for row in errors_standard_floods
    if row["pred_label"] != row["true_label"]
]
errors_standard_floods

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'O O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'true_label': 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': 'FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  'pred_label': 'O O O O B-LOC O O O   O   O O   O   O O O O O O O O O O O O O O O O O O   O   O O',
  'true_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O'},
 {'text': 'Bollywood Khans donated crores to Pakistan floods now silent on Kerala floods . Proud of u Prabhas',
  'pred_label': 'O O O O O   O   O O O O B-LOC O O O O O O',
  'true_label': 'O O O O O B-LOC O O O O B-LOC O O O O O O'},
 {'text': '. @ narendramodi declining UAE 

#### TriggerNER Floods test

In [73]:
# TriggerNER rows on Floods test whose predicted label string differs from the gold one.
errors_trigger_floods = list(filter(
    lambda row: row["pred_label"] != row["true_label"],
    synced_trigger_preds(predict_floods, preds_trigger_floods, test, pad=True),
))
errors_trigger_floods

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'pred_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O',
  'true_label': '  O   O O O O O O O O O O B-LOC O   O   O O B-LOC O O O O O O',
  'key': 'North Dakota headed for',
  'dist': 0.4281405508518219},
 {'text': '. @ narendramodi declining UAE monetary aid for # KeralaFloodRelief n releasing insufficient Funds to # RebuildKerala is a case of ◼️CARDINAL SIN ◼️POLITICAL HARAKIRI ◼️ACT OF INHUMANITY ◼️TERRIBLE GOVERNANCE ◼️GOVTs ASSAULT ON CITIZENS ◼️MENTAL BANKRUPTCY OF BJP GOVT',
  'pred_label': 'O O O O   O   O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'true_label': 'O O O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'agencies',
  'dist': 0.530505895614624},
 {'text': 'Comrade Saji Cherian , our MLA from Chengannur constituency who is par

#### Union of Standard and TriggerNER Floods test

In [74]:
# Build one row per sentence that at least one model got wrong, carrying the gold
# labels and both models' labels. A model that made no error on a sentence is
# represented by the gold label string.

# First TriggerNER error per sentence text (setdefault keeps the earliest entry,
# which is what a front-to-back linear search would find).
trigger_error_by_text = {}
for trig_err in errors_trigger_floods:
    trigger_error_by_text.setdefault(trig_err["text"], trig_err)

errors = []

# Every Standard error becomes a row; use TriggerNER's prediction when it also
# erred on the same sentence, otherwise the gold labels.
for std_err in errors_standard_floods:
    trig_err = trigger_error_by_text.get(std_err["text"])
    errors.append({
        "text": std_err["text"],
        "true_label": std_err["true_label"],
        "stnd_label": std_err["pred_label"],
        "trig_label": std_err["true_label"] if trig_err is None else trig_err["pred_label"],
    })

# Remaining TriggerNER-only errors. Any sentence shared with the Standard errors
# is already present, so the Standard model was correct on everything added here.
seen_texts = {row["text"] for row in errors}
for trig_err in errors_trigger_floods:
    if trig_err["text"] in seen_texts:
        continue
    seen_texts.add(trig_err["text"])
    errors.append({
        "text": trig_err["text"],
        "true_label": trig_err["true_label"],
        "stnd_label": trig_err["true_label"],
        "trig_label": trig_err["pred_label"],
    })
errors

[{'text': 'India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  'true_label': 'O O O O O O O O O O O B-LOC O O O O B-LOC O O O O O O',
  'stnd_label': 'O O O O O O O O O O O   O   O O O O   O   O O O O O O',
  'trig_label': 'B-LOC O O O O O O O O O O B-LOC O B-LOC O O B-LOC O O O O O O'},
 {'text': 'FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  'true_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O',
  'stnd_label': 'O O O O B-LOC O O O   O   O O   O   O O O O O O O O O O O O O O O O O O   O   O O',
  'trig_label': 'O O O O B-LOC O O O B-LOC O O B-LOC O O O O O O O O O O O O O O O O O O B-LOC O O'},
 {'text': 'Bollywood Khans donated crores to Pakistan floods now silent on Kerala floods . Prou

#### Entities which were correctly predicted by one model but not the other

In [75]:
def _record_detection(held, by_standard, by_triggerner):
    """Append a completed held entity to the list of the model that got it right.

    `held` is None or a tuple (is_standard, text, token_index, token); the
    leading flag selects the target list and is dropped from the stored tuple.
    """
    if held is None:
        return
    (by_standard if held[0] else by_triggerner).append(held[1:])


# Gold entities that exactly one of the two models labelled correctly.
# Each stored tuple is (sentence text, index of the entity's first token, that token).
detected_by_standard = []
detected_by_triggerner = []
hold_for_i_loc = None
for error in errors:
    tokens = error["text"].split()
    true = error["true_label"].split()
    stnd = error["stnd_label"].split()
    trig = error["trig_label"].split()
    for i, label in enumerate(true):
        if label.startswith("B-"):
            # A new entity starts: the previously held one (if any) is complete.
            _record_detection(hold_for_i_loc, detected_by_standard, detected_by_triggerner)
            hold_for_i_loc = None
            # Hold the entity only when exactly one model matched its first token.
            if label == stnd[i] and label != trig[i]:
                hold_for_i_loc = (True, error["text"], i, tokens[i])
            elif label == trig[i] and label != stnd[i]:
                hold_for_i_loc = (False, error["text"], i, tokens[i])
        elif label == "O":
            # Outside any entity: the held entity (if any) is complete.
            _record_detection(hold_for_i_loc, detected_by_standard, detected_by_triggerner)
            hold_for_i_loc = None
        elif label.startswith("I-") and hold_for_i_loc:
            # Continuation token: drop the held entity if the model that matched
            # its first token gets this token wrong.
            model_labels = stnd if hold_for_i_loc[0] else trig
            if label != model_labels[i]:
                hold_for_i_loc = None
    # Flush at the end of every sentence so an entity that ends on the last token
    # is neither carried into the next sentence nor lost after the final one.
    _record_detection(hold_for_i_loc, detected_by_standard, detected_by_triggerner)
    hold_for_i_loc = None
print(len(detected_by_standard))
print(len(detected_by_triggerner))

6
63


In [76]:
# Entities only the Standard model labelled correctly: (text, token index, token) tuples.
detected_by_standard

[('# KeralaSOS Kadavantra regional sports club , ( kadavantra ) has excess food so in case some other camps which are in need can contact them Phone no : -9645221111 , 9061110000 # kerala # KeralaFloods # KeralaFloodRelief # KeralaFloodRescue @ CMOKerala # KeralaReliefFund # verified',
  33,
  'kerala'),
 ('I did my part for our fellow Indians . # Kerala # KeralaFloodRelief # KeralaFloods # donate # whatyoucan',
  10,
  'Kerala'),
 ('Earlier # Apple also donated ₹ 7 Crore for Kerala Flood Relief . # Kerala # KeralaFloodRelief # GoogleForIndia',
  14,
  'Kerala'),
 ('RT @ CAChirag : Should Central Govt Accept Financial help of 700 Cr from UAE ? # KeralaFloods # UAE # Kerala',
  21,
  'Kerala'),
 ('Salem @ ikamalhaasan Fans provided materials for # KeralaFloods # KeralaReliefFund # Kerala # KeralaFloods2018 # KamalHaasan',
  12,
  'Kerala'),
 ('TVM UPDATE : SUPPLIES NEEDED AT KINFRA APPAREL PARK READ TO EAT FOOD PACKETS AND WATER NEEDED URGENTLY # KeralaFloods # KeralaFloods2018 # DoForK

In [77]:
# Entities only TriggerNER labelled correctly: (text, token index, token) tuples.
detected_by_triggerner

[('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  11,
  'Kerala'),
 ('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  16,
  'India'),
 ('FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  8,
  'Bangalore'),
 ('FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',
  11,
  'Kerala'),
 ('FIRST TEMPORARY SHELTERS IN KERALA FOR FLOOD AFFECTED Bangalore Cares for Kerala undertakes the responsibility to build temporary shelters for all the 520 families who lost houses in flood in Wayanad Dt -',

#### False positives

In [78]:
# False positives: tokens whose gold label is "O" but for which a model predicted
# the start of an entity ("B-..."). Each entry is (sentence text, token index, token).
fps_by_standard = []
fps_by_triggerner = []
for error in errors:
    tokens = error["text"].split()
    true = error["true_label"].split()
    stnd = error["stnd_label"].split()
    trig = error["trig_label"].split()
    for i, label in enumerate(true):
        if label != "O":
            continue
        if stnd[i].startswith("B-"):
            fps_by_standard.append((error["text"], i, tokens[i]))
        if trig[i].startswith("B-"):
            fps_by_triggerner.append((error["text"], i, tokens[i]))
print(len(fps_by_standard))
print(len(fps_by_triggerner))

3
17


In [79]:
# Standard-model false positives: (text, token index, token) tuples.
fps_by_standard

[('RT @ dinesh_rajini : Anyone please help them . # KeralaSOS , # KeralaFloods , # KeralaFloods , # Kerala , # KeralaFloodRelief',
  19,
  'Kerala'),
 ('My brothers , aunt and Grand-Parents are stuck in Moozhikkakadavu_pariyaram , Chalakkudy . Location - 10.308208,76.351140 . Phone - +918075659446 . Please RT so some1 can help them . Grandparents health getting worse . Havent had proper food in 2 days . # KeralaFloods # KeralaSOS',
  9,
  'Moozhikkakadavu_pariyaram'),
 ('2 ) A team of swayamsevaks takes care of providing food and shelter , especially in Kendriya Vidyalayas and other schools where many have taken refuge . # Sewavibhag # KeralaFloodRelief',
  16,
  'Kendriya')]

In [80]:
# TriggerNER false positives: (text, token index, token) tuples.
fps_by_triggerner

[('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  0,
  'India'),
 ('India has refused to accept overseas donations for flood relief in Kerala , Thailands Ambassador to India Chutintorn Sam Gongsakdi has said .',
  13,
  'Thailands'),
 ('Comrade Saji Cherian , our MLA from Chengannur constituency who is participating in the flood relief work in Kerala . Our resolve to build New Kerala is getting strengthened by active support of the people .',
  7,
  'Chengannur'),
 ('As a relief measure to the people of severely flood hit Kerala , 9 lakhs litres of filtered drinking water sent to kerala by a water special train from Ratlam to Palghat.All 15 tanks filled with potable water & quality of water of each tank tested # helpinghand # KeralaFloodRelief',
  31,
  'Palghat.All'),
 ('My brothers , aunt and Grand-Parents are stuck in Moozhikkakadavu_pariyaram , Chalakkudy . Location - 10.308208,76.351

---
### Distance ranges and averages

#### Floods test

In [81]:
# Split the Floods-test results into the five groups returned by categorize_dists,
# then print the span_and_avgs summary for all distances and for each group.
correct, wrong, fns, fps, mixed = categorize_dists(results_floods)

print("ALL:", span_and_avgs(preds_trigger_floods["distance_preds"]))
# Correct / wrong groups also report how many entries they contain.
for _tag, _group in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_group), len(_group))
for _tag, _group in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_group))

ALL: (0.004569615703076124, 1.3695626258850098, 0.36066233004259385, 0.3130962699651718)
COR: (0.004569615703076124, 1.3695626258850098, 0.3572905000917552, 0.2788701057434082) 122
WRO: (0.008965759538114071, 1.0436956882476807, 0.3709464113926515, 0.3333824723958969) 40
FNs: (0.013848718255758286, 0.9771084785461426, 0.36384966570351807, 0.3777579665184021)
FPs: (0.008965759538114071, 1.0436956882476807, 0.39887472837128574, 0.3333824723958969)
MIX: (0.017268365249037743, 0.017268365249037743, 0.017268365249037743, 0.017268365249037743)


#### Cyclone

In [82]:
# Split the Cyclone results into the five groups returned by categorize_dists,
# then print the span_and_avgs summary for all distances and for each group.
correct, wrong, fns, fps, mixed = categorize_dists(results_cyclone)

print("ALL:", span_and_avgs(preds_trigger_cyclone["distance_preds"]))
# Correct / wrong groups also report how many entries they contain.
for _tag, _group in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_group), len(_group))
for _tag, _group in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_group))

ALL: (0.003959770314395428, 1.7627986669540405, 0.3753517373682131, 0.30892400443553925)
COR: (0.003959770314395428, 1.7627986669540405, 0.37768326496438853, 0.29959337413311005) 566
WRO: (0.004507747478783131, 1.4828890562057495, 0.37255588012364677, 0.3212398737668991) 472
FNs: (0.004613111726939678, 1.4828890562057495, 0.3684770908142554, 0.32373732328414917)
FPs: (0.004507747478783131, 1.4068384170532227, 0.41276654556821113, 0.3173944652080536)
MIX: (0.00916292704641819, 0.936464250087738, 0.3276103441848567, 0.29372280836105347)


#### Hurricane

In [83]:
# Split the Hurricane results into the five groups returned by categorize_dists,
# then print the span_and_avgs summary for all distances and for each group.
correct, wrong, fns, fps, mixed = categorize_dists(results_hurricane)

print("ALL:", span_and_avgs(preds_trigger_hurricane["distance_preds"]))
# Correct / wrong groups also report how many entries they contain.
for _tag, _group in (("COR:", correct), ("WRO:", wrong)):
    print(_tag, span_and_avgs(_group), len(_group))
for _tag, _group in (("FNs:", fns), ("FPs:", fps), ("MIX:", mixed)):
    print(_tag, span_and_avgs(_group))

ALL: (0.0041004749946296215, 1.7922614812850952, 0.42284297342286564, 0.3866172134876251)
COR: (0.004321516491472721, 1.7922614812850952, 0.4199360860086902, 0.37036481499671936) 592
WRO: (0.0041004749946296215, 1.7126654386520386, 0.42670144281567246, 0.3945060074329376) 446
FNs: (0.0041004749946296215, 1.7126654386520386, 0.42885971614405966, 0.394020676612854)
FPs: (0.0054724207147955894, 1.150521159172058, 0.42700335880877766, 0.48291903734207153)
MIX: (0.009135786443948746, 0.766053318977356, 0.31804980942979455, 0.30582238733768463)


---
### Distance ranges per trigger key

#### All test datasets

In [84]:
# Per-trigger-key distances over the three test sets combined (floods, cyclone, hurricane).
trigkey_dists_all = trigkey_dists([*results_floods, *results_cyclone, *results_hurricane])

In [85]:
# For every trigger key: a header line with its instance count, then its span_and_avgs summary.
for key, value in trigkey_dists_all.items():
    print(f"{key} ({len(value)} instances)", span_and_avgs(value), sep="\n")

Residents of (1 instances)
(0.004569615703076124, 0.004569615703076124, 0.004569615703076124, 0.004569615703076124)
in flood affected areas across (30 instances)
(0.00531335175037384, 0.014630381017923355, 0.00749294141617914, 0.006910649826750159)
has extended (8 instances)
(0.004507747478783131, 0.007416040636599064, 0.005849302397109568, 0.005849772831425071)
parishes (6 instances)
(0.00512875197455287, 0.010509267449378967, 0.006966376444324851, 0.0060099950060248375)
should be delivered at (1 instances)
(0.008965759538114071, 0.008965759538114071, 0.008965759538114071, 0.008965759538114071)
Flash floods ripped through Ellicott City (1 instances)
(0.009202221408486366, 0.009202221408486366, 0.009202221408486366, 0.009202221408486366)
in Panchayat Community Hall (1 instances)
(0.00936727412045002, 0.00936727412045002, 0.00936727412045002, 0.00936727412045002)
waters have receded in (5 instances)
(0.007414696738123894, 0.011959951370954514, 0.009494453109800816, 0.009750365279614925)

#### Floods test

In [86]:
# Per-trigger-key distances for the Floods test results (a mapping; iterated with .items() below).
trigkey_dists_floods = trigkey_dists(results_floods)

In [87]:
# For every trigger key: a header line with its instance count, then its span_and_avgs summary.
for key, value in trigkey_dists_floods.items():
    print(f"{key} ({len(value)} instances)", span_and_avgs(value), sep="\n")

Residents of (1 instances)
(0.004569615703076124, 0.004569615703076124, 0.004569615703076124, 0.004569615703076124)
in flood affected areas across (1 instances)
(0.00531335175037384, 0.00531335175037384, 0.00531335175037384, 0.00531335175037384)
has extended (2 instances)
(0.0056289685890078545, 0.006567317061126232, 0.006098142825067043, 0.006098142825067043)
parishes (2 instances)
(0.008749024011194706, 0.010509267449378967, 0.009629145730286837, 0.009629145730286837)
should be delivered at (1 instances)
(0.008965759538114071, 0.008965759538114071, 0.008965759538114071, 0.008965759538114071)
Flash floods ripped through Ellicott City (1 instances)
(0.009202221408486366, 0.009202221408486366, 0.009202221408486366, 0.009202221408486366)
in Panchayat Community Hall (1 instances)
(0.00936727412045002, 0.00936727412045002, 0.00936727412045002, 0.00936727412045002)
waters have receded in (2 instances)
(0.009750365279614925, 0.011959951370954514, 0.01085515832528472, 0.01085515832528472)
flo

#### Cyclone

In [88]:
# Per-trigger-key distances for the Cyclone results (a mapping; iterated with .items() below).
trigkey_dists_cyclone = trigkey_dists(results_cyclone)

In [89]:
# For every trigger key: a header line with its instance count, then its span_and_avgs summary.
for key, value in trigkey_dists_cyclone.items():
    print(f"{key} ({len(value)} instances)", span_and_avgs(value), sep="\n")

rain forecast (2 instances)
(0.003959770314395428, 0.014596153050661087, 0.009277961682528257, 0.009277961682528257)
rose in (5 instances)
(0.00402640737593174, 0.007497730199247599, 0.00592222074046731, 0.00584847666323185)
has extended (6 instances)
(0.004507747478783131, 0.007416040636599064, 0.00576635558779041, 0.0056994452606886625)
trip to (7 instances)
(0.004871320445090532, 0.009712755680084229, 0.006468436880303281, 0.005645656026899815)
hit (1 instances)
(0.0051209935918450356, 0.0051209935918450356, 0.0051209935918450356, 0.0051209935918450356)
parishes (2 instances)
(0.005391225218772888, 0.0065475692972540855, 0.005969397258013487, 0.005969397258013487)
in flood affected areas across (14 instances)
(0.005785800516605377, 0.01080243568867445, 0.007542393502912351, 0.007356500253081322)
proceed to (3 instances)
(0.0066625261679291725, 0.009201748296618462, 0.008139417817195257, 0.008553978987038136)
will be send (4 instances)
(0.006744896527379751, 0.014438873156905174, 0.0

#### Hurricane

In [90]:
# Per-trigger-key distances for the Hurricane results (a mapping; iterated with .items() below).
trigkey_dists_hurricane = trigkey_dists(results_hurricane)

In [91]:
# For every trigger key: a header line with its instance count, then its span_and_avgs summary.
for key, value in trigkey_dists_hurricane.items():
    print(f"{key} ({len(value)} instances)", span_and_avgs(value), sep="\n")

Department of Health and Human Services (1 instances)
(0.0041004749946296215, 0.0041004749946296215, 0.0041004749946296215, 0.0041004749946296215)
rose in (17 instances)
(0.004321516491472721, 0.6100485324859619, 0.17110404216081782, 0.00724363187327981)
livestock across (5 instances)
(0.004816174972802401, 0.011779846623539925, 0.007667819503694773, 0.007027207873761654)
flooding across Nebraska (2 instances)
(0.004940600134432316, 0.007267453242093325, 0.00610402668826282, 0.00610402668826282)
started for (2 instances)
(0.00504837604239583, 0.009131480939686298, 0.007089928491041064, 0.007089928491041064)
parishes (2 instances)
(0.00512875197455287, 0.0054724207147955894, 0.00530058634467423, 0.00530058634467423)
in flood affected areas across (15 instances)
(0.00542091578245163, 0.014630381017923355, 0.007592092112948497, 0.006507672369480133)
eastern (1 instances)
(0.005736097227782011, 0.005736097227782011, 0.005736097227782011, 0.005736097227782011)
Highway 00 near Bellevue (4 in

---
### Performance per trigger key

#### All test datasets

In [92]:
# Evaluate per trigger key over the three test sets combined with `+`.
# For each key the helper prints the instance count, the accuracy, and a
# precision/recall/f1 record (see the output below).
eval_per_trigkey(results_floods+results_cyclone+results_hurricane)

Trigkey: Residents of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: in flood affected areas across. Instances: 30. Accuracy: 0.7000
{"precision":85.71428571428571,"recall":57.14285714285714,"f1":68.57142857142858}
Trigkey: has extended. Instances: 8. Accuracy: 0.5000
{"precision":87.5,"recall":73.68421052631578,"f1":80.0}
Trigkey: parishes. Instances: 6. Accuracy: 0.5000
{"precision":80.0,"recall":57.14285714285714,"f1":66.66666666666666}
Trigkey: should be delivered at. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Flash floods ripped through Ellicott City. Instances: 1. Accuracy: 0.0000
{"precision":75.0,"recall":75.0,"f1":75.0}
Trigkey: in Panchayat Community Hall. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: waters have receded in. Instances: 5. Accuracy: 0.8000
{"precision":80.0,"recall":100.0,"f1":88.88888888888889}
Trigkey: flooding across Nebraska. Instances: 4. A

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Richardson Counties. Instances: 3. Accuracy: 0.3333
{"precision":100.0,"recall":62.5,"f1":76.92307692307692}
Trigkey: Indoor Stadium. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Cmp. Instances: 8. Accuracy: 0.6250
{"precision":100.0,"recall":61.53846153846154,"f1":76.19047619047619}
Trigkey: Medicine Flood Relief Donation Management Center. Instances: 3. Accuracy: 0.3333
{"precision":80.0,"recall":80.0,"f1":80.0}
Trigkey: Hospice and Palliative. Instances: 38. Accuracy: 0.3684
{"precision":84.21052631578947,"recall":30.76923076923077,"f1":45.07042253521127}
Trigkey: flood zone. Instances: 13. Accuracy: 0.2308
{"precision":75.0,"recall":33.33333333333333,"f1":46.153846153846146}
Trigkey: North Dakota headed for. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":100.0,"f1":66.66666666666667}
Trigkey: Came frm. Instances: 4. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.666666666

{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: CPIM of. Instances: 22. Accuracy: 0.8636
{"precision":100.0,"recall":78.57142857142857,"f1":88.0}
Trigkey: reservations in. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: other state. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: traveling through. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":33.33333333333333,"f1":40.0}
Trigkey: haul to. Instances: 7. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: in Howard County. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: populations. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Air Force. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: hospitals. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: naval base

{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: Flash floods in Maryland. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: ’ s Ambassador. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: along. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: other foreign. Instances: 2. Accuracy: 0.5000
{"precision":50.0,"recall":100.0,"f1":66.66666666666667}
Trigkey: North Carolina to Omaha. Instances: 2. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: gave. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: its main street. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":33.33333333333333,"f1":40.0}
Trigkey: RELIEF. Instances: 5. Accuracy: 0.8000
{"precision":100.0,"recall":77.77777777777779,"f1":87.50000000000001}
Trigkey: pray for. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":10

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: trapped in flood hit. Instances: 2. Accuracy: 0.0000
{"precision":80.0,"recall":57.14285714285714,"f1":66.66666666666666}
Trigkey: Breweries throughout. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: pradesh mahila congress. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: health camp. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Locals in. Instances: 3. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Nebraska/Southwest. Instances: 5. Accuracy: 0.6000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: American Red Cross of Nebraska. Instances: 2. Accuracy: 0.0000
{"precision":75.0,"recall":75.0,"f1":75.0}
Trigkey: swept through. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Specialized Equipment Program. Instances: 2. Accuracy: 0.5000
{"precision":100.

{"precision":100.0,"recall":77.77777777777779,"f1":87.50000000000001}
Trigkey: Iowa flood relief. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Little Patuxtent in Laurel. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: state electricity board officer. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: colony in. Instances: 6. Accuracy: 0.8333
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood devastation. Instances: 4. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: families living in. Instances: 2. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: public. Instances: 3. Accuracy: 0.3333
{"precision":100.0,"recall":57.14285714285714,"f1":72.72727272727272}
Trigkey: Spencer. Instances: 11. Accuracy: 0.4545
{"precision":66.66666666666666,"recall":61.53846153846154,"f1":64.0}
Trigkey: brought flash flooding to Ellicott City. Inst

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: rice production in. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: doctors. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: attack. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: In flood-hit. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Maryland state. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: areas near. Instances: 2. Accuracy: 0.5000
{"precision":66.66666666666666,"recall":100.0,"f1":80.0}
Trigkey: amid. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":40.0,"f1":57.142857142857146}
Trigkey: Correctional Youth Institute. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":25.0,"f1":40.0}
Trigkey: above. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: American Red Cross. Instances: 5. Accuracy: 1.0000
{"preci

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Govt of. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flash flooding rips through central. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":33.33333333333333,"f1":40.0}
Trigkey: SDRF of. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: areas across. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: declared in. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: around. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Companies in. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: take place in. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: deployment to. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: downtown. Instances: 1. Accuracy: 0.0000
{"precision

#### Floods test

In [93]:
# Per-trigger-key evaluation restricted to the floods test set.
# NOTE(review): in the saved output some keys show Accuracy 1.0 together with
# precision/recall/f1 of 0 (e.g. 'parishes'); the entity-less counts further
# down suggest these are instances with no gold entities -- confirm.
eval_per_trigkey(results_floods)

Trigkey: Residents of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: in flood affected areas across. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: has extended. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: parishes. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: should be delivered at. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Flash floods ripped through Ellicott City. Instances: 1. Accuracy: 0.0000
{"precision":75.0,"recall":75.0,"f1":75.0}
Trigkey: in Panchayat Community Hall. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: waters have receded in. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flooding across Nebraska. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: flash floods in Ellico

{"precision":100.0,"recall":80.0,"f1":88.88888888888889}
Trigkey: flood zone. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: North Dakota headed for. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":100.0,"f1":66.66666666666667}
Trigkey: Came frm. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: roads in Eastern. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: submerges. Instances: 2. Accuracy: 0.5000
{"precision":80.0,"recall":100.0,"f1":88.88888888888889}
Trigkey: are missing in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: trauma relief camps. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Health Minister. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: scouring. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":80.0,"f1":88.8

#### Cyclone

In [94]:
# Per-trigger-key evaluation restricted to the cyclone test set; prints the
# instance count, accuracy, and precision/recall/f1 for each key.
eval_per_trigkey(results_cyclone)

Trigkey: rain forecast. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: rose in. Instances: 5. Accuracy: 0.6000
{"precision":100.0,"recall":60.0,"f1":75.0}
Trigkey: has extended. Instances: 6. Accuracy: 0.3333
{"precision":81.81818181818183,"recall":64.28571428571429,"f1":72.0}
Trigkey: trip to. Instances: 7. Accuracy: 0.4286
{"precision":100.0,"recall":58.333333333333336,"f1":73.6842105263158}
Trigkey: hit. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: parishes. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Trigkey: in flood affected areas across. Instances: 14. Accuracy: 0.7143
{"precision":87.5,"recall":70.0,"f1":77.77777777777777}
Trigkey: proceed to. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: will be send. Instances: 4. Accuracy: 0.5000
{"precision":83.33333333333334,"recall":71.42857142857143,"f1":

{"precision":100.0,"recall":21.428571428571427,"f1":35.29411764705882}
Trigkey: in southeast. Instances: 18. Accuracy: 0.6667
{"precision":80.0,"recall":66.66666666666666,"f1":72.72727272727272}
Trigkey: delivering. Instances: 23. Accuracy: 0.5652
{"precision":80.0,"recall":23.52941176470588,"f1":36.36363636363636}
Trigkey: from Northwest. Instances: 6. Accuracy: 0.6667
{"precision":100.0,"recall":55.55555555555556,"f1":71.42857142857143}
Trigkey: off to. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: despatched to. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: home town in. Instances: 15. Accuracy: 0.6667
{"precision":100.0,"recall":61.53846153846154,"f1":76.19047619047619}
Trigkey: tour. Instances: 26. Accuracy: 0.4231
{"precision":83.33333333333334,"recall":60.97560975609756,"f1":70.42253521126761}
Trigkey: Malayali Association. Instances: 7. Accuracy: 0.7143
{"precision":66.66666666666666,"recall":66.666666666

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: post. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":25.0,"f1":33.333333333333336}
Trigkey: Center. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":71.42857142857143,"f1":83.33333333333333}
Trigkey: stuck at. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: grapples to recover. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: volunteers. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Congress Office. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Feeding. Instances: 4. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Forecast. Instances: 2. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: foreign aid. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: FLOODING devastates Ellicott City. Instances: 3. Accur

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: scouring. Instances: 1. Accuracy: 0.0000
{"precision":66.66666666666666,"recall":100.0,"f1":80.0}
Trigkey: Locals in. Instances: 3. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Nebraska/Southwest. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: American Red Cross of Nebraska. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: swept through. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Specialized Equipment Program. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: school communities. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Ministry officials. Instances: 3. Accuracy: 0.3333
{"precision":100.0,"recall":60.0,"f1":75.0}
Trigkey: in Nebraska and. Instances: 5. Accuracy: 0.8000
{"precision":100.0,"recall":87.5

{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: country of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: donation to. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Nettor stores. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: President of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: In Maryland. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":66.66666666666666,"f1":80.0}
Trigkey: bridge in. Instances: 3. Accuracy: 0.6667
{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Trigkey: Iowa flood relief. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Little Patuxtent in Laurel. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: state electricity board officer. Instances: 2. Accuracy: 0.0000
{"precision":100.0,"recall":50

#### Hurricane

In [95]:
# Per-trigger-key evaluation restricted to the hurricane test set; prints the
# instance count, accuracy, and precision/recall/f1 for each key.
eval_per_trigkey(results_hurricane)

Trigkey: Department of Health and Human Services. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: rose in. Instances: 17. Accuracy: 0.4706
{"precision":87.5,"recall":41.17647058823529,"f1":55.99999999999999}
Trigkey: livestock across. Instances: 5. Accuracy: 0.4000
{"precision":66.66666666666666,"recall":40.0,"f1":49.99999999999999}
Trigkey: flooding across Nebraska. Instances: 2. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: started for. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: parishes. Instances: 2. Accuracy: 0.0000
{"precision":50.0,"recall":33.33333333333333,"f1":40.0}
Trigkey: in flood affected areas across. Instances: 15. Accuracy: 0.6667
{"precision":75.0,"recall":33.33333333333333,"f1":46.153846153846146}
Trigkey: eastern. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Highway 00 near Bellevue. Instances: 4. Accuracy: 0.7500
{"precision":100.0

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: military ship. Instances: 7. Accuracy: 0.2857
{"precision":100.0,"recall":25.0,"f1":40.0}
Trigkey: storm victims. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: from trivandrum. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: NE cut off from. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Malayali Association. Instances: 9. Accuracy: 0.3333
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: delivering. Instances: 13. Accuracy: 0.8462
{"precision":50.0,"recall":33.33333333333333,"f1":40.0}
Trigkey: facing. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: at EDAYARANMULA. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: in southeast. Instances: 4. Accuracy: 0.5000
{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: home town in. Instances: 4. Accuracy: 0.2500
{"pre

{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: The University of. Instances: 1. Accuracy: 0.0000
{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Trigkey: 0000+relief camps. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":33.33333333333333,"f1":50.0}
Trigkey: flood stricken. Instances: 2. Accuracy: 0.0000
{"precision":66.66666666666666,"recall":50.0,"f1":57.14285714285714}
Trigkey: post. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Ambassador to. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Ratmalana. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: western Charles counties in. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: disaster of. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Rains. Instances: 2. Accuracy: 0.5000
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey

{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Trigkey: Relif Camps. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flood zone. Instances: 12. Accuracy: 0.2500
{"precision":71.42857142857143,"recall":31.25,"f1":43.47826086956522}
Trigkey: ’ s agricultural sector. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Tyndall. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: rescue. Instances: 3. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: Washington counties in. Instances: 2. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: ’ s floodwaters. Instances: 2. Accuracy: 0.5000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: From Lincoln. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Is Coming To. Instances: 4. Accuracy: 0.2500
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: DONATES. Instances

{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: people in flood-hit. Instances: 4. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Norfolk. Instances: 3. Accuracy: 0.3333
{"precision":100.0,"recall":50.0,"f1":66.66666666666667}
Trigkey: Fire Service. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: its Main Street. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: police. Instances: 1. Accuracy: 1.0000
{"precision":100.0,"recall":100.0,"f1":100.0}
Trigkey: west of Baltimore. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Base near. Instances: 1. Accuracy: 0.0000
{"precision":50.0,"recall":50.0,"f1":50.0}
Trigkey: Meet me in. Instances: 1. Accuracy: 1.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: Govt of. Instances: 1. Accuracy: 0.0000
{"precision":0.0,"recall":0.0,"f1":0.0}
Trigkey: flash flooding rips through central. Instances: 2. Accuracy: 0.0000
{"pre

---
### Performance per distance interval

#### Floods test

In [96]:
# Evaluate the floods test set per distance interval. The second argument is
# the interval width: 0.1 yields the bins 0.00-0.10, 0.10-0.20, ... seen in the
# output, each with its instance count, accuracy, and precision/recall/f1.
eval_per_dist_interval(results_floods, 0.1)

Interval: 0.00-0.10. Instances: 47. Accuracy: 0.8085
{"precision":87.71929824561403,"recall":81.9672131147541,"f1":84.7457627118644}
Interval: 0.10-0.20. Instances: 14. Accuracy: 0.9286
{"precision":93.75,"recall":100.0,"f1":96.7741935483871}
Interval: 0.20-0.30. Instances: 18. Accuracy: 0.6111
{"precision":88.46153846153845,"recall":60.526315789473685,"f1":71.875}
Interval: 0.30-0.40. Instances: 16. Accuracy: 0.6875
{"precision":86.95652173913044,"recall":86.95652173913044,"f1":86.95652173913044}
Interval: 0.40-0.50. Instances: 17. Accuracy: 0.5882
{"precision":88.57142857142857,"recall":88.57142857142857,"f1":88.57142857142857}
Interval: 0.50-0.60. Instances: 17. Accuracy: 0.8235
{"precision":100.0,"recall":75.0,"f1":85.71428571428571}
Interval: 0.60-0.70. Instances: 8. Accuracy: 0.7500
{"precision":100.0,"recall":85.0,"f1":91.89189189189189}
Interval: 0.70-0.80. Instances: 8. Accuracy: 0.6250
{"precision":75.0,"recall":100.0,"f1":85.71428571428571}
Interval: 0.80-0.90. Instances: 6.

#### Cyclone

In [97]:
# Evaluate the cyclone test set per distance interval, using bins of width 0.1
# (0.00-0.10, 0.10-0.20, ... in the output).
eval_per_dist_interval(results_cyclone, 0.1)

Interval: 0.00-0.10. Instances: 204. Accuracy: 0.6029
{"precision":85.86387434554975,"recall":64.31372549019608,"f1":73.54260089686099}
Interval: 0.10-0.20. Instances: 161. Accuracy: 0.5404
{"precision":89.47368421052632,"recall":52.04081632653062,"f1":65.80645161290323}
Interval: 0.20-0.30. Instances: 135. Accuracy: 0.5481
{"precision":81.35593220338984,"recall":56.14035087719298,"f1":66.43598615916954}
Interval: 0.30-0.40. Instances: 124. Accuracy: 0.4758
{"precision":83.9080459770115,"recall":45.3416149068323,"f1":58.87096774193549}
Interval: 0.40-0.50. Instances: 101. Accuracy: 0.4455
{"precision":86.04651162790698,"recall":52.112676056338024,"f1":64.91228070175437}
Interval: 0.50-0.60. Instances: 96. Accuracy: 0.5417
{"precision":92.38095238095238,"recall":59.14634146341463,"f1":72.11895910780667}
Interval: 0.60-0.70. Instances: 60. Accuracy: 0.6500
{"precision":86.27450980392157,"recall":67.6923076923077,"f1":75.86206896551724}
Interval: 0.70-0.80. Instances: 49. Accuracy: 0.4490

#### Hurricane

In [98]:
# Evaluate the hurricane test set per distance interval, using bins of width
# 0.1 (0.00-0.10, 0.10-0.20, ... in the output).
eval_per_dist_interval(results_hurricane, 0.1)

Interval: 0.00-0.10. Instances: 195. Accuracy: 0.6410
{"precision":82.72727272727273,"recall":52.0,"f1":63.859649122807014}
Interval: 0.10-0.20. Instances: 101. Accuracy: 0.5248
{"precision":68.88888888888889,"recall":34.44444444444444,"f1":45.925925925925924}
Interval: 0.20-0.30. Instances: 129. Accuracy: 0.5814
{"precision":73.33333333333333,"recall":30.555555555555557,"f1":43.13725490196079}
Interval: 0.30-0.40. Instances: 104. Accuracy: 0.4904
{"precision":85.71428571428571,"recall":44.776119402985074,"f1":58.82352941176471}
Interval: 0.40-0.50. Instances: 117. Accuracy: 0.5556
{"precision":80.28169014084507,"recall":45.96774193548387,"f1":58.46153846153847}
Interval: 0.50-0.60. Instances: 109. Accuracy: 0.5596
{"precision":85.18518518518519,"recall":46.93877551020408,"f1":60.52631578947369}
Interval: 0.60-0.70. Instances: 86. Accuracy: 0.5233
{"precision":84.44444444444444,"recall":35.51401869158878,"f1":49.99999999999999}
Interval: 0.70-0.80. Instances: 62. Accuracy: 0.5968
{"pre

---
### Student's t-Test

In [99]:
# Extract the "distance_preds" entry from each test set's preds_trigger_*
# object so the three collections can be compared pairwise with t-tests.
dists_floods, dists_cyclone, dists_hurricane = (
    preds["distance_preds"]
    for preds in (preds_trigger_floods, preds_trigger_cyclone, preds_trigger_hurricane)
)

In [100]:
# Independent two-sample t-test: floods vs. cyclone distance predictions.
# With scipy's defaults this is the two-sided, equal-variance (Student's) form.
# NOTE(review): if the two groups' variances differ, equal_var=False (Welch's
# t-test) may be the safer choice -- confirm whether equal variance holds.
stats.ttest_ind(dists_floods, dists_cyclone)

Ttest_indResult(statistic=-0.5634246043257966, pvalue=0.5732512255849258)

In [101]:
# Independent two-sample t-test: floods vs. hurricane distance predictions
# (scipy defaults: two-sided, equal variance assumed).
stats.ttest_ind(dists_floods, dists_hurricane)

Ttest_indResult(statistic=-2.282989523101936, pvalue=0.022605519306141696)

In [102]:
# Independent two-sample t-test: cyclone vs. hurricane distance predictions
# (scipy defaults: two-sided, equal variance assumed).
stats.ttest_ind(dists_cyclone, dists_hurricane)

Ttest_indResult(statistic=-3.415155477750395, pvalue=0.0006497292330789175)

---
### Trigger keys and tendency to predict no NEs

#### All test datasets

In [103]:
# Entity-less statistics per trigger key over the three test sets combined
# with `+`. The result maps each trigger key to {'total', 'pred', 'true'}
# counts -- presumably the number of instances, of predictions without any
# entity, and of gold labels without any entity (TODO confirm in the helper).
entityless_predictions_per_trigkey(results_floods+results_cyclone+results_hurricane)

{'Residents of': {'total': 1, 'pred': 0, 'true': 0},
 'in flood affected areas across': {'total': 30, 'pred': 21, 'true': 14},
 'has extended': {'total': 8, 'pred': 1, 'true': 0},
 'parishes': {'total': 6, 'pred': 2, 'true': 3},
 'should be delivered at': {'total': 1, 'pred': 0, 'true': 1},
 'Flash floods ripped through Ellicott City': {'total': 1,
  'pred': 0,
  'true': 0},
 'in Panchayat Community Hall': {'total': 1, 'pred': 0, 'true': 0},
 'waters have receded in': {'total': 5, 'pred': 3, 'true': 3},
 'flooding across Nebraska': {'total': 4, 'pred': 0, 'true': 0},
 'flash floods in Ellicott City': {'total': 4, 'pred': 1, 'true': 1},
 'family in': {'total': 1, 'pred': 0, 'true': 0},
 '’ s coffee output': {'total': 4, 'pred': 2, 'true': 0},
 'shops': {'total': 1, 'pred': 0, 'true': 0},
 'journalist': {'total': 1, 'pred': 1, 'true': 1},
 'across Central': {'total': 3, 'pred': 0, 'true': 0},
 'HADR operations in': {'total': 1, 'pred': 0, 'true': 0},
 'Residents': {'total': 4, 'pred': 1,

#### Floods test

In [104]:
# Entity-less statistics per trigger key for the floods test set; each key
# maps to a {'total', 'pred', 'true'} count record.
entityless_predictions_per_trigkey(results_floods)

{'Residents of': {'total': 1, 'pred': 0, 'true': 0},
 'in flood affected areas across': {'total': 1, 'pred': 0, 'true': 0},
 'has extended': {'total': 2, 'pred': 0, 'true': 0},
 'parishes': {'total': 2, 'pred': 2, 'true': 2},
 'should be delivered at': {'total': 1, 'pred': 0, 'true': 1},
 'Flash floods ripped through Ellicott City': {'total': 1,
  'pred': 0,
  'true': 0},
 'in Panchayat Community Hall': {'total': 1, 'pred': 0, 'true': 0},
 'waters have receded in': {'total': 2, 'pred': 2, 'true': 2},
 'flooding across Nebraska': {'total': 1, 'pred': 0, 'true': 0},
 'flash floods in Ellicott City': {'total': 1, 'pred': 0, 'true': 0},
 'family in': {'total': 1, 'pred': 0, 'true': 0},
 '’ s coffee output': {'total': 2, 'pred': 0, 'true': 0},
 'shops': {'total': 1, 'pred': 0, 'true': 0},
 'journalist': {'total': 1, 'pred': 1, 'true': 1},
 'across Central': {'total': 1, 'pred': 0, 'true': 0},
 'HADR operations in': {'total': 1, 'pred': 0, 'true': 0},
 'Residents': {'total': 1, 'pred': 0, 't

#### Cyclone

In [105]:
# Entity-less statistics per trigger key for the cyclone test set; each key
# maps to a {'total', 'pred', 'true'} count record.
entityless_predictions_per_trigkey(results_cyclone)

{'rain forecast': {'total': 2, 'pred': 1, 'true': 0},
 'rose in': {'total': 5, 'pred': 3, 'true': 2},
 'has extended': {'total': 6, 'pred': 1, 'true': 0},
 'trip to': {'total': 7, 'pred': 3, 'true': 1},
 'hit': {'total': 1, 'pred': 0, 'true': 0},
 'parishes': {'total': 2, 'pred': 0, 'true': 0},
 'in flood affected areas across': {'total': 14, 'pred': 9, 'true': 6},
 'proceed to': {'total': 3, 'pred': 1, 'true': 1},
 'will be send': {'total': 4, 'pred': 0, 'true': 0},
 'flooding across Nebraska': {'total': 1, 'pred': 0, 'true': 0},
 'citizens in': {'total': 4, 'pred': 2, 'true': 1},
 'waters have receded in': {'total': 2, 'pred': 0, 'true': 0},
 'flash floods swept through': {'total': 1, 'pred': 0, 'true': 0},
 'help for': {'total': 1, 'pred': 0, 'true': 0},
 'street poles on': {'total': 1, 'pred': 0, 'true': 0},
 'here in Nebraska': {'total': 10, 'pred': 4, 'true': 2},
 'landed at': {'total': 4, 'pred': 0, 'true': 0},
 'Flood Relief Program': {'total': 11, 'pred': 3, 'true': 3},
 'NE S

#### Hurricane

In [106]:
# Entity-less statistics per trigger key for the hurricane test set; each key
# maps to a {'total', 'pred', 'true'} count record.
entityless_predictions_per_trigkey(results_hurricane)

{'Department of Health and Human Services': {'total': 1, 'pred': 1, 'true': 0},
 'rose in': {'total': 17, 'pred': 10, 'true': 6},
 'livestock across': {'total': 5, 'pred': 3, 'true': 1},
 'flooding across Nebraska': {'total': 2, 'pred': 0, 'true': 0},
 'started for': {'total': 2, 'pred': 2, 'true': 2},
 'parishes': {'total': 2, 'pred': 0, 'true': 1},
 'in flood affected areas across': {'total': 15, 'pred': 12, 'true': 8},
 'eastern': {'total': 1, 'pred': 0, 'true': 0},
 'Highway 00 near Bellevue': {'total': 4, 'pred': 3, 'true': 3},
 'trip to': {'total': 1, 'pred': 1, 'true': 1},
 'Homes': {'total': 3, 'pred': 1, 'true': 1},
 'Farm Hotline': {'total': 5, 'pred': 2, 'true': 0},
 'haul to': {'total': 4, 'pred': 3, 'true': 3},
 'foreign aid': {'total': 7, 'pred': 2, 'true': 1},
 'Bank on': {'total': 3, 'pred': 2, 'true': 0},
 'proceed to': {'total': 3, 'pred': 1, 'true': 0},
 'flooding event hit': {'total': 1, 'pred': 0, 'true': 0},
 'flooding of': {'total': 5, 'pred': 3, 'true': 2},
 'gr

---
### Difference between trigger entities and trigger keys

In [107]:
# Keep only the first element of every unique trigger entity record (the
# entity text, judging by the sample printed here), then show the total count
# and the first three strings on separate lines.
trigger_entity_strings = [entity[0] for entity in unique_trigger_entities]
print(len(trigger_entity_strings), trigger_entity_strings[:3], sep="\n")

1909
['in', 'Nebraska', 'Iowa']


In [108]:
trigger_keys = ['flood affected regions', 'Pathanamthita', 'Aid', 'Falls City', 'in Hays', 'Frederick County in', 'Southern', 'washed out', 'kitchen', 'another country', 'flooding in eastern', 'between Iowa', 'Church', 'flood-hit areas', 'Ladies of', 'donate to', 'operations in', 'give aid to flood-hit', 'south and north', 'around', 'in Lynch', 'TRAIN BETWEEN', 'it recovers from', 'in Cochin', 'Metro in', 'People from South India', 'Pakistan', 'Maryland flooding', 'public', 'raised in', 'landslide relief', 'state of emergency for', 'flood relief campaign', 'its flood', 'Hindon to', 'Spent', 'residents of Pierce', 'rain-soaked', 'deployment to', 'flash flood emergency has been issued for', 'lives', 'governor of', 'east of', 'city where roads', 'States', 'stations', 'at EDAYARANMULA', 'Kuthiyathodu', 'nations', 'ESUs', 'WJZ', 'eastern part of', 'flood relief account', 'Farmers Union Foundation', 'in relief camps across', 'its surroundings', 'Near malakara temple aranmula', 'Train to', 'flash flooding hits', 'flash floods swept through', 'will be send', 'waters in', 'CMs Flood relief fund', 'convoy from', 'Terriers', 'Honerable District Collector', 'for parts of', 'flood relief material', 'donating', 'is on site in', 'delivering', 'plants in', 'move over', 'remote parts', 'Nagarkovil', 'sets up', 'bank in', 'areas across southern', 'Richardson Counties', 'United Way', 'floods in central', 'to Aranmula in', 'storm', 'Calicut', 'in pongalakariyil colony of', 'its Main Street', 'Pumphouse at', 'University of', 'bases in Nebraska', 'relief for', 'flash flooding ripped through', 'Flood-Hit', 'required at Rajiv Gandhi stadium', 'agencies', 'Government of', 'farm acres', 'FLOODING IN MARYLAND', 'girl', 'in Boyd County', 'Residents', 'State Patrol', 'attack where', 'State Disaster Management Authority', 'refugees in', 'Bihar', 'populations', 'Pathanamthitta district', 'From', 'Corp', 'Ambassador to', 'Lower 00 In Nebraska', 'are stationed in', 'Farm', 'Gov .', 'Breweries 
throughout', 'Washington counties in', 'Nebraska City to', 'Madapura in', 'Football Team', 'forest', 'flood zone', 'city hit by', 'Took', 'rescue efforts across', 'Pala to', 'will be going to', 'Force Camp', 'haul to', 'Air', 'CM relief fund', 'Medical Center', 'Spencer', 'Erie', 'flood-ravaged Valley', 'taken', 'airlifted to', 'here in Nebraska', 'mishap', 'despatched to', 'collect', 'country of', 'city neighborhood', 'store in Hays', 'pradesh mahila congress', 'Red Cross of Nebraska', 'floodwaters in Maryland ’ s', 'govt announces', 'reported in', 'from trivandrum', 'room', 'Medicine Flood Relief Donation Management Center', 'lottery', 'flood relief fund for', 'Ratmalana', 'roads in Eastern', 'Town Hit by', 'flash flood emergency has been issued for Ellicott City in', 'river in Nebraska', 'distributed at', 'floods in rural', 'stuck at', 'in Fremont', 'landslides in State Of Kerala of', 'HC', 'flooding from Ellicott City', 'grapples to recover', 'landslides to', 'Flooding Destroys', 'coming in from', 'rails', 'visited', 'tributaries into', 'cattle', 'is committed to support', 'in SW', 'flash flood emergency has been issued for Ellicott City in Howard County', 'Serves', 'relief aid from', 'flood relief camps', 'its downtown', 'Flooding submerged parts of', 'state of emergency', 'Little Patuxtent in Laurel', 'Tekamah south to', 'set out from', 'Marylands', 'village of Idukki district', '’ s Floods', 'Hotel Livia Tower', 'needed.Drop', 'Attorney General', 'Flood-Affected', 'Flood Relief Operations', 'Red Cross of', 'Tamil Nadu', 'Nationals in', 'headed from', 'in Ellicot City', 'doctors', 'work in', 'distributed at UC College', 'was discovered in', 'land in', 'flood-affected areas of', 'camps across flood-affected', 'in kochi', 'omaha', 'hit by', 'displaced', 'is flood hit', 'flood hits Ellicott City', 'cm', 'areas across', 'area surroundings of', 'downtown', 'near Alangad', 'Region', 'push toward', 'Times of', 'American Red Cross', 'Congress Office', 'Gujarat', 'NE 
cut off from', 'Captain & crew of', 'Haripad', 'smashed into Bangladesh', 'Kumbalam', 'Malayali Association', 'Chenganoor', 'flash floods rage through Ellicott City', 'drinking water', 'Indoor Stadium', 'rescue camps', 'arrived', 'Group in', 'house is in', 'AOL', 'nebraska', 'Collection centres in Chennai', 'taking place in', 'Across parts of', 'Aranmula', 'parishes', 'flood hits', 'main street in Ellicott City', 'rushed through downtown Ellicott City', 'BRANCH', 'Catonsville', 'who', 'Car street', 'donation to', 'city is ravaged by flood waters', 'Offutt in', 'outside Baltimore', 'branch', 'will leave for', 'missing', 'FL', 'Dam', 'Volunteers', 'SriLankas', 'portions of', 'landed at', 'in Florida', 'LasVegas', 'discovered along', 'from Northwest', 'competitors', 'of India', 'in Chalakudy', 'Flash Flood in Elliot City', 'NE Office', 'site', 'along', 'Air Force', 'camps across', 'bases in', 'flooding swept through Ellicott City', 'flood relief in Kerala', 'Diocese of', 'Sri .', 'citizens in', 'Apt', 'Shelter in', 'flood-ravaged', 'area of', 'flood-hit state', 'heading to flood-affected', 'students from', 'in Preston', '’ s Lobby Day For Planned Parenthood', 'downtown of', 'donation', 'FLOOD RELIEF', 'group', 'Its', 'Guard', 'missing amid', 'was found', 'CMs', 'above', 'Bhubaneswar', 'between Texas', 'flooding throughout central', 'Visited', 'farmer', 'move', 'HADR operations in', 'post', 'located on HWY 00', 'farmlands', 'Forecast', 'chapter', 'Flooding Slams', 'rescue work at', 'shipping', 'provide help in', 'Chief Ministers', 'remote', 'University', 'Shopping Center', 'Army National Guard', 'police', 'radio', '’ s radio', 'it was hit with', 'northwest', 'District Committee', 'surged through', 'flood rescue in', 'under water', 'heading', 'Center', 'issued for', 'Locals in', 'disappeared amid', 'devastated by', 'citizen of', 'other state', 'Flooding Ravages', 'Strong Red Ale', 'Niobrara', 'have sent', 'landslides hit', 'Woman', 'KODAGU', 'tornado drill day for 
Iowa', 'are stuck at', 'flash floods devastated Maryland ’ s', 'have been evacuated', 'faces', 'flooding from', 'AmeriCorps', 'smashed into', 'communities around', 'from Laurel', 'donations for', 'agricultural relief efforts', 'evacuated in', 'FLOODING devastates Ellicott City', 'Universitys', 'history', 'provide relief to', 'scouring', 'populace', 'Karnataka flood', 'my mother Indias land', 'Flooding In', 'suburb', 'was recovered in', 'Welcome', 'strikes Bangladesh', 'Preservation', 'lanes on US-00 at Kenilworth Avenue', 'Diego-Based', 'Flood Relief Program', 'allapuzha', 'Thrissur district', 'carrying', 'coverage', 'Companies in', 'serving in', 'Veterans', 'centres', 'KANSAS', 'flash flooding devastate Ellicott City', 'collected in', 'transport to', 'friends', 'flood event', 'Gujarat floods', 'Fireforce', 'Nebraska flooding victims', 'flood Relief', 'brought', 'rice production in', 'in flood affected Palakkads', '0000+relief camps', 'on-ground', 'Salvation Army', 'Mangalam Dam village', 'home state of', 'Farmers Union', '’ s districts', 'one', 'state electricity board officer', 'Strike', 'in Marylands', 'Town', 'in Panchayat Community Hall', 'tv stations', 'from Lillington', 'in MT School', 'will donate', 'flood-affected', 'Kerala Samajam', 'PARTS OF SOUTHERN NEBRASKA', 'Auto Center Stewartville', 'flood-hit state of', 'juvenile treatment center', 'finance minister of', 'at Thiruvalla', 'Floods affected people', 'Kodagu Flood Relief', 'Kunnamkulam', 'Air Force Base in Florida', 'MLAs', 'North Dakota headed for', 'frm whr', 'People from South', 'parks', 'women in', 'Flash flood smashes into Ellicott City', 'Karnatakas flood-hit', 'its cattle', 'it ’ s', 'CNTRL/E', 'Nebraska workers', 'Sappers', 'health camps at', 'mill town', 'flooding of', 'The University of', 'families in flood hit', 'SouthEast', 'in effect for portions of', 'from where', '’ s southern state', 'landslides in State Of', 'water in Cheverly', 'made in', 'RELIEF', 'disasters', 'flash flood slammed 
into', 'Base near', 'floating down', 'has extended', 'heading from', 'Panchayat office', 'Govt of', 'situation in', 'chengannur', 'rain pounding', 'journalist', 'leaving from', 'from Karunagapally', 'NGO in', 'dispatching', 'Controlled Burn Season', 'travelled from North to South of', '’ s agricultural sector', 'taking place in Yatagampitiya', 'Marathahalli', 'Fremont residents', 'nearby Camps', 'amid', 'Animal Shelter', 'Catch us', 'Ellicott City flooding in', 'Flash Floods Rip Through Ellicott City', 'truck driver', 'flash flooding sweeps through Ellicott City', 'town of', 'reliefcamps', 'folks of', 'Air Base in Nebraska', 'soldiers of', 'Oil Marketing', 'Distress Relief Fund Of', 'Air Force Base in', 'Kodagu flood victims', 'Flood Relief Trip', 'Scouts', 'villages across', 'Flash Flood Tears Through', 'rural', 'flooding devastates Ellicott City', 'FREMONT', 'Farm Hotline', 'in India ’ s southern state of', 'area Tharangam rescue centre', 'nurse', 'Palakkad', 'Hospice and Palliative', 'Tirupati Railway Station', 'flood crisis in', 'Dakotas', 'Base outside of Omaha', 'praying for', 'Flash floods struck', 'located', 'Odisha to', 'Is Coming To', 'Bridges washed away in', 'days in', 'in flood-devastated', 'north of Preston', 'flash flooding has devastated', 'Headed to', 'volunteers of', 'Valiyakadu village', 'rainfalls in Texas', 'in parts of', 'INTO', 'govt . 
hospitals', 'flooding devastates', 'forecasts', 'DELAWARE', 'ChildFund', 'be sent to Ernakulam', 'Panhandle Research Center', 'community kitchen in', 'victim.all the way from', 'in Nebraska and', 'concert', 'mission in', 'here', 'have been sent', 'flash floods swept through Ellicott City', 'in Pitabaddra', 'from Muthoot hospital in', 'departed to', 'in Thrissur district', 'heroes of', 'towards flood affected', 'U of', 'rescue camps near by', 'school', 'bridge in', 'mosque housing', 'extends assistance', 'Community', 'rehabilitation', 'Officers of IOCL', 'floods in Ellicott City', 'arranged by NPOL', 'distribution in', 'authorities',
                'flash flooding rips through central', 'Post', 'Health Minister', 'organization in', 'has released', 'Govts', 'in Northeast', 'rains across', 'Floods/disaster', 'operations from', 'wells', 'hands over', 'families near', 'relief activities in', 'western Charles counties in', 'trails', 'flash floods in Ellicott City', 'searching', 'appreciates', 'communities from', 'camp in', 'started for', 'Stewartville Auto Center Stewartville', 'Flood Damage', '’ s response', 'Citizens', 'Secretary of Agriculture', 'reaches flood-hit', 'FLOOD WATERS in', 'have been sent from', 'TRAIN BETWEEN BHUBANESWAR', 'different districts of', 'Flood Warning in west', 'Pallipad village of', 'businesses in', 'canvass', 'conference in', 'Nebraska to', 'people for', 'carried', 'communities across', 'Bhubaneswar via', 'is from', 'in northern', 'reports from', 'Collection centres in', 'flooding relief', 'Hauling', 'Mumbai to', 'in Vypeen', 'street poles on', 'President of', 'origin in', 'parts in', 'Came frm', 'Colorado', 'county seat of Howard County', 'people from', 'has been asked', 'signs', 'Pampampallam in', 'work going on in', 'trauma relief camps', 'in flood affected areas across', 'BOONE', 'home town in', 'fishermen', 'North Central', 'peeps', 'In flood-ravaged', 'nuclear power plant', 'NU has', 'Mobile homes', 'flooding in areas of', 'flash flooding sweeps through', 'Water level has receded in Pandalam', 'flood affected area', 'disaster relief', 'floods hitting', 'family in', 'Hit By', 'financial aid to', 'house in', 'show', 'Bar Exchange', 'medical relief camp', 'Andaman', 'Samajam', 'dist .', 'flood waters covered', 'overtook', 'Lakeview FFA Chapter in Columbus', 'happening in', 'Operations in Flood Stricken', 'aid from', '’ s Ambassador', 'to flood affected', 'rain storm', 'From Lincoln', 'headed out to', 'representatives from', 'Bank on', 'citizens of', 'Efforts in', 'flood affected Pandalam', 'Auburn', 'Association', 'Kottayam district of', 'Resident commissioner of', 
'reservation', 'northern', 'Double up Food Bucks program', 'tribal colony', 'sell in', 'was glowing', 'Laurel', 'floods rip through Ellicott City', 'Indiana', 'stayed in', 'flash flooding devastates Ellicott City', 'traveling in and', 'flood victim', 'aroiund', 'is devastated by', 'Kushalnagar in', 'Baltimore suburb', 'Fire Service', 'Bureau', 'flood victims from', 'neighborhood', 'SDM Offices in', 'Amb', 'rivers of', 'Vegas', 'goods in', 'SDRF of', 'floodwaters surge through', 'in flood affected regions of', 'Synod Disaster relief', 'state of emergency was', 'E-CNTRL', 'rose in', 'flash flooding devastate', 'Gurgaon', 'USA', 'are stuck at Paravoor', 'Little Patuxtent in', 'Food Bank', 'Rehabilitation', 'Dundalk', 'into flood affected areas of', 'NEBRASKA', 'county seat of', 'flood-battered', 'report from', 'facing', 'Khalsa Aid', 'rain flooded', 'flash flooding swept through', 'forestland', 'County Executive', 'other countries', 'declared in', 'Department of Health and Human Services', 'workers', 'Flash Floods Surge Through Town in', 'relief camps of', 'FLOODING devastates', 'west of Baltimore', 'heatwave', 'floods across', 'health camp', 'floods hit', 'home in flood-struck', 'Roads in Virginia', 'Lakeview FFA Chapter in', 'place', 'Jharkhand', 'Pathadippalam', 'flood affected people', 'Foreign Minister', 'Transco Limited', 'Homes', 'PMC', 'relief centers allover', 'rice', 'Rain', 'Flash floods ripped through', 'Iowa flooding victims', 'Tyndall', 'southwest Iowa', 'are going to', 'Build', 'blizzard in western', 'rehabilitation work in', 'take place in Iowa', 'Beatrice areas', 'gave', 'torrents devastate', 'relief work in', 'miles across', 'LINCOLN', 'rushed through downtown', 'villages in', 'Bengaluru friends', 'parishioners', 'Embassy', 'kallishery kollattu', 'Sent', 'heads for', 'place in', 'Nagarjuna', 'items from', 'coasts', 'premises', 'Colonies in', 'providing', 'evacuees from', 'quake relief', 'e-Health center', 'Refinery', 'farmland in', 'of Bihar', 
'Landslide in', '’ s floodwaters', 'livestock across', 'evacuation plan', 'Lillington', 'TV stations', 'Rajiv gandhi indoor stadium', 'South', 'Kumbalam in', 'is in', 'Adamannu', 'Residents of', 'Alapuzha', 'side', 'team in', 'American Red Cross of Nebraska', 'missing from Ellicott City', 'Hit by', 'sends medical assistance', 'Peoples of', 'flooding event hit', 'Relif Camps', 'Community Action', 'hit', 'Achankovil rivers are flooded', 'Weather', 'rescue centres in flood hit', 'destroyed state', 'stand with', 'trip to', 'flood victim of Nebraska', 'is facing', 'was airlifted', 'working in', 'Valley', '’ s coffee output', 'faced', 'UAE', 'Flood Waters', 'flood stricken', 'take place in', 'Iowa residents', 'victims of flood in', 'Districts in', 'families living in', 'in Ellicott', 'Custer County', 'food truck owner', 'camp at', 'Gov of', 'hit Ellicott City', 'flood warning', 'CivilAirPatrol', 'despatched', 'landed in', 'deployed at Kalutara', 'flash flood in Ellicott City', 'came', 'strike in', 'flight from', 'flood relief work in', 'Financial Corporation', 'of southern', 'floods in Marylands', 'towns in', 'areas near', 'China', 'USA where', 'NISA', 'transported', 'CycloneMora in', 'flash floods devastated', 'trapped in flood hit', 'sites in', 'Flash Floods Rip Through', 'settled in', 'counties from across', 'rescue', 'United States', 'President of Maldives', 'state lines', 'Air National Guard', 'Pradesh Congress Committee', 'flood disaster in', 'headquarters in', 'off red alert', 'Neyyar Dam', 'Ernakulam from', 'Kollupitiya stations', 'Highway 00 near Bellevue', 'flood-affected areas', 'Western Iowa', 'Peroorkada Area Committee', 'Sri Lankan', 'Disaster Relief Fund', 'heads to', 'they', 'which ravaged', 'waters have receded in', 'its residents', 'earthquake of', 'disbursed in', 'Specialized Equipment Program', 'Flash floods', 'travel to', 'officials of', 'near villages', 'Cmp', 'Flood/Landslide', 'food relief', 'At this', 'Infrastructure of state', 'take', 'flash 
floods devastate', 'proceed to', 'flash flooding strikes Ellicott City', 'is undergoing', 'community in', 'church in', 'suburbs', 'visited flood-affected areas', 'homes/farms in', 'CISF Unit ASG', 'cattleman', 'Correctional Youth Institute', 'released', 'floods sink', 'Arattupuzha', 'Aluva region', 'Matara District', 'Roads in', 'other foreign', 'Karnataka coasts', 'ground', 'pray for', 'North Carolina to Omaha', 'counties in Maryland', 'Maine to', 'shops', 'In Maryland', 'distribute in flood effected area', 'surrounding', 'iowa', 'cut off from', 'gets stuck in', 'flood relief work', 'Uni', 'Nation of', 'drinking water in Iowa', 'flash floods rage through', 'missing in Ellicott City', 'State of Emergency in', 'storm victims', 'be sent to', 'Family', 'Foreign Aid Received by', 'centre at', 'peroorkada', 'potholes', 'Peoples', 'volunteers', 'chengannur kerala', 'Haryana', 'brought flash flooding to', 'province of Gujarath', 'Main', 'Chief', '’ s governor', 'in South', 'floods ravaged', 'Vellappara village of', 'to flood-ravaged', 'flood in Kerala', 'Runza', 'donate for', 'drought relief in', 'aid by', 'tornado drill day for', 'in Manchester', 'sound in', 'infrastructure in flood-hit', 'flooding rips through', 'issued for Iowa', 'Malampuzhas', 'Chengannur in', 'trip to Iowa', 'ministers', 'GECBH', 'To', 'flash flooding strikes', 'Nationals in Texas', 'contribute', 'power cuts', 'historic flood in', 'Mosques in', 'collecting', 'at Cessna Business park', 'help for', 'within', 'headed to Nebraska', 'donation site at', 'has been send off to', 'Company', 'flash floods struck', 'toll plazas in', 'across Nebraska', 'Dept . 
of Ag', 'reaches', 'Palakkad district', 'parts of flood affected areas in', 'in Omaha', 'Flash floods in', 'farm', 'naval base relief camp', 'crop', 'attack', 'Maldives', 'Metro Area', 'Amala', 'on way to', 'washed out Ellicott City', 'bring', 'rivers are flooded', 'rescue operations in', 'was airlifted to', 'brought flash flooding to Ellicott City', 'Sivananda', 'in Jind', 'Reservation in', 'in Lourdes matha hospital', 'Missing from', 'Israel ’ s Ambassador', 'county in', 'distribution', 'map', 'Pahiyangala', 'FM', 'Ngos in', 'Victims in', 'working in flood-stricken areas of SW', 'save', 'am from East', 'Fields', 'Hurricane Season for', 'flood devastation', 'Medical Camp', 'in flood-ravaged', 'regions', 'rain forecast', 'disaster of', 'sounds alert', 'water line', 'off to', 'SE', 'Iowa ,', 'Minister for Public Works', 'jodupala in', 'northeast', 'flooding across Nebraska', 'floods struck', 'flood hit', 'in southeast', 'hospital in Kearney', 'Deputy', 'Florida', 'Panama City', 'KERALA', 'required at', 'traveling through', 'flood hit state', 'aranmula', 'Divine Care Centre', 'team', 'flying', 'Medicine', 'Ministers', 'colony in', 'State of emergency declared in', 'Office', 'PENNSYLVANIA', 'times in', 'landscape', 'Pampa', 'flood victim of', 'victims in dnkoluwaththa in', 'medical camps in', 'island in', 'NE State', 'to Flood-Stricken', 'drinking water in', 'reservations in', 'flood affected regions of', 'highway', 'Raising Canes', 'on site in', 'girls', 'Districts', 'flood areas around', 'in Imperial', 'Folks in', 'Briarcroft Lane', 'areas surrounding', 'hospitals', 'washed out parts of', 'flooding in parts of eastern', 'Ministry officials', 'Flooding Slams Ellicott City', 'Aid from', 'Flash floods in Maryland', 'Missing in', 'community in Lincoln', 'Air Base in', 'Flood in', 'ranchers in', 'flooding in areas of Missouri', 'in Flood-Hit', 'Deseeya Seva Bharati', 'take to', 'on', 'International', 'went', 'Mangalore', 'in flood-affected areas in', 'rescue plans for', 
'Welcome to', 'legislators', 'Rains', 'announces', 'GrandIsland', 'Cattlemens Disaster Relief Fund', 'leaving', 'shelter at', 'foreign aid', 'recovers from floods', 'Hyderabad', 'NE College of Technical Ag', 'head to', 'development', 'swimming pool', 'in Ashland', 'people in flood-hit', 'Ramthakur College', 'Maryland state', 'County official', 'head west', 'aerial survey of',
                'UCCollege', 'river in', 'Department of Education', 'People', 'ERNAKULAM', 'floods swept through', 'flooding impacting', 'travel from', 'hectares of paddy', 'flood waters in Ellicott City', 'fellow state', 'receeed in', 'at Bambalapitiya', 'Begumpet airport', 'Relief Commissioner', 'petrol pumps in', 'water in', 'section of', 'relief aid to', 'lived in', 'flooding throughout', 'floods affect', 'Flood-hit', 'death toll in', 'Health Department', 'our', 'activities at', 'from Western', 'FLOODING', 'indoor stadium', 'should be delivered at', 'Disaster Resource Center in', 'register', 'Near malakara temple', 'Flash Flood Tears Through Ellicott City', 'in Kerala', 'Domestic', 'donations center in', 'across Central', 'was in', 'in eastern', 'information', 'happening', 'india', 'Meet me in', 'PARTS OF SOUTHERN', 'military ship', 'finance minister', 'toll plazas at Paliekkara in', 'going to', 'Ship from', 'which has been ravaged by', 'have impacted', 'thanks', 'Water level has receded in', 'shore county', 'tour', 'districts in', 'missing persons', 'submerges', 'DONATES', 'in Ratnapura', 'Nettor stores', 'Norfolk', 'communities from Montana', 'CPIM of', 'missing from', 'levee systems in', 'south of', 'flash flooding devastates', 'in Glenwood', 'Impact', 'where churches', 'stands', 'stations in', 'flood areas around chengannur', 'surrounding area', 'Railroad', 'rainfalls in', 'Ratings and Research', 'floodwaters in', 'rip through', 'Cattlemens', 'Save', 'In flood-hit', 'in Howard County', 'school communities', 'swept through', 'western parts of', 'Tax Commissioner', 'accept financial aid', 'levee', 'CMDRF', 'its main street', 'Broadcasters', 'Manitoba to', 'out of Marshall', 'is sinking', 'Allepy', 'flooding victims in', 'to Eastern', 'Bangladesh', 'set up in', 'Haskell Ag Lab', 'Haj Committee', 'flooding around', 'are here from', 'Islamic Foundation', 'Nebraska/Southwest', 'Fishermans', 'eastern', 'leaving for', 'toll plazas at Paliekkara', 'coal workers', 
'Sunrise hospital', 'flooding in Nebraska', 'Flash flooding in', 'part of', 'Extension', 'SPCA', 'aid for', 'Aryanadu province of', 'partner', 'hav 0NationPark', 'have died', 'Millaniya', 'Relief for', 'Flash floods ripped through Ellicott City', 'citizens', 'Lower 00 In', 'flood levels', '’ s Rural America Relief', 'Financial Assistance to', 'in flood-affected', 'Distribution centre', 'villages', 'Battallion of', 'area of Columbus', 'lives there', 'Bulathsinhala', 'in the district', 'headed to Fullerton', 'Home Minister of', 'cleaned in', 'needed', 'are missing in', 'called', 'Seen In', 'stood with', 'fund Relief', 'them', 'Air Force Base', 'Flash Flood Smashes Into', 'Flash flood smashes into', 'in that flood', 'Iowa flood relief', 'deployed at', 'Feeding', 'Coimbatore', 'Arattupuzha area Tharangam rescue centre', 'hit by floods', 'took place in']
print(len(trigger_keys))
print(trigger_keys[:3])

1289
['flood affected regions', 'Pathanamthita', 'Aid']


In [109]:
# Split the two vocabularies into three groups:
#   intersection      - entity strings that also occur as trigger keys
#   disjunct_entities - entity strings that are not trigger keys
#   disjunct_keys     - trigger keys that never occur as an entity string
# Membership is tested against sets so each lookup is O(1) instead of a list scan;
# the result lists keep the iteration order of their source lists.
trigger_key_lookup = set(trigger_keys)
intersection = []
disjunct_entities = []
for entity in trigger_entity_strings:
    if entity in trigger_key_lookup:
        intersection.append(entity)
    else:
        disjunct_entities.append(entity)

intersection_lookup = set(intersection)
disjunct_keys = []
for trigkey in trigger_keys:
    if trigkey not in intersection_lookup:
        disjunct_keys.append(trigkey)

print(len(intersection))
print(len(disjunct_keys))
print(len(disjunct_entities))

1277
12
632


In [110]:
# Show the strings that are both a trigger entity string and a trigger key.
intersection

['conference in',
 'Air',
 'Force Camp',
 'travelled from North to South of',
 'rice production in',
 'flying',
 'visited',
 'Ministry officials',
 'in the district',
 'remote',
 'evacuees from',
 'President of Maldives',
 'President of',
 'Financial Assistance to',
 'volunteers of',
 'branch',
 'cleaned in',
 'Captain & crew of',
 'Erie',
 'Cmp',
 'thanks',
 'in Ratnapura',
 'BRANCH',
 'Group in',
 'relief aid from',
 'flood levels',
 'floods hitting',
 'landslide relief',
 'Shopping Center',
 'Kollupitiya stations',
 'at Bambalapitiya',
 'stations',
 'appreciates',
 'Flood/Landslide',
 'military ship',
 'arrived',
 'was glowing',
 'Scouts',
 'off to',
 'goods in',
 'mishap',
 'strikes Bangladesh',
 'on way to',
 'relief aid to',
 'announces',
 'women in',
 'Millaniya',
 'Matara District',
 'SriLankas',
 'deployed at',
 'Ratmalana',
 'deployed at Kalutara',
 'operations in',
 'Israel ’ s Ambassador',
 'to Flood-Stricken',
 '’ s Ambassador',
 'Ambassador to',
 'districts in',
 'authori

In [111]:
# Show the trigger keys that do not occur among the trigger entity strings.
disjunct_keys

['Lower 00 In Nebraska',
 'American Red Cross',
 'Indoor Stadium',
 'located on HWY 00',
 'lanes on US-00 at Kenilworth Avenue',
 'flood Relief',
 '0000+relief camps',
 'trauma relief camps',
 'Highway 00 near Bellevue',
 'homes/farms in',
 'hav 0NationPark',
 'Lower 00 In']

In [112]:
# Show the trigger entity strings that are not trigger keys.
disjunct_entities

['in',
 'Nebraska',
 'Iowa',
 'floods',
 'Maryland',
 'flooding',
 'flood victims',
 'flood',
 'flooding in',
 'to',
 'state',
 'from',
 'flood relief',
 'Flood Relief',
 'floods in',
 'people of',
 'Missouri',
 'South Dakota',
 'homes',
 'roads',
 'communities',
 'Kerala',
 'states',
 'District',
 'Wisconsin',
 'Mississippi',
 'district',
 'across',
 'farmers',
 'areas',
 'Floods',
 'residents',
 'flood-hit',
 'at',
 'Wyoming',
 'relief camps',
 'Ellicott City',
 'city',
 'Montana',
 'CM',
 'Alappuzha',
 'In',
 'community',
 'town',
 'Illinois',
 'Ernakulam',
 'bridges',
 'livestock',
 'help',
 'Kansas',
 'Midwest',
 'area',
 'districts',
 'Flood',
 'houses',
 'Flood Victims',
 'Flooding',
 'Alaska',
 'Utah',
 'Idaho',
 'North Dakota',
 'Maine',
 'Vermont',
 'Delaware',
 'Hawaii',
 'country',
 'flash flooding',
 'Farm Bureau',
 'Louisiana',
 'Minnesota',
 'flooding in Arkansas',
 'City',
 'Chengannur',
 'rain',
 'flooding in Ellicott City',
 'flash floods',
 'where',
 'people in',
 'h

---
### Is distance a measure of familiarity?

#### Test data built from trigger-annotated training data

In [113]:
# Collect the distinct texts of training samples whose explanation contains a "T"
# (the explanation strings shown at the top of the notebook mark triggers as T-0, T-1, ...).
# A set of already-collected texts replaces the repeated scan of the growing list;
# the list itself keeps first-occurrence order.
test_from_training = []
collected_texts = set()

for sample in train_with_triggers:
    sample_text = sample["text"]
    if sample_text in collected_texts:
        continue
    if "T" in sample["explanation"]:
        test_from_training.append(sample_text)
        collected_texts.add(sample_text)

len(test_from_training)

1742

In [114]:
# Send the texts collected from the trigger-annotated training data to the
# trigger-prediction endpoint of the FastAPI service.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': test_from_training,
    }
)
# Surface an HTTP error here instead of as a KeyError when the following cells
# index into the JSON body.
response.raise_for_status()

In [115]:
# Extract the distance predictions from the response and summarise them.
# NOTE(review): span_and_avgs is defined outside this section; the 4-tuple it returns
# is presumably (min, max, mean, median) - confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.0023202390875667334,
 1.5017300844192505,
 0.22954247357127558,
 0.019115589559078217)

In [116]:
# Combine each input text with its predicted labels, matched trigger key and distance,
# then list the records from largest to smallest distance.
prediction_payload = response.json()
c = prediction_payload["class_preds"]
t = prediction_payload["trigger_preds"]
d = prediction_payload["distance_preds"]
results_training = [
    {"text": sample_text, "pred_label": labels, "key": trigger_key, "dist": distance}
    for sample_text, labels, trigger_key, distance in zip(test_from_training, c, t, d)
]
results_training.sort(key=lambda record: record["dist"], reverse=True)
results_training

[{'text': 'If you want to Donate to Kerala Flood Victims then Open Paytm and select Kerala Floods from categories & Donate or pay Directly to Kerala Govt By There Official Website . : ) ❤️ @ Paytmcare @ Paytm # KeralaFloodRelief @ vijayshekhar Lets help Kerala Flood Victims',
  'pred_label': 'O O O O O O S-LOC O O O O O O O S-LOC O O O O O O O O O S-LOC O O O O O O O O O O O O O O O O O O O S-LOC O O',
  'key': 'Malayali Association',
  'dist': 1.5017300844192505},
 {'text': 'Man missing after 1,000-year flood ravages Ellicott City for 2nd time in two years Good thing Trump & his quislings are taking such good care of us - What climate change ?',
  'pred_label': 'O O O O O O B-LOC E-LOC O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'mishap',
  'dist': 1.4375314712524414},
 {'text': 'Thank you @ NTVsSteveWhite and @ NEStateFair and ALL of the farmers , ranchers , et al who have donated and volunteered to the Nebraska Flood Relief .',
  'pred_label': 'O O O O O O O O O O O O

#### Test data built from unique trigger entities

In [117]:
# Number of trigger entity strings that are sent to the predictor in the next cell.
len(trigger_entity_strings)

1909

In [118]:
# Run trigger prediction on the trigger entity strings themselves.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': trigger_entity_strings,
    }
)
# Surface an HTTP error here instead of as a KeyError when the following cells
# index into the JSON body.
response.raise_for_status()

In [119]:
# Extract the distance predictions for the trigger entity strings and summarise them.
# NOTE(review): span_and_avgs is defined outside this section; the 4-tuple it returns
# is presumably (min, max, mean, median) - confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.11451055854558945,
 1.529485821723938,
 0.7348763466919511,
 0.7353000640869141)

In [120]:
# Combine each trigger entity string with its predicted labels, matched trigger key
# and distance, then list the records from smallest to largest distance.
prediction_payload = response.json()
c = prediction_payload["class_preds"]
t = prediction_payload["trigger_preds"]
d = prediction_payload["distance_preds"]
results_triggers = [
    {"text": entity_text, "pred_label": labels, "key": trigger_key, "dist": distance}
    for entity_text, labels, trigger_key, distance in zip(trigger_entity_strings, c, t, d)
]
results_triggers.sort(key=lambda record: record["dist"])
results_triggers

[{'text': 'Sri .',
  'pred_label': 'O O',
  'key': 'Disaster Relief Fund',
  'dist': 0.11451055854558945},
 {'text': 'In Flood-Hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.14071688055992126},
 {'text': 'In flood-hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.165828138589859},
 {'text': 'United States',
  'pred_label': 'B-LOC E-LOC',
  'key': 'Apt',
  'dist': 0.21708419919013977},
 {'text': 'flood-ravaged',
  'pred_label': 'O',
  'key': 'stands',
  'dist': 0.22010649740695953},
 {'text': 'In flood-ravaged',
  'pred_label': 'O O',
  'key': 'remote',
  'dist': 0.22204844653606415},
 {'text': 'Arattupuzha',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.22855144739151},
 {'text': 'Tekamah south to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.23225738108158112},
 {'text': 'Fireforce',
  'pred_label': 'O',
  'key': 'attack where',
  'dist': 0.23640602827072144},
 {'text': 'Marathahalli',
  'pred_label': 'S-LOC',
  'key': 'Flood-hit',
  'dist'

#### Test data built from unique trigger keys

In [121]:
# Number of trigger keys that are sent to the predictor in the next cell.
len(trigger_keys)

1289

In [122]:
# Run trigger prediction on the trigger keys themselves.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': trigger_keys,
    }
)
# Surface an HTTP error here instead of as a KeyError when the following cells
# index into the JSON body.
response.raise_for_status()

In [123]:
# Extract the distance predictions for the trigger keys and summarise them.
# NOTE(review): span_and_avgs is defined outside this section; the 4-tuple it returns
# is presumably (min, max, mean, median) - confirm against its definition.
dists = response.json()["distance_preds"]
span_and_avgs(dists)

(0.11451063305139542,
 1.5294859409332275,
 0.7366184786285792,
 0.7387682199478149)

In [124]:
# Combine each trigger key with its predicted labels, matched trigger key and distance,
# then list the records from smallest to largest distance.
prediction_payload = response.json()
c = prediction_payload["class_preds"]
t = prediction_payload["trigger_preds"]
d = prediction_payload["distance_preds"]
results_trigkeys = [
    {"text": key_text, "pred_label": labels, "key": trigger_key, "dist": distance}
    for key_text, labels, trigger_key, distance in zip(trigger_keys, c, t, d)
]
results_trigkeys.sort(key=lambda record: record["dist"])
results_trigkeys

[{'text': 'Sri .',
  'pred_label': 'O O',
  'key': 'Disaster Relief Fund',
  'dist': 0.11451063305139542},
 {'text': 'In flood-hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.1658281534910202},
 {'text': 'United States',
  'pred_label': 'B-LOC E-LOC',
  'key': 'Apt',
  'dist': 0.21708418428897858},
 {'text': 'flood-ravaged',
  'pred_label': 'O',
  'key': 'stands',
  'dist': 0.22010643780231476},
 {'text': 'In flood-ravaged',
  'pred_label': 'O O',
  'key': 'remote',
  'dist': 0.22204840183258057},
 {'text': 'Arattupuzha',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.22855152189731598},
 {'text': 'Tekamah south to',
  'pred_label': 'O O O',
  'key': 'Association',
  'dist': 0.23225738108158112},
 {'text': 'Fireforce',
  'pred_label': 'O',
  'key': 'attack where',
  'dist': 0.23640601336956024},
 {'text': 'Marathahalli',
  'pred_label': 'S-LOC',
  'key': 'Flood-hit',
  'dist': 0.2368757724761963},
 {'text': 'Catch us',
  'pred_label': 'O O',
  'key': 'Catch us',
  'di

#### Difference between predictions for trigger entities and trigger keys

In [125]:
# Keep the entity-run records that have no exactly equal record in the key run.
# Equality is whole-dict equality, so it includes the float "dist": a record whose
# distance differs only in its last digits between the two runs is kept as "unique".
unique_trigger_results = []
for entity_record in results_triggers:
    if entity_record not in results_trigkeys:
        unique_trigger_results.append(entity_record)
print(len(unique_trigger_results))
unique_trigger_results

1186


[{'text': 'Sri .',
  'pred_label': 'O O',
  'key': 'Disaster Relief Fund',
  'dist': 0.11451055854558945},
 {'text': 'In Flood-Hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.14071688055992126},
 {'text': 'In flood-hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.165828138589859},
 {'text': 'United States',
  'pred_label': 'B-LOC E-LOC',
  'key': 'Apt',
  'dist': 0.21708419919013977},
 {'text': 'flood-ravaged',
  'pred_label': 'O',
  'key': 'stands',
  'dist': 0.22010649740695953},
 {'text': 'In flood-ravaged',
  'pred_label': 'O O',
  'key': 'remote',
  'dist': 0.22204844653606415},
 {'text': 'Arattupuzha',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.22855144739151},
 {'text': 'Fireforce',
  'pred_label': 'O',
  'key': 'attack where',
  'dist': 0.23640602827072144},
 {'text': 'Catch us',
  'pred_label': 'O O',
  'key': 'Catch us',
  'dist': 0.2377113699913025},
 {'text': 'peroorkada',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.2401131987571716

In [126]:
# Keep the key-run records that have no exactly equal record in the entity run.
# Equality is whole-dict equality, so it includes the float "dist": a record whose
# distance differs only in its last digits between the two runs is kept as "unique".
unique_trigkey_results = list(filter(
    lambda key_record: key_record not in results_triggers,
    results_trigkeys,
))
print(len(unique_trigkey_results))
unique_trigkey_results

566


[{'text': 'Sri .',
  'pred_label': 'O O',
  'key': 'Disaster Relief Fund',
  'dist': 0.11451063305139542},
 {'text': 'In flood-hit',
  'pred_label': 'O O',
  'key': 'crop',
  'dist': 0.1658281534910202},
 {'text': 'United States',
  'pred_label': 'B-LOC E-LOC',
  'key': 'Apt',
  'dist': 0.21708418428897858},
 {'text': 'flood-ravaged',
  'pred_label': 'O',
  'key': 'stands',
  'dist': 0.22010643780231476},
 {'text': 'In flood-ravaged',
  'pred_label': 'O O',
  'key': 'remote',
  'dist': 0.22204840183258057},
 {'text': 'Arattupuzha',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.22855152189731598},
 {'text': 'Fireforce',
  'pred_label': 'O',
  'key': 'attack where',
  'dist': 0.23640601336956024},
 {'text': 'Catch us',
  'pred_label': 'O O',
  'key': 'Catch us',
  'dist': 0.23771145939826965},
 {'text': 'peroorkada',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.24011313915252686},
 {'text': 'Flood-Hit',
  'pred_label': 'O',
  'key': 'Flood-hit',
  'dist': 0.24497452378

In [127]:
# Pair up records from the two runs that share the same text and put their
# predictions, matched keys and distances side by side.
# The key-run records are grouped by text once, so each entity-run record is matched
# with a dict lookup instead of a scan over every key-run record. Pairs are emitted
# in the same order as a nested scan would produce them.
trigkey_results_by_text = {}
for comparison in unique_trigkey_results:
    trigkey_results_by_text.setdefault(comparison["text"], []).append(comparison)

differing_predictions = []
for result in unique_trigger_results:
    for comparison in trigkey_results_by_text.get(result["text"], ()):
        diff = {"text": result["text"],
                "trigger_pred": result["pred_label"], "trigkey_pred": comparison["pred_label"],
                "trigger_key": result["key"], "trigkey_key": comparison["key"],
                "trigger_dist": result["dist"], "trigkey_dist": comparison["dist"]
        }
        differing_predictions.append(diff)
print(len(differing_predictions))

554


In [128]:
# Show the side-by-side records for texts whose results differ between the two runs.
differing_predictions

[{'text': 'Sri .',
  'trigger_pred': 'O O',
  'trigkey_pred': 'O O',
  'trigger_key': 'Disaster Relief Fund',
  'trigkey_key': 'Disaster Relief Fund',
  'trigger_dist': 0.11451055854558945,
  'trigkey_dist': 0.11451063305139542},
 {'text': 'In flood-hit',
  'trigger_pred': 'O O',
  'trigkey_pred': 'O O',
  'trigger_key': 'crop',
  'trigkey_key': 'crop',
  'trigger_dist': 0.165828138589859,
  'trigkey_dist': 0.1658281534910202},
 {'text': 'United States',
  'trigger_pred': 'B-LOC E-LOC',
  'trigkey_pred': 'B-LOC E-LOC',
  'trigger_key': 'Apt',
  'trigkey_key': 'Apt',
  'trigger_dist': 0.21708419919013977,
  'trigkey_dist': 0.21708418428897858},
 {'text': 'flood-ravaged',
  'trigger_pred': 'O',
  'trigkey_pred': 'O',
  'trigger_key': 'stands',
  'trigkey_key': 'stands',
  'trigger_dist': 0.22010649740695953,
  'trigkey_dist': 0.22010643780231476},
 {'text': 'In flood-ravaged',
  'trigger_pred': 'O O',
  'trigkey_pred': 'O O',
  'trigger_key': 'remote',
  'trigkey_key': 'remote',
  'trigg

#### Test data built from gibberish

In [129]:
# Build 2000 pseudo-sentences of random alphanumeric "words" from a seeded generator.
# Each sentence draws its word count from integers(1, 40); each word then draws its
# length from integers(1, 20) before its characters are sampled. The draws happen in
# exactly this order so the seeded sequence is reproduced.
gen = np.random.default_rng(seed=1337)
gibberish_alphabet = list("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")

test_gibberish = []
for sentence_idx in range(2000):
    word_count = gen.integers(1, 40)
    words = []
    for word_idx in range(word_count):
        word_length = gen.integers(1, 20)
        words.append("".join(gen.choice(gibberish_alphabet, word_length)))
    test_gibberish.append(" ".join(words))
test_gibberish[:3]

['tLh5Y6R2cHDMKVRej OAgO3xIZUFvYdVQBD3 BYz4c3CAy wcslEs 79yvVNcFE8Wt 8jwwIsHSNZ 9rBvYZUVm3cF MoUByOci0N4tR7Tn 6IL cPqFu27SryaDHqJE bwXfPw OOTsHI moJK7UkMfmmTZsuc goJIqurfs nYji Bq4ehRSenQBoKq 0IP8B4CYMI76 fWpjheRXRJZwP fPRigdL O9sYlv1rbclN8OlOEh 59gVd0BTdXqGT z7AJJ8Pw1GvD',
 'Br0ymF1U TOK26vdxeFQV i5ipo3jRtmsOLDS veD86DUkz1teGyDauf',
 'yFN8EtTWfuVCmbtB1Ou 8wQG8WOlFBW Cs0uGvzLkSv4JACxg EsR5y6dLH']

In [130]:
# Send the gibberish sentences to the trigger-model prediction endpoint.
response = requests.post(
    FAST_API_URL + '/training/trigger/predict/',
    json={
        'params': params_trigger_prediction,
        'prediction_data': test_gibberish,
    }
)
# Fail here with the HTTP status rather than later with an opaque KeyError or
# JSON decoding error when the following cells read the response body.
response.raise_for_status()

In [131]:
# Summarise the distances returned for the gibberish sentences.
# span_and_avgs is defined elsewhere in the notebook; judging by the displayed
# result it returns a 4-tuple whose first value is the smallest distance
# (presumably min, max, mean, median - confirm against its definition).
gibberish_payload = response.json()
dists = gibberish_payload["distance_preds"]
span_and_avgs(dists)

(0.08853542804718018,
 1.669521689414978,
 0.4306697828322649,
 0.40861037373542786)

In [132]:
# Decode the response body once instead of re-parsing the same JSON per field.
prediction_payload = response.json()
# Short names kept because this cell has always bound them at notebook scope.
c = prediction_payload["class_preds"]
t = prediction_payload["trigger_preds"]
d = prediction_payload["distance_preds"]

# One record per input sentence, ordered by ascending distance.
# zip() stops at the shortest input, exactly like the multi-iterable map() it
# replaces, and sorted() is stable, so ties keep their input order.
results_gibberish = sorted(
    (
        {"text": text, "pred_label": label, "key": key, "dist": dist}
        for text, label, key, dist in zip(test_gibberish, c, t, d)
    ),
    key=lambda record: record["dist"],
)
results_gibberish

[{'text': 'A R5ggGAo22s BcefBsUYyDE eK DUwiWzZ0K G VwNmACl pYRPJ ZGHIsniOAh1RhTn1qCg ThBbKHNbSs8ovSuT b6GI7RfyA371CS3BglM WLFBPDbH4bI ubB AjYeHosdWUXSQZ9aeN6 TBF8QASV9I5oJ iXLpuLO6kTlIVysQbG y5Hk2K6SSWj ljGlhT36tHm8 Pnt7eHICYSzFDvs Q0ZXsoQFcR5aQQ NSh6wDT1a6Kwg ssW 7NLYjG Xk72 VaSFxAJ mJsidrnyHExzGO',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'from Northwest',
  'dist': 0.08853542804718018},
 {'text': '8 NrHlTW6Kf V4 hi5GYhwYXBQe4i Xx7 wNmUy 06z juSTeNNw6ptaflvikW CYKw 5S DzGSqNV xOmgIkP5 10CnW8fddMpd0DDHyf nqsiEu5jlXh 69v o8mMTVJqYRIt5q qB0IesT7s MrVyb60Y2I8OfqaVA v awybnpQnMS3yFKwp3G RuAerVPCpbnvhEJQino ULyXiQz zKxsdqOZPFaeNT24ehq Rp6iEPg1M7A6WzOHn',
  'pred_label': 'O O O O O O O O O O O O O O O O O O O O O O O O',
  'key': 'Pakistan',
  'dist': 0.16713638603687286},
 {'text': '342 egAWdsmlnHkq o6BnwrhBzmU5AKizhBY AZDzwvHZsdMoxTDPp xUip6ywcI BN8ZagYDsi9MqMc ajpk Nr77Z78FQB I1zMrgeka29cCb4eKI rub2lXj J 9VLUceEr1u qdBbgV7iia T6YhDJnFNhOo 899wx7M1vV

#### Test data built from previous datasets with prefixed/suffixed tokens

In [133]:
# Wrap every sentence of every earlier test set in a fixed prefix/suffix pair.
# The dataset list is written once so both variants are guaranteed to cover the
# same inputs; the result holds all "Why ... News" sets first, followed by all
# "Indo-Tibetan ... KeralaFloodRelief" sets, each in dataset order.
prefix_suffix_sources = [
    test,
    test_cyclone,
    test_hurricane,
    test_from_training,
    trigger_keys,
    trigger_entity_strings,
    test_gibberish,
]
prefix_suffix_pairs = [
    ("Why", "News"),
    ("Indo-Tibetan", "KeralaFloodRelief"),
]

test_prefix_suffix = []
for prefix_token, suffix_token in prefix_suffix_pairs:
    for tests in prefix_suffix_sources:
        test_prefix_suffix.append(
            [f"{prefix_token} {text} {suffix_token}" for text in tests]
        )

In [134]:
# Request predictions for each prefixed/suffixed test set, keeping the
# responses in the same order as test_prefix_suffix.
responses = []
for tests in test_prefix_suffix:
    response = requests.post(
        FAST_API_URL + '/training/trigger/predict/',
        json={
            'params': params_trigger_prediction,
            'prediction_data': tests,
        }
    )
    # Stop at the first failed request instead of storing an error response
    # that would only surface later as a KeyError / JSON decoding error.
    response.raise_for_status()
    responses.append(response)

In [135]:
# Print one distance summary per prefixed/suffixed test set, in the order the
# responses were collected.
for response in responses:
    dists = response.json()["distance_preds"]
    distance_summary = span_and_avgs(dists)
    print(distance_summary)

(0.9721304178237915, 0.9977880716323853, 0.9870209355413178, 0.9871712923049927)
(0.9767643213272095, 0.998812735080719, 0.9873224147137879, 0.9870606958866119)
(0.9754350185394287, 1.0005789995193481, 0.9871013769754325, 0.9873575866222382)
(0.9570311307907104, 1.0365588665008545, 0.9847859130182167, 0.9849372804164886)
(0.989098846912384, 1.0034908056259155, 0.9970508777790055, 0.9969708323478699)
(0.9890989065170288, 1.0034908056259155, 0.997420835082852, 0.9973666667938232)
(0.978019654750824, 1.0009570121765137, 0.986858999490738, 0.986234039068222)
(0.6619337201118469, 0.6750358939170837, 0.669220233037148, 0.6692856252193451)
(0.6469734311103821, 0.6786141991615295, 0.6708608880093562, 0.6713415384292603)
(0.6542129516601562, 0.6787496209144592, 0.6694321768476784, 0.6699688732624054)
(0.642785370349884, 0.6677659749984741, 0.6595230985659546, 0.6599998772144318)
(0.6496836543083191, 0.6620323657989502, 0.6560102918982228, 0.6560221910476685)
(0.6496836543083191, 0.6620323657989