### This notebook is used for analyzing / debugging the predictions from different trained models

In [51]:
import numpy as np
import pandas as pd
import datetime as dt
# other modules, code
from html import unescape
import unicodedata
import ast

import spacy
import srsly
from spacy import displacy
from spacy.training import docs_to_json, offsets_to_biluo_tags, biluo_tags_to_spans
from spacy.pipeline import EntityRuler
from spacy.tokens import DocBin

from spacy.training import Example
from spacy.util import minibatch, compounding
from spacy.tokens import Span
from toolz import partition_all

from spacy.scorer import Scorer

In [55]:
# Load the 'en_core_web_sm' pipeline; `nlp` is reused further down when scoring the hand-written test examples.
nlp = spacy.load('en_core_web_sm')

# Sample sentence used to eyeball the entities this pipeline predicts.
txt = 'Jim bought 300 shares of Acme Corp. in 2006.'

displacy.render(nlp(txt), style='ent') # Default spacy model

## COPIED CODE FROM 3_Model_Training.ipynb; FIX IT!

In [2]:
def get_list_from_docbin(filename):
    """Read a serialized DocBin and turn it into (text, annotations) tuples.

    Args:
        filename: path of the DocBin file to load from disk.

    Returns:
        A tuple ``(list_data, labels)`` where ``list_data`` is a list of
        ``(doc.text, {'entities': [(start_char, end_char, label), ...]})``
        tuples, one per doc, and ``labels`` is the set of entity labels seen
        across all docs.
    """
    # A blank English pipeline supplies the vocab needed to deserialize the docs.
    blank_nlp = spacy.blank('en')
    blank_nlp.add_pipe('sentencizer')
    stored_docs = DocBin().from_disk(filename).get_docs(blank_nlp.vocab)

    examples, label_set = [], set()
    for doc in stored_docs:
        entity_offsets = []
        # Single pass over the entities: collect the offsets and the label together.
        for ent in doc.ents:
            entity_offsets.append((ent.start_char, ent.end_char, ent.label_))
            label_set.add(ent.label_)
        examples.append((doc.text, {'entities': entity_offsets}))
    return (examples, label_set)

### Function to generate metrics for single annotation example for specified model

In [3]:
def get_example_score(model, raw_text, annotations):
    """Score one annotated text against the predictions of `model`.

    Args:
        model: loaded spaCy pipeline used both to predict and to build the Scorer.
        raw_text: the text to run through the model.
        annotations: dict with an 'entities' key holding the reference entity offsets.

    Returns:
        A tuple ``(scores, doc_ref)``: the dict returned by ``Scorer.score`` for
        this single example, and the reference doc carrying the annotations.
    """
    # Predicted doc from the model, paired with the reference entity annotations.
    predicted_doc = model(raw_text)
    reference_annotations = {"entities": annotations['entities']}
    example = Example.from_dict(predicted_doc, reference_annotations)

    # Score just this one example with a Scorer built from the same model.
    scores = Scorer(model).score([example])
    return (scores, example.reference)

### Initialize basic arguments

In [4]:
# Seed NumPy's global random state (used by the np.random draws in later cells).
np.random.seed(42)

# Months covered by the sample, and the per-month slice [n_start, n_end).
months = ['May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct']
n_start, n_end = 0, 80  # n_end was 40 before

# Examples per month (n_end excluded) times the number of months.
model_suffix = (n_end - n_start) * len(months)
sub_group = 'train'

### Annotated files generated from Doccano & rehearsal data notebook

In [5]:
# SET this flag to False if not using SYMPTOM as part of NER model
keep_symptom_ent = False

# Rebuild the suffix from the base settings instead of appending to its current
# value, so re-running this cell cannot produce '..._NS_v2_NS_v2'.
model_suffix = len(months) * (n_end - n_start)
if not keep_symptom_ent:
    model_suffix = f'{model_suffix}_NS_v2'

# final annotated output from Doccano
train_filename = './data/json/nsamples_480_v2_2021_6m_doccano.jsonl' #'./nsamples_240_2021_6m_doccano.jsonl'
val_filename = './data/json/val_nsamples_240_doccano.jsonl'

# annotated rehearsal filenames from generate_rehearsal_data.ipynb
rehearsal_train_filename = './data/json/nlp_rehearsal_1000.json'
rehearsal_val_filename = './data/json/test_nlp_rehearsal_1000.json'

# Pattern files.
# NOTE(review): the "new" patterns path also starts with 'old_patterns_' - confirm this is intended.
old_ptrns_fname = './data/patterns/old_patterns_240.csv' #'./old_patterns_60.csv'
new_ptrns_fname = f'./data/patterns/old_patterns_{model_suffix}.csv'

### binary data sets generated from preprocessing

In [6]:
# Training DocBin follows the current model suffix.
docbin_train = './outputs/train_n_' + str(model_suffix) + '.spacy'

# Validation and rehearsal DocBins are pinned to the 240-sample NS files
# (suffix-based alternatives kept alongside for reference).
docbin_val = './outputs/val_n_240_NS.spacy'  # f'./outputs/val_n_{model_suffix}.spacy'
docbin_rhrsl_train = './outputs/rhrsl_train_n_240_NS.spacy'  # f'./outputs/rhrsl_train_n_{model_suffix}.spacy'
docbin_rhrsl_val = './outputs/rhrsl_val_n_240_NS.spacy'  # f'./outputs/rhrsl_val_n_{model_suffix}.spacy'

epochs = 40

In [7]:
# Rebuild the (text, {'entities': [...]}) tuples from the serialized train and
# validation DocBins; the label sets returned alongside are not used here.
train_data, _ = get_list_from_docbin(docbin_train)
val_data, _ = get_list_from_docbin(docbin_val)

In [8]:
# Keep only the raw text of each (text, annotations) pair. Comprehensions
# avoid leaking loop variables into the notebook namespace.
train_text = [text for text, _annotations in train_data]

val_text = [text for text, _annotations in val_data]

### Load the trained models here

In [9]:
# Load the trained models from ./models/; all but model_1_NS and model_5 are
# looked up by the current `model_suffix`.
# Model 0: Blank spacy model using patterns with EntityRuler
model_0 = spacy.load(f'./models/model_0_n_{model_suffix}/')

# Model 1: pre-trained spacy model trained using training data AND revision data
model_1 = spacy.load(f'./models/model_1_n_{model_suffix}/')

# Model 2 (disabled): pre-trained spacy model trained using training data only (1 epoch, catastrophic forgetting)
# model_2 = spacy.load(f'./models/model_2_n_{model_suffix}/')

# Model 3: Blank spacy model trained using training data only
model_3 = spacy.load(f'./models/model_3_n_{model_suffix}/')

# Model 4 (disabled): pre-trained spacy model using patterns with EntityRuler
# model_4 = spacy.load(f'./models/model_4_n_{model_suffix}/')

# Model 5: pre-trained spacy model, loaded as-is
model_5 = spacy.load('en_core_web_sm')

# Model 1, prev version: pre-trained spacy model trained using training data AND revision data
# (fixed path, independent of `model_suffix`)
model_1_NS = spacy.load(f'./models/model_1_n_480_NS/')

## MODEL EVALUATION

In [10]:
# Highlight colour per entity label for displaCy; the labels to render are the
# keys of this mapping, in the same order.
colors = dict(DRUG='PINK', SYMPTOM='LIGHTGREEN', DISEASE='LIGHTBLUE')
options = dict(ents=list(colors), colors=colors)

In [11]:
# Pick a random training example, run the default pre-trained model (model_5) on it,
# and keep the reference doc built from its annotations.
# Re-run this cell to draw a different example for the sample-output cells below.
idx = np.random.randint(0, len(train_data))
txt, annotations = train_data[idx]
doc = model_5(txt) #default spacy model
example = Example.from_dict(doc, {"entities": annotations['entities']})
doc_ref = example.reference

In [12]:
# displacy.render(doc_ref, style='ent', options=options)

### MODEL 1 Sample output: fine-tune spacy model trained using pseudo rehearsal

In [13]:
# txt = "At least 55 reports of hurricane-force thunderstorm wind gusts of over 75 mph were tracked across the Great Plains and Midwest, according to the National Weather Service Storm Prediction Center, setting a one-day national record."
# displacy.render(model_1(txt), style='ent')

### MODEL 3 Sample output: Blank spacy model trained from scratch

In [14]:
# displacy.render(model_3(txt), style='ent', options=options)

### MODEL 4 Sample output: Rule-based model plus pre-trained spacy model

In [15]:
# displacy.render(model_4(txt), style='ent')

### MODEL 5 Sample output: Baseline pre-trained spacy model

In [16]:
# 'Apple is looking at buying U.K. startup for $1 billion'
# Model Kate Moss walks Louis Vuitton RTW Fall 2013. Golden girl model Kate Moss wowed in her usual chic way as she walked the Louis Vuitton Ready-To-Wear Fall Collection 2013 fashion show. Moss continues to give us a close look at her ability to inspire while wearing all things gorgeous. Yes, absolutely Louis Vuitton gorgeous.'
# displacy.render(model_5(txt), style='ent')

## Debugging Model output

### Filter examples by comparing output of different models

In [17]:
# # Get only text data
# # tmp_df = pd.DataFrame(train_text, columns=['text']) #TRAIN
# tmp_df = pd.DataFrame(val_text, columns=['text']) #VALIDATION
# # Combine all entities detected using blank spacy model
# tmp_df['model1'] = tmp_df['text'].map(lambda x: ','.join([str(e) for e in model_1(x).ents]))
# # Combine all entities detected using blank rule-based model
# tmp_df['model0'] = tmp_df['text'].map(lambda x: ','.join([str(e) for e in model_0(x).ents]))
# # True if text has 'DRUG' entity using rule-based model
# tmp_df['model0_has_drug'] = tmp_df['text'].map(lambda x: any([True if e.label_ == 'DRUG' else False for e in model_0(x).ents]))
# # select entities where predictions are different AND rule-based model detected a DRUG entity in its text
# tmp_df1 = tmp_df.loc[(tmp_df['model0'] != tmp_df['model1']) & (tmp_df['model0_has_drug'])]

In [18]:
# Texts of the validation split (build from `train_text` instead to inspect the training split).
# tmp_df = pd.DataFrame(train_text, columns=['text']) #TRAIN
tmp_df = pd.DataFrame(val_text, columns=['text']) #VALIDATION

# Run the previous model (model_1_NS) once per text and reuse the docs for both
# of its columns; it was previously run twice per text.
docs_m1_NS = [model_1_NS(text) for text in tmp_df['text']]

# Sorted, comma-joined entity texts predicted by the previous model (model_1_NS)
tmp_df['model1_NS'] = pd.Series(
    [','.join(sorted(str(e) for e in doc.ents)) for doc in docs_m1_NS],
    index=tmp_df.index, dtype=object)
# Sorted, comma-joined entity texts predicted by the current model (model_1)
tmp_df['model1_NS_v2'] = tmp_df['text'].map(lambda x: ','.join(sorted(str(e) for e in model_1(x).ents)))
# True if model_1_NS predicts at least one 'DRUG' entity in the text
tmp_df['model1_NS_drug'] = pd.Series(
    [any(e.label_ == 'DRUG' for e in doc.ents) for doc in docs_m1_NS],
    index=tmp_df.index, dtype=bool)
# Keep texts where the two models disagree AND model_1_NS predicted a DRUG entity
tmp_df1 = tmp_df.loc[(tmp_df['model1_NS'] != tmp_df['model1_NS_v2']) & (tmp_df['model1_NS_drug'])]

### Select random example from filtered dataframe and review model output
### Get evaluation metrics for above example using spacy scorer object

In [40]:
# Draw a random row position within the filtered frame and take its text.
idx = np.random.randint(0, tmp_df1.shape[0])
txt = tmp_df1.iloc[idx]['text']

# reset df index; the previous index labels are read back from the 'index' column below
tmpd_df2 = tmp_df1.reset_index()

# get the index in the original df (tmp_df was built from val_text, so this is
# also the position of the example in val_data)
idx_orig = tmpd_df2.iloc[idx]['index']

# ## ***DEBUG***
# # The following indices where model_1_480_NS predicts DRUG none present in annotations
# # 21,  35,  55,  70, 106, 107, 111, 140, 169, 183
# # Indices where model_1_480_NS predicts DRUG entities with higher recall than model_1_480_NS_v2
# # 18, 23, 27, 33, 38, 43, 44, 49, 66, 67, 76, 80, 94, 101, 139, 148, 151, 157,
# # 162, 164, 168, 173, 174, 186, 189, 191, 194, 207, 208
# idx_orig = 49

# Fetch the annotated example for the selected position.
# raw_text, annotations = train_data[idx_orig] #TRAIN
raw_text, annotations = val_data[idx_orig] #VALIDATION
# ## ***DEBUG***
# txt = raw_text

# Score the same example with each model; the reference doc is identical for
# all of them, so it is kept only from the first call.
score_m0, doc_ref = get_example_score(model_0, raw_text, annotations)
score_m1, _ = get_example_score(model_1, raw_text, annotations)
score_m1_NS, _ = get_example_score(model_1_NS, raw_text, annotations)
score_m3, _ = get_example_score(model_3, raw_text, annotations)

#### Rule-based

In [41]:
# Only looking at performance for 2 entities (rule-based model, model_0).
# reindex yields a NaN column instead of a KeyError when the score has no entry
# for one of the labels.
pd.DataFrame(score_m0['ents_per_type']).reindex(columns=['DRUG', 'DISEASE']).sort_index()

Unnamed: 0,DRUG,DISEASE
f,0.666667,0.0
p,1.0,0.0
r,0.5,0.0


#### Model1_NS_v2

In [42]:
# Only looking at performance for 2 entities (model_1).
# reindex yields a NaN column instead of a KeyError when the score has no entry
# for one of the labels.
pd.DataFrame(score_m1['ents_per_type']).reindex(columns=['DRUG', 'DISEASE']).sort_index()

Unnamed: 0,DRUG,DISEASE
f,1.0,1.0
p,1.0,1.0
r,1.0,1.0


#### Model1_NS

In [43]:
# Only looking at performance for 2 entities (previous model, model_1_NS).
# reindex yields a NaN column instead of a KeyError when the score has no entry
# for one of the labels.
pd.DataFrame(score_m1_NS['ents_per_type']).reindex(columns=['DRUG', 'DISEASE']).sort_index()

Unnamed: 0,DRUG,DISEASE
f,1.0,0.5
p,1.0,0.5
r,1.0,0.5


In [44]:
# only looking at performance for 2 entities
# pd.DataFrame(score_m3['ents_per_type'])[['DRUG', 'DISEASE']].sort_index()

In [45]:
# Show which val_data position the example scored above came from.
idx_orig

209

### Annotated Reference doc

In [46]:
# Entities from the annotations (reference doc returned by get_example_score).
displacy.render(doc_ref, style='ent', options=options) #REFERENCE

### Model 0: Rule-Based Model

In [47]:
# Entities predicted by model_0 for the selected text.
displacy.render(model_0(txt), style='ent', options=options) #RULE-BASED

### Model 3: Blank ML Model

In [48]:
# displacy.render(model_3(txt), style='ent', options=options) #BLANK ML MODEL

### Model 1: pre-trained spaCy model fine-tuned with pseudo-rehearsal

In [49]:
displacy.render(model_1(txt), style='ent', options=options) # MODEL 1 (pre-trained model trained on training + revision data)

### Model 1 v1 (previous version): pre-trained spaCy model fine-tuned with pseudo-rehearsal

In [50]:
displacy.render(model_1_NS(txt), style='ent', options=options) # MODEL 1, PREVIOUS VERSION (model_1_n_480_NS)

### Spacy default model: en_core_web_sm

In [30]:
# nlp = spacy.load('en_core_web_sm')
# displacy.render(nlp(txt), style='ent') #BLANK ML MODEL

In [31]:
# Case-insensitive, literal (non-regex) substring search over the texts in tmp_df.
# A dedicated name is used so the example text held in `txt` (rendered by the
# displaCy cells) is not overwritten by the search term.
search_term = 'bentyl'
tmp_df[tmp_df['text'].str.lower().str.contains(search_term.lower(), regex=False)]

Unnamed: 0,text,model1_NS,model1_NS_v2,model1_NS_drug


In [775]:
# Small hand-annotated set used to check that Scorer produces non-zero scores
# with the `nlp` pipeline loaded at the top of the notebook.
# Each item is (text, {"entities": [[start_char, end_char, label], ...]}).
test_examples = [
    ("Trump says he's answered Mueller's Russia inquiry questions \u2013 live",{"entities":[[0,5,"PERSON"],[25,32,"PERSON"],[35,41,"GPE"]]}),
    ("Alexander Zverev reaches ATP Finals semis then reminds Lendl who is boss",{"entities":[[0,16,"PERSON"],[55,60,"PERSON"]]}),
    ("Britain's worst landlord to take nine years to pay off string of fines",{"entities":[[0,7,"GPE"]]}),
    ("Tom Watson: people's vote more likely given weakness of May's position",{"entities":[[0,10,"PERSON"],[56,59,"PERSON"]]}),
]
examples = []
for raw_text, annotations in test_examples:
    # Using nlp.make_doc(raw_text) here gave all-zero scores - presumably because
    # it does not run the pipeline, so the predicted doc carries no entities.
    #doc = nlp.make_doc(raw_text) !Gives zeroes as output
    doc = nlp(raw_text)
    example = Example.from_dict(doc, annotations)
    examples.append(example)

# Score all examples together with a Scorer built from the same pipeline.
scorer = Scorer(nlp)
scores = scorer.score(examples)
# print (scores)

### DEBUG: Compare model1_NS_v1, model1_NS_v2

In [1028]:
def get_label_scores_df(model, data, label):
    """Score every example in `data` with `model` and collect the metrics for one label.

    Args:
        model: loaded spaCy pipeline, passed on to `get_example_score`.
        data: iterable of (raw_text, annotations) tuples, e.g. `val_data`.
        label: entity label whose per-type scores are collected, e.g. 'DRUG'.

    Returns:
        DataFrame with columns ``idx`` (position of the example in `data`),
        ``p``, ``r`` and ``f``. Examples whose score has no entry for `label`
        are left out.
    """
    records = []
    for i, (raw_text, annotations) in enumerate(data):
        score = get_example_score(model, raw_text, annotations)[0]
        # Skip examples without per-type scores for this label. This replaces a
        # bare `except: pass`, which also hid unrelated errors; presumably only
        # a missing/None 'ents_per_type' or a missing label was meant to be skipped.
        per_type = score.get('ents_per_type') or {}
        if label not in per_type:
            continue
        label_score = per_type[label]
        records.append({'idx': i,
                        'p': label_score['p'],
                        'r': label_score['r'],
                        'f': label_score['f']})
    return pd.DataFrame(records, columns=['idx', 'p', 'r', 'f'])


label = 'DRUG'
model = model_1

# Build both comparison frames explicitly, so the cells below no longer rely on
# frames left over in the kernel from earlier manual runs.
score_m1_NS_df = get_label_scores_df(model_1_NS, val_data, label)  # previous model
score_m1_df = get_label_scores_df(model, val_data, label)          # current model

# Flat lists as produced by the original loop (they reflect `model`).
idx = score_m1_df['idx'].tolist()
p = score_m1_df['p'].tolist()
r = score_m1_df['r'].tolist()
f = score_m1_df['f'].tolist()

In [1043]:
# NOTE: the line that builds this frame from the idx/p/r/f lists is commented out,
# so `score_m1_NS_df` must already exist in the kernel - presumably created by
# running the scoring loop with model = model_1_NS and then this line (confirm).
# score_m1_NS_df = pd.DataFrame.from_dict({'idx':idx, 'p':p, 'r':r,'f':f})
score_m1_NS_df.shape

(106, 4)

In [1044]:
# NOTE: the line that builds this frame from the idx/p/r/f lists is commented out,
# so `score_m1_df` must already exist in the kernel - presumably created by
# running the scoring loop with model = model_1 and then this line (confirm).
# score_m1_df = pd.DataFrame.from_dict({'idx':idx, 'p':p, 'r':r,'f':f})
score_m1_df.shape

(97, 4)

In [1202]:
# Outer merge on idx: the *_x columns come from score_m1_NS_df, the *_y columns
# from score_m1_df. Rows with a null p_y are examples present only in score_m1_NS_df.
mrgd_df = pd.merge(score_m1_NS_df, score_m1_df, how='outer', on='idx')
mrgd_df[mrgd_df['p_y'].isnull()]
#Indices where model_1_NS predicts some DRUG entities but none are annotated in data
#21,  35,  55,  70, 106, 107, 111, 140, 169, 183#

Unnamed: 0,idx,p_x,r_x,f_x,p_y,r_y,f_y
8,21,0.0,0.0,0.0,,,
15,35,0.0,0.0,0.0,,,
23,55,0.0,0.0,0.0,,,
29,70,0.0,0.0,0.0,,,
45,106,0.0,0.0,0.0,,,
46,107,0.0,0.0,0.0,,,
48,111,0.0,0.0,0.0,,,
65,140,0.0,0.0,0.0,,,
83,169,0.0,0.0,0.0,,,
89,183,0.0,0.0,0.0,,,


In [1261]:
# Inner merge on idx: keep only examples scored in both frames (r_x from
# score_m1_NS_df, r_y from score_m1_df) and show those whose recall differs.
mrgd_df = pd.merge(score_m1_NS_df, score_m1_df, how='inner', on='idx')
mrgd_df[mrgd_df['r_x'] != mrgd_df['r_y']][['idx', 'r_x', 'r_y']]
# Indices where DRUG recall differs between model_1_480_NS (r_x) and model_1_480_NS_v2 (r_y).
# The filter is '!=', so rows where r_x is lower than r_y are included as well.
# 18, 23, 27, 33, 38, 43, 44, 49, 66, 67, 76, 80, 94, 101, 139, 148, 151, 157, 162, 164, 168, 173, 174, 186, 189, 191, 194, 207, 208

Unnamed: 0,idx,r_x,r_y
6,18,0.5,0.25
8,23,0.833333,0.75
10,27,1.0,0.0
13,33,1.0,0.75
15,38,0.4,0.6
16,43,0.333333,0.222222
17,44,1.0,0.0
18,49,1.0,0.666667
23,66,1.0,0.5
24,67,1.0,0.916667
