# #### Examples of ConceptNet queries

In [7]:
from concept_net_util import crawl_concept_net

# Query ConceptNet for a few sample concepts and print the relations returned
# for each one. `res` ends up holding the result of the last query ('smoke').
for caption, concept in (
    ('Tsunami: ', 'tsunami'),
    ('Drive: ', 'drive'),
    ('Smoke: ', 'smoke'),
):
    res = crawl_concept_net(concept)
    print(caption, res)

Tsunami:  ['IsA calamity', 'IsA wave']
Drive:  ['HasPrerequisite get in the car', 'HasPrerequisite drive it', 'HasPrerequisite get a license', 'HasPrerequisite have a car', 'HasPrerequisite start the car', 'UsedFor storing data', 'HasSubevent pay attention', 'HasPrerequisite drive']
Smoke:  ['HasPrerequisite light the cigarette', 'HasPrerequisite buy some cigarettes', 'UsedFor smoke', 'CreatedBy fire', 'HasProperty dangerous', 'HasPrerequisite have a source of fire', 'HasPrerequisite have a cigarette']


# #### Examples of embedders

In [8]:
from transformers import BertTokenizer
#from pytorch_pretrained_bert import BertTokenizer

from mention_masking_reasoner import MentionMaskingReasoner
from knowledge_encoder import KnowledgeEncoder


# One BERT tokenizer instance is shared by both embedders.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
mme, ke = MentionMaskingReasoner(tokenizer), KnowledgeEncoder(tokenizer)

sample = 'The patient was admitted to the hospital on the 1st of January due to a car crash.'
# (start, end) positions of the two event mentions; in the recorded output
# (3, 3) corresponds to 'admitted' and (16, 16) to 'crash'.
event1_index, event2_index = (3, 3), (16, 16)

# Masked-mention encoding first, then the knowledge-augmented encoding.
# e1/e2 are returned by both calls; the second call's values are the ones kept.
mask_encoded, encoded, e1, e2 = mme.mask_sentence(sample, event1_index, event2_index)
knowledge_encoded, e1, e2 = ke.encode_knowledge(sample, event1_index, event2_index)

# Raw token ids.
for caption, token_ids in (
    ('Encoded: ', encoded),
    ('Encoded mask: ', mask_encoded),
    ('Encoded knowledge: ', knowledge_encoded),
):
    print(caption, token_ids)

# Map the ids back to word pieces so the three encodings can be compared by eye.
decoded, mask_decoded, knowledge_decoded = (
    tokenizer.convert_ids_to_tokens(token_ids)
    for token_ids in (encoded, mask_encoded, knowledge_encoded)
)

for caption, tokens in (
    ('Embedded: ', decoded),
    ('Embedded mask: ', mask_decoded),
    ('Embedded knowledge: ', knowledge_decoded),
):
    print(caption, tokens)

Encoded:  [101, 1996, 5776, 2001, 4914, 2000, 1996, 2902, 2006, 1996, 3083, 1997, 2254, 2349, 2000, 1037, 2482, 5823, 102]
Encoded mask:  [101, 1996, 5776, 2001, 103, 2000, 1996, 2902, 2006, 1996, 3083, 1997, 2254, 2349, 2000, 1037, 2482, 103, 102]
Encoded knowledge:  [101, 1996, 5776, 2001, 4914, 2000, 1996, 2902, 2006, 1996, 3083, 1997, 2254, 2349, 2000, 1037, 2482, 5823, 5214, 11253, 5823, 2038, 6342, 4783, 15338, 5823, 5320, 1037, 5823, 5450, 11253, 18855, 102]
Embedded:  ['[CLS]', 'the', 'patient', 'was', 'admitted', 'to', 'the', 'hospital', 'on', 'the', '1st', 'of', 'january', 'due', 'to', 'a', 'car', 'crash', '[SEP]']
Embedded mask:  ['[CLS]', 'the', 'patient', 'was', '[MASK]', 'to', 'the', 'hospital', 'on', 'the', '1st', 'of', 'january', 'due', 'to', 'a', 'car', '[MASK]', '[SEP]']
Embedded knowledge:  ['[CLS]', 'the', 'patient', 'was', 'admitted', 'to', 'the', 'hospital', 'on', 'the', '1st', 'of', 'january', 'due', 'to', 'a', 'car', 'crash', 'capable', '##of', 'crash', 'has', '

# #### CausalTB Dataset

In [9]:
import pickle

# CausalTB dataset (processed with the script preprocess_causaltb.py)
DATA_TRAIN_PATH = 'data/causaltb_train.pickle'
DATA_TRAIN_MASK_PATH = 'data/causaltb_train_mask.pickle'
DATA_TEST_PATH = 'data/causaltb_test.pickle'
DATA_TEST_MASK_PATH = 'data/causaltb_test_mask.pickle'


def _load_pickle(path):
    """Open `path` in binary mode and return the unpickled object.

    NOTE: pickle can execute arbitrary code on load; only use files you
    generated yourself with the preprocessing script.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Plain and masked variants of the train/test splits.
train_set = _load_pickle(DATA_TRAIN_PATH)
test_set = _load_pickle(DATA_TEST_PATH)
train_set_mask = _load_pickle(DATA_TRAIN_MASK_PATH)
test_set_mask = _load_pickle(DATA_TEST_MASK_PATH)

print('Train', len(train_set), 'Test', len(test_set))

Train 7608 Test 846


# #### LitBank dataset (no labels)

In [21]:
# LitBank dataset (processed with the script preprocess_litbank.py)

full_ds_path = 'data/litbank_full.pickle'
prepared_ds_path = 'data/litbank_prepared.pickle'

with open(full_ds_path, 'rb') as full_file:
    litbank_full = pickle.load(full_file)

with open(prepared_ds_path, 'rb') as prepared_file:
    litbank_events = pickle.load(prepared_file)

# Flatten the per-key collections in `litbank_events` into one list,
# visiting keys in iteration order and keeping each collection's item order.
litbank_all_events = [
    event
    for key in litbank_events
    for event in litbank_events[key]
]

# #### Model training (set `model_path` to a file path to save the trained model)

In [None]:
from train import Trainer
# Training settings.
EPOCHS = 100
BATCH_SIZE = 7
# Per this section's heading, set this to a path to save the model.
# None reproduces the original call, which passed model_path=None.
TRAINED_MODEL_PATH = None

# Build a single trainer. The original cell constructed Trainer() twice and
# never used the first instance; both names are kept so any later reference
# to either still resolves.
model_trainer = Trainer()
tt = model_trainer

# train() returns the evaluation metrics together with the two trained models
# (plain and mention-masked).
(precision, recall, f1), model, mask_model = tt.train(
    train_set,
    train_set_mask,
    test_set,
    test_set_mask,
    model_path=TRAINED_MODEL_PATH,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

print(precision, recall, f1)

# #### Predictor using trained model

In [12]:
from kemmg_predictor import KEMMGPredictor

# Locations of the two saved model files handed to the predictor: the plain
# model and its mention-masked counterpart.
# NOTE(review): presumably these are the files written by the training cell
# when a model path is set — confirm.
MODEL_PATH = 'models/bert_causal_model.pt'
MODEL_MASK_PATH = 'models/bert_causal_model_mask.pt'

# Predictor built from both model files; used by the prediction cells below.
predictor = KEMMGPredictor(MODEL_PATH, MODEL_MASK_PATH)

# ##### Single prediction

In [13]:
# Three example sentences, each with two event mentions (e1, e2).
sample1 = 'The patient was admitted to the hospital because of a heart attack.' # e1 = 'admitted', e2 = 'heart attack'
sample2 = 'The earthquake caused a tsunami.' # e1 = 'earthquake', e2 = 'tsunami'
sample3 = 'Both earthquake and tsunami are natural disasters.' # e1 = 'earthquake', e2 = 'tsunami'

# Event spans are (start, end) positions of whitespace-separated words,
# 0-based with both ends included; this matches the embedder example in this
# notebook, where (3, 3) masks 'admitted'.
# Fixed: s1_e1_index was (1, 1), which selects 'patient' instead of the
# documented event 'admitted' (word 3). Re-run the cell to refresh the
# recorded prediction for sample 1.
s1_e1_index = (3, 3)
s1_e2_index = (10, 11)  # 'heart attack.'
s2_e1_index = (1, 1)    # 'earthquake'
s2_e2_index = (4, 4)    # 'tsunami.'
s3_e1_index = (1, 1)    # 'earthquake'
s3_e2_index = (3, 3)    # 'tsunami'

# One prediction per sentence; the recorded outputs are 1 or 0.
p1 = predictor.predict(sample1, s1_e1_index, s1_e2_index)
p2 = predictor.predict(sample2, s2_e1_index, s2_e2_index)
p3 = predictor.predict(sample3, s3_e1_index, s3_e2_index)

print('Prediction for sample 1: ', p1)
print('Prediction for sample 2: ', p2)
print('Prediction for sample 3: ', p3)

Prediction for sample 1:  1
Prediction for sample 2:  1
Prediction for sample 3:  0


# ##### Batch prediction

In [22]:
# Run the predictor over every flattened LitBank event, 5 items per batch.
# Long-running: the recorded run showed 8252 iterations at ~1.8 s each
# (about 4 hours projected) and was interrupted manually after 258.
predictions = predictor.predict_all(litbank_all_events, batch_size=5)

  3%|▎         | 258/8252 [07:51<4:03:19,  1.83s/it]


KeyboardInterrupt: 