### Example code for computing sentence embeddings from a trained model file and comparing them by cosine similarity.

In [2]:
import torch
from snli.model import SNLIModel
from utils.vocab import Vocab
import torch.nn as nn
import torch.nn.functional as F

In [3]:
# Inference only: turn autograd off globally before the model is used.
torch.set_grad_enabled(False)

# Vocabulary is read from the pretrained pickle, requesting pad and unk entries.
vocab = Vocab.from_file('pretrained/vocab.pkl', add_pad=True, add_unk=True)

# Build the network with the hyperparameters the checkpoint below is loaded into.
model = SNLIModel(
    prim_num_classes=3,
    num_words=len(vocab),
    word_dim=300,
    hidden_dim=300,
    clf_hidden_dim=1024,
    clf_num_layers=1,
    bidirectional=True,
    dropout_prob=0.13,
    use_leaf_rnn=True,
    intra_attention=True,
    use_batchnorm=True,
)

# Restore the weights, mapping every tensor onto the CPU.
# NOTE(review): torch.load unpickles the file — only use checkpoints from a trusted source.
model.load_state_dict(torch.load('pretrained/model.pkl', map_location='cpu'))

# Switch to evaluation mode; as the cell's last expression this also displays the module.
model.eval()

SNLIModel(
  (word_embedding): Embedding(43439, 300)
  (encoder): BinaryTreeLSTM(
    (att_wt): Linear(in_features=600, out_features=1, bias=True)
    (pre_att): Linear(in_features=600, out_features=600, bias=True)
    (leaf_rnn_cell): LSTMCell(300, 300)
    (leaf_rnn_cell_bw): LSTMCell(300, 300)
    (treelstm_layer): BinaryTreeLSTMLayer(
      (comp_linear): Linear(in_features=1200, out_features=3000, bias=True)
    )
  )
  (classifier): SNLIClassifier(
    (bn_mlp_input): BatchNorm1d(2400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn_mlp_output): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout): Dropout(p=0.13)
    (mlp): Sequential(
      (0): Sequential(
        (0): Linear(in_features=2400, out_features=1024, bias=True)
        (1): ReLU()
      )
    )
    (clf_linear): Linear(in_features=1024, out_features=3, bias=True)
  )
  (dropout): Dropout(p=0.13)
)

In [7]:
def prepare_input(sent_words):
    """Turn one tokenized sentence into embedded encoder input.

    Args:
        sent_words: sequence of word strings for a single sentence.

    Returns:
        A pair ``(embedded, length)``: the output of ``model.word_embedding``
        applied to a one-sentence batch of word ids, and a ``LongTensor``
        holding the single sentence length.
    """
    token_ids = [vocab.word_to_id(token) for token in sent_words]
    # The extra list level makes this a batch containing exactly one sentence.
    batch_ids = torch.LongTensor([token_ids])
    batch_length = torch.LongTensor([len(sent_words)])
    return model.word_embedding(batch_ids), batch_length

In [5]:
from nltk.tokenize import word_tokenize
def get_embedding(sentence):
    """Encode a raw sentence with the model's encoder.

    The sentence is lower-cased, tokenized with NLTK's ``word_tokenize``,
    converted to embedded input via ``prepare_input`` and passed through
    ``model.encoder``.

    Args:
        sentence: the sentence text as a string.

    Returns:
        The first element of the encoder's output (used downstream as the
        sentence representation for cosine similarity).
    """
    words = word_tokenize(sentence.lower())
    enc_input, enc_length = prepare_input(words)
    # Call the module itself rather than ``.forward`` so that nn.Module's
    # __call__ machinery (including any registered hooks) is not bypassed.
    # Only element 0 of the result is kept; whatever else the encoder returns
    # with return_select_masks=True is discarded here.
    h = model.encoder(enc_input, enc_length, return_select_masks=True)[0]
    return h

In [8]:
# Embed the reference answer (h) followed by the four candidate answers (h1..h4).
h, h1, h2, h3, h4 = (
    get_embedding(text)
    for text in (
        'I like to drink Orange Juice.',   # correct answer
        'I love orange juice. ',           # acceptable answer
        'I like drinking orange juice. ',  # acceptable answer
        'I like to eat Oranges. ',         # non-acceptable answer
        'I do not like orange juice. ',    # non-acceptable answer
    )
)

In [9]:
# Cosine similarity taken along dimension 1, with a small epsilon to avoid division by zero.
cos = nn.CosineSimilarity(
    dim=1,
    eps=1e-6,
)

In [10]:
# Compare the reference embedding with each candidate embedding, in order.
for candidate in (h1, h2, h3, h4):
    print(cos(h, candidate))

tensor([0.8091])
tensor([0.9130])
tensor([0.6160])
tensor([0.5849])


In [None]:
import spacy

# Load the spaCy pipeline by package name and run a short sample text through it.
nlp = spacy.load("en_core_web_md")  # make sure to use larger model!
# The stray trailing backtick that followed this call was a syntax error and has been removed.
tokens = nlp("dog cat banana")

In [43]:
!python3 -m spacy download en_core_web_md

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/opt/anaconda3/bin/python3 -m pip install --upgrade pip' command.
✔ Download and installation successful
You can now load the model via spacy.load('en_core_web_md')


In [48]:
!pip3 install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

Defaulting to user installation because normal site-packages is not writeable
Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz (12.0 MB)
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py) ... done
  Created wheel for en-core-web-sm: filename=en_core_web_sm-2.2.0-py3-none-any.whl size=12019123 sha256=eefc517010517b0142ff1691f08f08f86fe7a210473c1eea17be7d67177946ad
  Stored in directory: /home/ritesh/.cache/pip/wheels/64/69/41/6f820cf1d7488a0381a2059f66ec9f8f23116f7c67d18f3d8d
Successfully built en-core-web-sm
You should consider upgrading via the '/opt/anaconda3/bin/python -m pip install --upgrade pip' command.


In [52]:
import spacy
import en_core_web_md

# Build the pipeline straight from the installed en_core_web_md package.
nlp = en_core_web_md.load()  # make sure to use larger model!
# Run a short sample text through the pipeline.
tokens = nlp("dog cat banana")

In [54]:
# Process the reference sentence (h) and the four candidate sentences (h1..h4) with spaCy.
# NOTE(review): these names were bound to encoder outputs in an earlier cell and are rebound here.
h, h1, h2, h3, h4 = (
    nlp(text)
    for text in (
        'I like to drink Orange Juice.',
        'I love orange juice. ',
        'I like drinking orange juice. ',
        'I like to eat Oranges. ',
        'I do not like orange juice. ',
    )
)

In [58]:
# Print the similarity of the reference document to each candidate, in order.
for candidate in (h1, h2, h3, h4):
    print(h.similarity(candidate))

0.9440739137877243
0.9730649327106621
0.9034411879800215
0.9513260826224522
