In [1]:
from collections import Counter
from itertools import product
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch_model_base import TorchModelBase
from torch_rnn_classifier import TorchRNNClassifier, TorchRNNClassifierModel
from torch_rnn_classifier import TorchRNNClassifier
import nli
import os
import utils

In [2]:
# Locations of the external resources used by this notebook.
#
# The two root directories can be overridden without editing the notebook by
# setting the environment variables `GLOVE_HOME` and `NLI_DATA_HOME`; when a
# variable is not set, the original machine-specific default is used, so
# existing behavior is unchanged.
GLOVE_HOME = os.environ.get(
    "GLOVE_HOME",
    "/Users/bszalapski/Documents/StanfordCourses/CS224U/cs224u/data/glove.6B")

DATA_HOME = os.environ.get(
    "NLI_DATA_HOME",
    os.path.join("/Users/bszalapski/Documents/StanfordCourses/CS224U/cs224u_project/cs224uSNLI/data", "nlidata"))

# Dataset directories derived from DATA_HOME.
SNLI_HOME = os.path.join(DATA_HOME, "snli_1.0")

MULTINLI_HOME = os.path.join(DATA_HOME, "multinli_1.0")

ANNOTATIONS_HOME = os.path.join(DATA_HOME, "multinli_1.0_annotations")

In [3]:
# Reader over the SNLI training data rooted at SNLI_HOME.
# `samp_percentage=0.10` presumably subsamples about 10% of the examples and
# `random_state=42` presumably seeds that sampling -- confirm against
# nli.SNLITrainReader.
# NOTE(review): `train_reader` is rebound to an nli.NLIReader in a later cell,
# so this reader is only in effect for cells run before that one.
train_reader = nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10, random_state=42)

In [4]:
def fit_softmax_classifier_with_preselected_params(X, y):
    """Fit an L1-regularized logistic regression with fixed hyperparameters.

    Parameters
    ----------
    X : feature matrix accepted by `LogisticRegression.fit`
    y : labels aligned with the rows of `X`

    Returns
    -------
    LogisticRegression
        The fitted estimator.

    """
    preselected = dict(
        fit_intercept=True,
        penalty='l1',
        solver='saga',  # Required for penalty='l1'.
        multi_class='ovr',
        C=0.4,
    )
    classifier = LogisticRegression(**preselected)
    classifier.fit(X, y)
    return classifier

In [5]:
# Load the `glove.6B.50d.txt` file from GLOVE_HOME through utils.glove2dict.
# The result is used as the word -> vector lookup by `glove_leaves_phi` and is
# passed as `lookup` in `fit_sentence_encoding_rnn`.
glove_lookup = utils.glove2dict(
    os.path.join(GLOVE_HOME, 'glove.6B.50d.txt'))

In [6]:
def glove_leaves_phi(t1, t2, np_func=np.sum):
    """Represent a premise/hypothesis pair as two combined GloVe vectors.

    Each input is reduced independently by `_get_tree_vecs`, using the
    module-level `glove_lookup` as the word-vector table.

    Parameters
    ----------
    t1 : nltk.Tree
        The premise.
    t2 : nltk.Tree
        The hypothesis.
    np_func : function (default: np.sum)
        A numpy matrix operation that can be applied columnwise,
        like `np.mean`, `np.sum`, or `np.prod`. It must accept `axis=0`
        and return a vector of fixed length regardless of how many
        words the input has.

    Returns
    -------
    tuple
        `(premise_vector, hypothesis_vector)`, in that order.

    """
    return tuple(
        _get_tree_vecs(sentence, glove_lookup, np_func)
        for sentence in (t1, t2))
    
    
def _get_tree_vecs(tree, lookup, np_func):
    if hasattr(tree, 'leaves'):
        allvecs = np.array([lookup[w] for w in tree.leaves() if w in lookup])
    else:
        allvecs = np.array([lookup[w] for w in tree if w in lookup])
    if len(allvecs) == 0:
        dim = len(next(iter(lookup.values())))
        feats = np.zeros(dim)    
    else:       
        feats = np_func(allvecs, axis=0)      
    return feats

In [7]:
class TorchRNNSentenceEncoderDataset(torch.utils.data.Dataset):
    """Dataset of (premise, hypothesis) sequence pairs with lengths and labels.

    Parameters
    ----------
    sequences : pair `(premise_sequences, hypothesis_sequences)`
    seq_lengths : pair `(premise_lengths, hypothesis_lengths)`
    y : labels, one per premise sequence

    """
    def __init__(self, sequences, seq_lengths, y):
        premises, hypotheses = sequences
        premise_lens, hypothesis_lens = seq_lengths
        self.prem_seqs = premises
        self.hyp_seqs = hypotheses
        self.prem_lengths = premise_lens
        self.hyp_lengths = hypothesis_lens
        self.y = y
        assert len(self.prem_seqs) == len(self.y)

    def __len__(self):
        return len(self.prem_seqs)

    def __getitem__(self, idx):
        # Field order here must match the unpacking in `collate_fn`.
        fields = (self.prem_seqs, self.hyp_seqs,
                  self.prem_lengths, self.hyp_lengths,
                  self.y)
        return tuple(field[idx] for field in fields)

    @staticmethod
    def collate_fn(batch):
        """Regroup a list of items into (sequences, lengths, labels).

        The sequences stay as tuples; the lengths and labels become
        `torch.LongTensor`s.
        """
        prem_batch, hyp_batch, prem_lens, hyp_lens, labels = zip(*batch)
        lengths = (torch.LongTensor(prem_lens), torch.LongTensor(hyp_lens))
        return (prem_batch, hyp_batch), lengths, torch.LongTensor(labels)

In [8]:
class TorchRNNSentenceEncoderClassifierModel(TorchRNNClassifierModel):
    """Sentence-pair model that encodes premise and hypothesis with separate RNNs.

    Extends `TorchRNNClassifierModel` with a second LSTM (`hypothesis_rnn`),
    a classifier layer sized for the concatenation of two final states, and
    an embedding table for SRL tags (`srl_embed`).

    NOTE(review): `forward` never reads `tags` or `self.srl_embed`, while
    `hypothesis_rnn` is created with `input_size=2 * self.embed_dim`. This
    looks like an unfinished attempt to concatenate word and SRL-tag
    embeddings -- confirm against `TorchRNNClassifierModel.rnn_forward`,
    which is not visible in this notebook.

    NOTE(review): `TorchRNNSentenceEncoderClassifier.build_graph` does not
    pass `srl_vocab_size`, and `predict_proba` calls the model with two
    positional arguments although `forward` declares three.
    """
    def __init__(self, vocab_size, embed_dim, embedding, use_embedding,
                 hidden_dim, output_dim, bidirectional, device, srl_vocab_size):
        
        # The first eight arguments are forwarded unchanged to the base class.
        super(TorchRNNSentenceEncoderClassifierModel, self).__init__(vocab_size, embed_dim,
                                                                     embedding, use_embedding,
                                                                     hidden_dim, output_dim, bidirectional,
                                                                     device)
        # Separate LSTM used for the hypothesis; the premise is run through
        # `self.rnn` in `forward` (presumably created by the base class).
        self.hypothesis_rnn = nn.LSTM(
            input_size=2 * self.embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=self.bidirectional
        )
        # The classifier consumes the premise and hypothesis states
        # concatenated, hence the factor 2 (and another factor 2 when each
        # RNN is bidirectional).
        if bidirectional:
            classifier_dim = hidden_dim * 2 * 2
        else:
            classifier_dim = hidden_dim * 2
        self.classifier_layer = nn.Linear(classifier_dim, output_dim)
        # One embedding row per SRL tag plus one extra row.
        self.srl_embed = nn.Embedding(srl_vocab_size + 1, self.embed_dim)
        

    def forward(self, X, tags, seq_lengths):
        # `X` and `seq_lengths` are (premise, hypothesis) pairs.
        X_prem, X_hyp = X
        prem_lengths, hyp_lengths = seq_lengths
        prem_state = self.rnn_forward(X_prem, prem_lengths, self.rnn)
        hyp_state = self.rnn_forward(X_hyp, hyp_lengths, self.hypothesis_rnn)
        # Join the two final states along dim=1 before classifying.
        state = torch.cat((prem_state, hyp_state), dim=1)
        logits = self.classifier_layer(state)
        return logits

In [9]:
class TorchRNNSentenceEncoderClassifier(TorchRNNClassifier):
    """`TorchRNNClassifier` variant whose examples are (premise, hypothesis) pairs.

    NOTE(review): `build_graph` does not supply the `srl_vocab_size` argument
    that `TorchRNNSentenceEncoderClassifierModel.__init__` declares, and
    `predict_proba` calls the model with two positional arguments while its
    `forward` declares `(X, tags, seq_lengths)` -- confirm the intended
    signatures.
    """

    def build_dataset(self, X, y):
        """Prepare premises and hypotheses separately and wrap them in a dataset."""
        premises, hypotheses = zip(*X)
        premises, premise_lens = self._prepare_dataset(premises)
        hypotheses, hypothesis_lens = self._prepare_dataset(hypotheses)
        sequences = (premises, hypotheses)
        lengths = (premise_lens, hypothesis_lens)
        return TorchRNNSentenceEncoderDataset(sequences, lengths, y)

    def build_graph(self):
        """Instantiate the sentence-encoder model from this classifier's settings."""
        graph_settings = dict(
            embedding=self.embedding,
            embed_dim=self.embed_dim,
            use_embedding=self.use_embedding,
            hidden_dim=self.hidden_dim,
            output_dim=self.n_classes_,
            bidirectional=self.bidirectional,
            device=self.device)
        return TorchRNNSentenceEncoderClassifierModel(
            len(self.vocab), **graph_settings)

    def predict_proba(self, X):
        """Return softmax probabilities for each (premise, hypothesis) pair in `X`."""
        with torch.no_grad():
            premises, hypotheses = zip(*X)
            premises, premise_lens = self._prepare_dataset(premises)
            hypotheses, hypothesis_lens = self._prepare_dataset(hypotheses)
            logits = self.model(
                (premises, hypotheses), (premise_lens, hypothesis_lens))
            return torch.softmax(logits, dim=1).cpu().numpy()

In [10]:
# # This still has bugs
# # Adapted from: https://github.com/PrashantRanjan09/Structured-Self-Attentive-Sentence-Embedding
# class SelfAttentiveModel(nn.Module):

#     def __init__(self, vocab_size=10000, embed_dim=300, hidden_dim=64, max_seq_len=200, da=32, r=16, batch_size=500,
#                 embedding=None, use_embedding=True, bidirectional=True, device='cpu', output_dim=3):
#         super(SelfAttentiveModel, self).__init__()
#         self.vocab_size = vocab_size
#         self.batch_size = batch_size
#         self.embed_dim = embed_dim
#         self.hidden_dim = hidden_dim
#         self.max_seq_len = max_seq_len
#         self.da = da
#         self.r = r
#         self.device = device
#         self.use_embedding = use_embedding
#         self.bidirectional = bidirectional

#         self.embedding = nn.Embedding(vocab_size, embed_dim)
#         self.bilstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True, bidirectional=True)
#         self.lin1 = nn.Linear(2 * hidden_dim, da)
#         self.lin2 = nn.Linear(da, r)
#         self.lin3 = nn.Linear(100, output_dim)

#     def forward(self, x, seq_lengths):
#         state = self.rnn_forward(x, seq_lengths, self.bilstm)
#         out = self.lin1(state)
#         out = torch.tanh(out)
#         out = self.lin2(out)
#         out_a = F.softmax(out, dim=0)
#         temp1 = out_a.permute(0, 2, 1)
#         temp2 = state
#         out = torch.bmm(temp1, temp2).squeeze(0)  # AH
#         out = out.view(len(x), -1)
#         out = self.lin3(out)
#         return out
    
#     def rnn_forward(self, x, seq_lengths, rnn):
#         x = torch.nn.utils.rnn.pad_sequence(x, batch_first=True)
#         x = x.to(self.device, non_blocking=True)
#         seq_lengths = seq_lengths.to(self.device)
#         seq_lengths, sort_idx = seq_lengths.sort(0, descending=True)
#         x = x[sort_idx]
#         if self.use_embedding:
#             embs = self.embedding(x)
#         else:
#             embs = x
#         embs = torch.nn.utils.rnn.pack_padded_sequence(
#             embs, batch_first=True, lengths=seq_lengths)
#         outputs, state = rnn(embs)
#         state = self.get_batch_final_states(state)
#         if self.bidirectional:
#             state = torch.cat((state[0], state[1]), dim=1)
#         _, unsort_idx = sort_idx.sort(0)
#         state = state[unsort_idx]
#         state = state.unsqueeze(0)
#         return state

#     def get_batch_final_states(self, state):
#         if self.bilstm.__class__.__name__ == 'LSTM':
#             return state[0].squeeze(0)
#         else:
#             return state.squeeze(0)

#     @staticmethod
#     def _define_embedding(embedding, vocab_size, embed_dim):
#         if embedding is None:
#             return nn.Embedding(vocab_size, embed_dim)
#         else:
#             embedding = torch.tensor(embedding, dtype=torch.float)
#             return nn.Embedding.from_pretrained(embedding)

In [11]:
# # This still has bugs
# class SNLIBiLSTMAttentiveClassifierModel(nn.Module):
#     def __init__(self, vocab_size, embed_dim, embedding, use_embedding, hidden_dim,
#                  output_dim, bidirectional, device, max_seq_len=200,
#                  da=32, r=16, batch_size=500):
#         super(SNLIBiLSTMAttentiveClassifierModel, self).__init__()

#         self.vocab_size = vocab_size
#         self.batch_size = batch_size
#         self.embed_dim = embed_dim
#         self.hidden_dim = hidden_dim
#         self.output_dim = output_dim
#         self.max_seq_len = max_seq_len
#         self.da = da
#         self.r = r
#         self.device = device
#         self.use_embedding = use_embedding
#         self.embedding = embedding

#         self.prem = SelfAttentiveModel(vocab_size=vocab_size,
#                                        embed_dim=embed_dim,
#                                        hidden_dim=hidden_dim,
#                                        max_seq_len=max_seq_len,
#                                        da=da,
#                                        r=r,
#                                        batch_size=batch_size,
#                                        embedding=embedding,
#                                        use_embedding=use_embedding)

#         self.hyp = SelfAttentiveModel(vocab_size=vocab_size,
#                                       embed_dim=embed_dim,
#                                       hidden_dim=hidden_dim,
#                                       max_seq_len=max_seq_len,
#                                       da=da,
#                                       r=r,
#                                       batch_size=batch_size,
#                                       embedding=embedding,
#                                       use_embedding=use_embedding)

#         self.output_layer = nn.Linear(12, output_dim)

#     def forward(self, x, seq_lengths):
#         x_prem, x_hyp = x
#         seq_length_prem, seq_length_hyp = seq_lengths
#         encoded_prem = self.prem(x_prem, seq_length_prem)
#         encoded_hyp = self.hyp(x_hyp, seq_length_hyp)

#         mult = encoded_prem * encoded_hyp
#         diff = encoded_prem - encoded_hyp
#         print(encoded_prem.size(), encoded_hyp.size(), mult.size(), diff.size())

#         soft_in = torch.cat((encoded_prem, mult, diff, encoded_hyp), dim=1)
#         print(soft_in.size())
#         print(8 * self.hidden_dim, self.output_dim)
#         logits = self.output_layer(soft_in)
#         return logits

In [12]:
# # This still has bugs
# class SNLIBiLSTMAttentionClassifier(TorchRNNClassifier):

#     def build_dataset(self, X, y):
#         X_prem, X_hyp = zip(*X)
#         X_prem, prem_lengths = self._prepare_dataset(X_prem)
#         X_hyp, hyp_lengths = self._prepare_dataset(X_hyp)
#         return TorchRNNSentenceEncoderDataset(
#             (X_prem, X_hyp), (prem_lengths, hyp_lengths), y)

#     def build_graph(self):
#         return SNLIBiLSTMAttentiveClassifierModel(
#             len(self.vocab),
#             embedding=self.embedding,
#             embed_dim=self.embed_dim,
#             use_embedding=self.use_embedding,
#             hidden_dim=self.hidden_dim,
#             output_dim=self.n_classes_,
#             bidirectional=self.bidirectional,
#             device=self.device)

#     def predict_proba(self, X):
#         with torch.no_grad():
#             X_prem, X_hyp = zip(*X)
#             X_prem, prem_lengths = self._prepare_dataset(X_prem)
#             X_hyp, hyp_lengths = self._prepare_dataset(X_hyp)
#             preds = self.model((X_prem, X_hyp), (prem_lengths, hyp_lengths))
#             preds = torch.softmax(preds, dim=1).cpu().numpy()
#             return preds

In [19]:
class GloveSRLModel(nn.Module):
    """Sentence-pair classifier over frozen GloVe vectors plus learned SRL-tag embeddings.

    Each token is represented by the concatenation of its (frozen) GloVe
    vector and a trainable embedding of its SRL tag, so both LSTMs take
    inputs of width `2 * embed_dim`. The premise and the hypothesis are
    encoded by separate LSTMs; their final states are concatenated and fed
    to a linear classifier.

    Parameters
    ----------
    vocab : iterable
        Stored on the instance; not otherwise used by the graph.
    embed_dim : int
        Overwritten with the dimensionality of the vectors in `lookup`.
    embedding : unused
        Accepted for signature compatibility only.
    use_embedding : bool
        If True, word inputs are index tensors looked up in the GloVe
        table; if False, they are used as already-embedded vectors.
    hidden_dim : int
        Hidden size of each LSTM.
    output_dim : int
        Number of classes.
    bidirectional : bool
        Whether both LSTMs are bidirectional.
    device : str or torch.device
        Device that inputs are moved to in `rnn_forward`.
    srl_vocab : collection
        The SRL tags; the tag embedding has `len(srl_vocab) + 1` rows.
    lookup : dict
        Maps words to GloVe vectors. The row order of the embedding
        table follows the iteration order of this dict (see `word2idx`).

    """
    def __init__(self,
            vocab,
            embed_dim,
            embedding,
            use_embedding,
            hidden_dim,
            output_dim,
            bidirectional,
            device,
            srl_vocab,
            lookup):
        # Fix: the original called `super(TorchRNNClassifierModel, self)`,
        # which raises a TypeError because this class derives from
        # `nn.Module`, not from `TorchRNNClassifierModel`.
        super().__init__()
        self.use_embedding = use_embedding
        self.device = device
        self.embed_dim = embed_dim
        self.bidirectional = bidirectional
        self.vocab = vocab
        self.srl_vocab = srl_vocab
        self.lookup = lookup
        self.lookup_embedding = None
        # Row `i` of the embedding table holds the vector of the i-th word
        # in `lookup`.
        self.word2idx = {w: i for i, w in enumerate(lookup)}
        # Fix: `Embedding.from_pretrained` needs a float tensor, not a
        # numpy array.
        glove_weights = torch.tensor(
            np.array(list(lookup.values())), dtype=torch.float)

        # Graph
        self.word_embedding = nn.Embedding.from_pretrained(glove_weights, freeze=True)
        self.embed_dim = self.word_embedding.embedding_dim
        self.srl_embedding = nn.Embedding(len(srl_vocab) + 1, self.embed_dim)

        # Both LSTMs consume [word vector ; tag vector].
        self.rnn = nn.LSTM(
            input_size=2 * self.embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=bidirectional)

        self.hypothesis_rnn = nn.LSTM(
            input_size=2 * self.embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=self.bidirectional
        )

        # Premise and hypothesis states are concatenated (factor 2), and
        # each state is twice as wide when the LSTMs are bidirectional.
        if bidirectional:
            classifier_dim = hidden_dim * 2 * 2
        else:
            classifier_dim = hidden_dim * 2

        self.classifier_layer = nn.Linear(classifier_dim, output_dim)

    def forward(self, X, seq_lengths):
        """Compute logits for a batch of (premise, hypothesis) pairs.

        Parameters
        ----------
        X : ((X_prem, tags_prem), (X_hyp, tags_hyp))
            Each element is a sequence of 1-D tensors, one per example:
            word indices (or word vectors if `use_embedding` is False)
            and SRL-tag indices of the same length.
        seq_lengths : (prem_lengths, hyp_lengths)
            1-D integer tensors with the true length of each example.

        Returns
        -------
        torch.Tensor of shape (batch, output_dim)

        """
        (X_prem, tags_prem), (X_hyp, tags_hyp) = X
        prem_lengths, hyp_lengths = seq_lengths
        # Fix: the original built the concatenated embeddings with an
        # invalid `torch.cat(a, b)` call and then discarded them; the
        # concatenation now happens inside `rnn_forward`, after padding.
        prem_state = self.rnn_forward(
            X_prem, prem_lengths, self.rnn, tags=tags_prem)
        hyp_state = self.rnn_forward(
            X_hyp, hyp_lengths, self.hypothesis_rnn, tags=tags_hyp)
        state = torch.cat((prem_state, hyp_state), dim=1)
        logits = self.classifier_layer(state)
        return logits

    def rnn_forward(self, X, seq_lengths, rnn, tags=None):
        """Run `rnn` over a batch and return its final states in input order.

        Parameters
        ----------
        X : sequence of tensors, one per example
        seq_lengths : 1-D integer tensor of example lengths
        rnn : nn.Module
            The recurrent layer to apply.
        tags : sequence of 1-D index tensors or None
            SRL-tag indices aligned with `X`. When given, their embeddings
            are concatenated to the word representations on the last
            dimension.

        """
        X = torch.nn.utils.rnn.pad_sequence(X, batch_first=True)
        X = X.to(self.device, non_blocking=True)
        seq_lengths = seq_lengths.to(self.device)
        # Packing expects the batch sorted by decreasing length.
        seq_lengths, sort_idx = seq_lengths.sort(0, descending=True)
        X = X[sort_idx]
        if self.use_embedding:
            # Fix: the original referenced a non-existent `self.embedding`.
            embs = self.word_embedding(X)
        else:
            embs = X
        if tags is not None:
            tags = torch.nn.utils.rnn.pad_sequence(tags, batch_first=True)
            tags = tags.to(self.device, non_blocking=True)[sort_idx]
            embs = torch.cat((embs, self.srl_embedding(tags)), dim=-1)
        # `pack_padded_sequence` requires the lengths on the CPU.
        embs = torch.nn.utils.rnn.pack_padded_sequence(
            embs, batch_first=True, lengths=seq_lengths.cpu())
        outputs, state = rnn(embs)
        state = self.get_batch_final_states(state)
        if self.bidirectional:
            # Join the forward and backward final states.
            state = torch.cat((state[0], state[1]), dim=1)
        # Restore the caller's example order.
        _, unsort_idx = sort_idx.sort(0)
        state = state[unsort_idx]
        return state

    def get_batch_final_states(self, state):
        """Extract the final hidden state from an RNN's returned state."""
        if self.rnn.__class__.__name__ == 'LSTM':
            # An LSTM returns (h_n, c_n); keep h_n.
            return state[0].squeeze(0)
        else:
            return state.squeeze(0)

    @staticmethod
    def _define_embedding(embedding, vocab_size, embed_dim):
        """Return a fresh embedding, or one initialized from `embedding`."""
        if embedding is None:
            return nn.Embedding(vocab_size, embed_dim)
        else:
            embedding = torch.tensor(embedding, dtype=torch.float)
            return nn.Embedding.from_pretrained(embedding)

    def fit(self, X, y, **kwargs):
        """Standard `fit` method.

        NOTE(review): in the original this method was indented inside
        `_define_embedding`, which is a syntax error; it is now a proper
        method. It still relies on attributes and helpers that this class
        does not define (`build_dataset`, `build_graph`, `predict`,
        `batch_size`, `optimizer`, `eta`, `l2_strength`, `max_iter`,
        `errors`, `dev_predictions`) and on a module-level name
        `progress_bar` that the notebook never imports, so it cannot run
        until a classifier wrapper supplies them.

        Parameters
        ----------
        X : np.array
        y : array-like
        kwargs : dict
            For passing other parameters. If 'X_dev' is included,
            then performance is monitored every 10 epochs; use
            `dev_iter` to control this number.

        Returns
        -------
        self

        """
        # Incremental performance:
        X_dev = kwargs.get('X_dev')
        if X_dev is not None:
            dev_iter = kwargs.get('dev_iter', 10)
        # Data prep:
        self.classes_ = sorted(set(y))
        self.n_classes_ = len(self.classes_)
        class2index = dict(zip(self.classes_, range(self.n_classes_)))
        y = [class2index[label] for label in y]
        dataset = self.build_dataset(X, y)
        dataloader = torch.utils.data.DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=False,
            pin_memory=True,
            collate_fn=dataset.collate_fn)
        if not self.use_embedding:
            # Infer `embed_dim` from `X` in this case:
            self.embed_dim = X[0][0].shape[0]
        # Graph:
        self.model = self.build_graph()
        self.model.to(self.device)
        self.model.train()
        # Make sure this value is up-to-date; self.`model` might change
        # it if it creates an embedding:
        self.embed_dim = self.model.embed_dim
        # Optimization:
        loss = nn.CrossEntropyLoss()
        optimizer = self.optimizer(
            self.model.parameters(),
            lr=self.eta,
            weight_decay=self.l2_strength)
        # Train:
        for iteration in range(1, self.max_iter+1):
            epoch_error = 0.0
            for X_batch, batch_seq_lengths, y_batch in dataloader:
                y_batch = y_batch.to(self.device, non_blocking=True)
                batch_preds = self.model(X_batch, batch_seq_lengths)
                err = loss(batch_preds, y_batch)
                epoch_error += err.item()
                # Backprop:
                optimizer.zero_grad()
                err.backward()
                optimizer.step()
            # Incremental predictions where possible:
            if X_dev is not None and iteration > 0 and iteration % dev_iter == 0:
                self.dev_predictions[iteration] = self.predict(X_dev)
            self.errors.append(epoch_error)
            progress_bar("Finished epoch {} of {}; error is {}".format(
                iteration, self.max_iter, epoch_error))
        return self

In [13]:
def simple_example():
    """Train and evaluate the sentence-encoding RNN on a toy task.

    In the toy data, a pair is labeled 'good' when the second sequence is
    the reversal of the first, and 'bad' otherwise. The function fits a
    `TorchRNNSentenceEncoderClassifier` on the training pairs and prints
    one line per test pair with the predicted and actual labels.
    """
    vocab = ['a', 'b', '$UNK']

    def make_example(premise, hypothesis, label):
        # Strings are expanded to lists of single-character tokens.
        return [(list(premise), list(hypothesis)), label]

    # Reversals are good, and other pairs are bad:
    train = [make_example(*spec) for spec in (
        ('ab', 'ba', 'good'),
        ('aab', 'baa', 'good'),
        ('abb', 'bba', 'good'),
        ('aabb', 'bbaa', 'good'),
        ('ba', 'ba', 'bad'),
        ('baa', 'baa', 'bad'),
        ('bba', 'bab', 'bad'),
        ('bbaa', 'bbab', 'bad'),
        ('aba', 'bab', 'bad'))]

    test = [make_example(*spec) for spec in (
        ('baaa', 'aabb', 'bad'),
        ('abaa', 'baaa', 'bad'),
        ('bbaa', 'bbaa', 'bad'),
        ('aaab', 'baaa', 'good'),
        ('aaabb', 'bbaaa', 'good'))]

    X, y = zip(*train)
    X_test, y_test = zip(*test)

    mod = TorchRNNSentenceEncoderClassifier(
        vocab, max_iter=100, embed_dim=50, hidden_dim=50)
    mod.fit(X, y)
    preds = mod.predict(X_test)

    print("\nPredictions:")
    row = "{0:>6} {1:>6} - predicted: {2:>4}; actual: {3:>4} - {4}"
    for (premise, hypothesis), pred, gold in zip(X_test, preds, y_test):
        score = "correct" if pred == gold else "incorrect"
        print(row.format(
            "".join(premise), "".join(hypothesis), pred, gold, score))

In [14]:
# simple_example()

In [15]:
def sentence_encoding_rnn_phi(t1, t2):
    """Map `t1` and `t2` to a pair of lists of leaf nodes.

    If `t1` exposes `leaves()`, both inputs are converted with `leaves()`;
    otherwise both are returned unchanged.
    """
    if not hasattr(t1, 'leaves'):
        return (t1, t2)
    return (t1.leaves(), t2.leaves())

In [None]:
# Load the AllenNLP semantic-role-labeling predictor used by `srl_phi`.
# The archive is addressed by URL, so this cell presumably needs network
# access (or a previously cached copy) -- confirm against
# `Predictor.from_path`.
import allennlp
from allennlp.predictors.predictor import Predictor
PREDICTOR = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz")

In [None]:
# Dimensionality of the GloVe vectors; also read by `glove_vec` as the size
# of the random vectors it generates for unknown tokens.
glove_dim = 50
glove_src = os.path.join(GLOVE_HOME, 'glove.6B.{}d.txt'.format(glove_dim))
# Creates a dict mapping strings (words) to GloVe vectors:
# NOTE(review): with glove_dim = 50 this reads the same file as the earlier
# `glove_lookup`, but it is a separate dict: `srl_phi` adds SRL-tag entries
# to `GLOVE` (via `glove_vec(..., is_srl=True)`), not to `glove_lookup`.
GLOVE = utils.glove2dict(glove_src)

In [16]:
def randvec(w, n=50, lower=-1.0, upper=1.0):
    """Returns a random vector of length `n`. `w` is ignored.

    Thin wrapper around `utils.randvec`; `n`, `lower` and `upper` are
    forwarded to it unchanged. The `w` parameter exists only so the
    function can be called with a word as its first argument.
    """
    return utils.randvec(n=n, lower=lower, upper=upper)


def glove_vec(w, lookup, is_srl=False):
    """Return `w`'s GloVe representation if available, else return
    a random vector.

    Parameters
    ----------
    w : str
        The token to look up.
    lookup : dict
        Maps tokens to vectors. Mutated only when `is_srl` is True and
        `w` is missing (see below).
    is_srl : bool (default: False)
        If True and `w` is not in `lookup`, the freshly generated random
        vector is stored in `lookup` so the same token maps to the same
        vector on later calls. If False, unknown tokens get a new random
        vector on every call and `lookup` is left untouched.

    Returns
    -------
    The vector stored in `lookup`, or a random vector of length
    `glove_dim` produced by `randvec`.

    """
    # Only generate a random vector when it is actually needed. The
    # original `lookup.get(w, randvec(...))` evaluated `randvec` on every
    # call, wasting work and advancing the random state even for known
    # words.
    if w in lookup:
        return lookup[w]
    vec = randvec(w, n=glove_dim)
    if is_srl:
        print(f"Adding {w} to lookup")
        lookup[w] = vec
    return vec


def _get_glove_vecs(s, lookup, np_func=np.concatenate, is_srl=False):
    if hasattr(s, 'leaves'):
        allvecs = np.array([glove_vec(w.lower(), lookup, is_srl) for w in s.leaves()])
    else:
        allvecs = np.array([glove_vec(w.lower(), lookup, is_srl) for w in s])
    if len(allvecs) == 0:
        dim = len(next(iter(lookup.values())))
        feats = np.zeros(dim)    
    else:       
        feats = np_func(allvecs, axis=0)      
    return feats
        

def srl_phi(t1, t2):
    """
    Featurize a sentence pair with GloVe vectors for its words and its SRL tags.

    For each input, the tokens are taken from `leaves()` when `t1` has that
    method, otherwise the inputs are used as token lists directly. The
    tokens are joined with spaces and passed to the AllenNLP SRL predictor
    (`PREDICTOR`), and one tag sequence is chosen with `_get_best_tags`.

    Both the tokens and the tags are then mapped through `_get_glove_vecs`
    with its default `np.concatenate`, i.e. the per-item vectors from
    `GLOVE` are joined end to end, so the length of each output grows with
    the sentence length. Tags missing from `GLOVE` receive a random vector
    that is stored in `GLOVE` (the `is_srl=True` path of `glove_vec`).

    Returns
    -------
    tuple of np.array
        `(ret_1, ret_2)`, each the concatenation of the sentence's word
        vectors followed by its tag vectors.
    """
    if hasattr(t1, 'leaves'):
        s1 = t1.leaves()
        s2 = t2.leaves()
    else:
        s1 = t1
        s2 = t2
    srl1 = PREDICTOR.predict(sentence=" ".join(s1))
    srl2 = PREDICTOR.predict(sentence=" ".join(s2))
    tags_1 = _get_best_tags(srl1)
    tags_2 = _get_best_tags(srl2)
    # Word vectors are resolved before tag vectors; because unknown items get
    # random vectors, this call order also fixes the order in which random
    # numbers are drawn.
    glove_1 = _get_glove_vecs(s1, GLOVE)
    glove_2 = _get_glove_vecs(s2, GLOVE)
    glove_tags_1 = _get_glove_vecs(tags_1, GLOVE, is_srl=True)
    glove_tags_2 = _get_glove_vecs(tags_2, GLOVE, is_srl=True)

#     ret_1 = [np.concatenate((a_, b_), axis=0) for a_, b_ in zip(glove_1, glove_tags_1)]
#     ret_2 = [np.concatenate((a_, b_), axis=0) for a_, b_ in zip(glove_2, glove_tags_2)]
#     print(tags_1, glove_tags_1)
    # All word vectors first, then all tag vectors (not interleaved per token).
    ret_1 = np.concatenate((glove_1, glove_tags_1), axis=0)
    ret_2 = np.concatenate((glove_2, glove_tags_2), axis=0)
    return (ret_1, ret_2)
    
def _get_best_tags(srl):
    if len(srl['verbs']) == 0:
        print(f"Sentence without tags: {' '.join(srl['words'])}")
        ret = ["O" for w in srl['words']]
        return ret
    elif len(srl['verbs']) == 1:
        return srl['verbs'][0]['tags']
    max_tags = 0
    final_tags = None
    for v in srl['verbs']:
        tag_count = 0
        for t in v['tags']:
            if t != 'O':
                tag_count += 1
        if tag_count > max_tags:
            final_tags = v['tags']
            max_tags = tag_count
    if final_tags is None:
        final_tags = srl['verbs'][0]['tags']
    
    return final_tags

In [30]:
def get_sentence_encoding_vocab(X, n_words=None):
    """Collect a vocabulary from a list of example pairs.

    For every pair in `X`, every element `ex` of the pair, and every item
    `w` of `ex`, the value `w[0]` is counted. The vocabulary is the set of
    counted values (restricted to the `n_words` most common ones when
    `n_words` is truthy) plus the token "$UNK".

    NOTE(review): when `ex` is a list of word strings, `w[0]` is the first
    character of each word; when `ex` is a `(sentence, tags)` tuple, it is
    the first word and the first tag. Confirm which input format is
    intended -- this may need to count whole words instead.
    """
    counts = Counter()
    for pair in X:
        for ex in pair:
            for w in ex:
                counts[w[0]] += 1
    ranked = counts.most_common(n_words) if n_words else counts.items()
    vocab = {"$UNK"}
    vocab.update(item for item, _ in ranked)
#     vocab.add(glove_vec("$UNK", GLOVE, is_srl=True))
    return vocab

In [31]:
def get_srl_tag_encoding_vocab(tag_file=None):
    """Read the set of SRL tags from a text file with one tag per line.

    Parameters
    ----------
    tag_file : str or None (default: None)
        Path of the tag file. When None, the original project-specific
        location is used, so existing zero-argument calls behave as before.

    Returns
    -------
    set of str
        The distinct tags, stripped of surrounding whitespace. Blank
        lines are skipped so that an empty string never becomes a tag.

    """
    if tag_file is None:
        tag_file = "/Users/bszalapski/Documents/StanfordCourses/CS224U/cs224u_project/cs224uSNLI/BiLSTM/tags.txt"
    with open(tag_file, "r") as tp:
        stripped = (line.strip() for line in tp)
        return {tag for tag in stripped if tag}

In [32]:
def fit_sentence_encoding_rnn(X, y):
    """Build the word and SRL-tag vocabularies, then construct and fit a `GloveSRLModel`.

    The word vocabulary is limited to the 10000 most common entries
    returned by `get_sentence_encoding_vocab`; the tag vocabulary comes
    from `get_srl_tag_encoding_vocab`; `glove_lookup` is passed as the
    word-vector table.

    NOTE(review): as written this call fails. The traceback recorded in
    this notebook is "TypeError: __init__() got an unexpected keyword
    argument 'max_iter'": `GloveSRLModel.__init__` declares no `max_iter`
    parameter, and it also declares `embed_dim`, `embedding`,
    `use_embedding`, `output_dim`, `bidirectional` and `device`, none of
    which are supplied here.
    """
#     print(X)
    vocab = get_sentence_encoding_vocab(X, n_words=10000)
    tag_vocab = get_srl_tag_encoding_vocab()
    mod = GloveSRLModel(
        vocab, hidden_dim=50, max_iter=50, srl_vocab=tag_vocab, lookup=glove_lookup)
    mod.fit(X, y)
    return mod

In [33]:
# def fit_sentence_encoding_bilstm(X, y):   
#     vocab = get_sentence_encoding_vocab(X, n_words=10000)
#     mod = SNLIBiLSTMAttentionClassifier(vocab)
#     mod.fit(X, y)
#     return mod

In [34]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Standard SNLI trainer, using SNLI for train and validation sets.
# The examples are read from the preprocessed SNLI file, each one is turned
# into ((sentence1, tags1), (sentence2, tags2)), and 30% of the examples are
# held out for assessment.
train_reader = nli.NLIReader(os.path.join(DATA_HOME, "snli_1.0/preprocessed_snli_1.0_train.jsonl"))
assess_reader = None
# _ = nli.experiment(
#     train_reader=preprocessed_reader, 
#     phi=sentence_encoding_rnn_phi,
#     train_func=fit_sentence_encoding_rnn,
#     assess_reader=None,
#     random_state=42,
#     vectorize=False)
feats = []
labels = []
raw_examples = []
for ex in train_reader.read():
    feats.append(((ex.sentence1, ex.tags1), (ex.sentence2, ex.tags2)))
    labels.append(ex.gold_label)
    raw_examples.append((ex.sentence1, ex.sentence2))
train = {
    'X': feats,
    'y': labels,
    'raw_examples': raw_examples
}
X_train = train['X']
y_train = train['y']
# No separate assessment reader is configured, so split the training data.
if assess_reader is None:
    X_train, X_assess, y_train, y_assess, raw_train, raw_assess = train_test_split(
        X_train, y_train, raw_examples, train_size=0.7, test_size=None, random_state=42)
    assess = {
        'X': X_assess,
        'y': y_assess,
        'raw_examples': raw_assess
    }
mod = fit_sentence_encoding_rnn(X_train, y_train)
preds = mod.predict(X_assess)
# Fix: the original reported on an undefined name `predictions`; the
# predictions computed above are stored in `preds`.
print(classification_report(y_assess, preds, digits=3))

TypeError: __init__() got an unexpected keyword argument 'max_iter'

In [None]:
# Readers for the additional evaluation sets stored under DATA_HOME; each is
# constructed from the corresponding reader class in the project-local `nli`
# module. `subobj_dev_reader` is the one passed as `assess_reader` in the
# experiment cell below; `addamod_dev_reader` appears there only in a
# commented-out line.
addamod_train_reader = nli.AddamodTrainReader(DATA_HOME)
addamod_dev_reader = nli.AddamodDevReader(DATA_HOME)
subobj_train_reader = nli.SubObjTrainReader(DATA_HOME)
subobj_dev_reader = nli.SubObjDevReader(DATA_HOME)
breaking_reader = nli.BreakingSNLIReader(DATA_HOME)

In [None]:
# Use this cell for training on SNLI, using an adversarial reader as the validation set.
# NOTE(review): unlike the reader created near the top of the notebook, this
# SNLITrainReader is built without `random_state`; only `nli.experiment`
# itself receives `random_state=42`. Confirm whether the 10% sample is meant
# to be reproducible.
# NOTE(review): `phi=sentence_encoding_rnn_phi` yields pairs of token lists
# without SRL tags, while `fit_sentence_encoding_rnn` builds a
# `GloveSRLModel` whose `forward` unpacks (tokens, tags) pairs -- confirm
# that this combination is intended.

_ = nli.experiment(
    train_reader=nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10), 
    phi=sentence_encoding_rnn_phi,
    train_func=fit_sentence_encoding_rnn,
#     assess_reader=addamod_dev_reader,
    assess_reader=subobj_dev_reader,
    random_state=42,
    vectorize=False)

In [None]:
# Stand-alone demonstration of the AllenNLP SRL predictor on one sentence.
# NOTE(review): this loads the same model archive a second time; an earlier
# cell already binds it to `PREDICTOR`, and these two imports repeat the
# ones in that cell.
import allennlp
from allennlp.predictors.predictor import Predictor
predictor = Predictor.from_path("https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz")
predictor.predict(
  sentence="Did Uriah honestly think he could beat the game in under three hours?"
)