In [2]:

import time
import argparse
import sys, os

import numpy as np
import random

import torch
from torch.autograd import Variable
import torch.nn as nn

from data import get_nli, get_batch, build_vocab
from mutils import get_optimizer, dotdict
from models import NLINet


In [3]:

def get_multinli(data_path):
    s1 = {}
    s2 = {}
    target = {}
    
    dico_label = {'entailment':0,  'neutral':1, 'contradiction':2}
    
    for data_type in ['train', 'dev', 'test']:
        s1[data_type], s2[data_type], target[data_type] = {}, {}, {}
        s1[data_type]['path'] = os.path.join(data_path, 's1.' + data_type)
        s2[data_type]['path'] = os.path.join(data_path, 's2.' + data_type)
        target[data_type]['path'] = os.path.join(data_path, 'labels.' + data_type)
        
        s1[data_type]['sent'] = [line.rstrip() for line in open(s1[data_type]['path'], 'r')]
        s2[data_type]['sent'] = [line.rstrip() for line in open(s2[data_type]['path'], 'r')]
        target[data_type]['data'] = np.array([dico_label[line.rstrip('\n')] for line in open(target[data_type]['path'], 'r')])
        
        assert len(s1[data_type]['sent']) == len(s2[data_type]['sent']) == len(target[data_type]['data'])
        
        print('** {0} DATA : Found {1} pairs of {2} sentences.'.format(
                            data_type.upper(), len(s1[data_type]['sent']), data_type))
        
        
    train = {'s1':s1['train']['sent'], 's2':s2['train']['sent'], 'label':target['train']['data']}
    dev = {'s1':s1['dev']['sent'], 's2':s2['dev']['sent'], 'label':target['dev']['data']}
    test  = {'s1':s1['test']['sent'] , 's2':s2['test']['sent'] , 'label':target['test']['data'] }
    return train, dev, test

In [4]:
ls dataset3/MultiNLI_2/

dev.matched.multinli.txt          s1.train
dev.matched.multinli.txt.tok      s2.dev.matched
dev.mismatched.multinli.txt       s2.dev.mismatched
dev.mismatched.multinli.txt.tok   s2.test.matched.unlabeled
labels.dev.matched                s2.test.mismatched.unlabeled
labels.dev.mismatched             s2.train
labels.test.matched.unlabeled     test.matched.unlabeled.multinli.txt
labels.test.mismatched.unlabeled  test.matched.unlabeled.multinli.txt.tok
labels.train                      test.mismatched.unlabeled.multinli.txt
s1.dev.matched                    test.mismatched.unlabeled.multinli.txt.tok
s1.dev.mismatched                 train.multinli.txt
s1.test.matched.unlabeled         train.multinli.txt.tok
s1.test.mismatched.unlabeled


In [5]:
ls dataset2/MultiNLI/multinli_0.9/

Icon                               multinli_0.9_dev_mismatched.txt
multinli_0.9_dev_matched.jsonl     multinli_0.9_train.jsonl
multinli_0.9_dev_matched.txt       multinli_0.9_train.txt
multinli_0.9_dev_mismatched.jsonl  paper.pdf


In [6]:


GLOVE_PATH = "dataset/GloVe/glove.840B.300d.txt"


parser = argparse.ArgumentParser(description='NLI training')
# paths
parser.add_argument("--nlipath", type=str, default='dataset/SNLI/', help="NLI data path (SNLI or MultiNLI)")
parser.add_argument("--outputdir", type=str, default='savedir2/', help="Output directory")
parser.add_argument("--outputmodelname", type=str, default='model.pickle')


# training
parser.add_argument("--n_epochs", type=int, default=20)
parser.add_argument("--batch_size", type=int, default=64)
parser.add_argument("--dpout_model", type=float, default=0., help="encoder dropout")
parser.add_argument("--dpout_fc", type=float, default=0., help="classifier dropout")
parser.add_argument("--nonlinear_fc", type=float, default=0, help="use nonlinearity in fc")
parser.add_argument("--optimizer", type=str, default="sgd,lr=0.1", help="adam or sgd,lr=0.1")
parser.add_argument("--lrshrink", type=float, default=5, help="shrink factor for sgd")
parser.add_argument("--decay", type=float, default=0.99, help="lr decay")
parser.add_argument("--minlr", type=float, default=1e-5, help="minimum lr")
parser.add_argument("--max_norm", type=float, default=5., help="max norm (grad clipping)")

#model
parser.add_argument("--encoder_type", type=str, default='BLSTMEncoder', help="see list of encoders")
parser.add_argument("--enc_lstm_dim", type=int, default=2048, help="encoder nhid dimension")
parser.add_argument("--n_enc_layers", type=int, default=1, help="encoder num layers")
parser.add_argument("--fc_dim", type=int, default=512, help="nhid of fc layers")
parser.add_argument("--n_classes", type=int, default=3, help="entailment/neutral/contradiction")
parser.add_argument("--pool_type", type=str, default='max', help="max or mean")

# gpu
parser.add_argument("--gpu_id", type=int, default=0, help="GPU ID")
parser.add_argument("--seed", type=int, default=1234, help="seed")


params, _ = parser.parse_known_args(" ".split())


In [7]:
train, valid, test = get_nli(params.nlipath)

** TRAIN DATA : Found 549367 pairs of train sentences.
** DEV DATA : Found 9842 pairs of dev sentences.
** TEST DATA : Found 9824 pairs of test sentences.


In [32]:
type(train['s1'])

list

In [33]:

# set gpu device
torch.cuda.set_device(params.gpu_id)

# print parameters passed, and all parameters
# print('\ntogrep : {0}\n'.format(sys.argv[1:]))
print(params)


"""
SEED
"""
np.random.seed(params.seed)
torch.manual_seed(params.seed)
torch.cuda.manual_seed(params.seed)

"""
DATA
"""
train, valid, test = get_nli(params.nlipath)
word_vec = build_vocab(train['s1'] + train['s2'] + valid['s1'] + valid['s2'] + test['s1'] + test['s2'], GLOVE_PATH)

for split in ['s1', 's2']:
    for data_type in ['train', 'valid', 'test']:
        eval(data_type)[split] = np.array([['<s>'] + [word for word in sent.split() if word in word_vec] +\
                                          ['</s>'] for sent in eval(data_type)[split]])        

params.word_emb_dim = 300


# """
# MODEL
# """
# # model config
# config_nli_model = {
#     'n_words'        :  len(word_vec)          ,
#     'word_emb_dim'   :  params.word_emb_dim   ,
#     'enc_lstm_dim'   :  params.enc_lstm_dim   ,
#     'n_enc_layers'   :  params.n_enc_layers   ,
#     'dpout_model'    :  params.dpout_model    ,
#     'dpout_fc'       :  params.dpout_fc       ,
#     'fc_dim'         :  params.fc_dim         ,
#     'bsize'          :  params.batch_size     ,
#     'n_classes'      :  params.n_classes      ,
#     'pool_type'      :  params.pool_type      ,
#     'nonlinear_fc'   :  params.nonlinear_fc   ,
#     'encoder_type'   :  params.encoder_type   ,
#     'use_cuda'       :  True                  ,

# }

# # model
# encoder_types = ['BLSTMEncoder', 'BLSTMprojEncoder', 'BGRUlastEncoder', 'InnerAttentionMILAEncoder',\
#                  'InnerAttentionYANGEncoder', 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
# assert params.encoder_type in encoder_types, "encoder_type must be in " + str(encoder_types)
# nli_net = NLINet(config_nli_model)
# print(nli_net)

Namespace(batch_size=64, decay=0.99, dpout_fc=0.0, dpout_model=0.0, enc_lstm_dim=2048, encoder_type='BLSTMEncoder', fc_dim=512, gpu_id=0, lrshrink=5, max_norm=5.0, minlr=1e-05, n_classes=3, n_enc_layers=1, n_epochs=20, nlipath='dataset/SNLI/', nonlinear_fc=0, optimizer='sgd,lr=0.1', outputdir='savedir2/', outputmodelname='model.pickle', pool_type='max', seed=1234)
** TRAIN DATA : Found 549367 pairs of train sentences.
** DEV DATA : Found 9842 pairs of dev sentences.
** TEST DATA : Found 9824 pairs of test sentences.
Found 38957(/43479) words with glove vectors
Vocab size : 38957


In [36]:
# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()
#src_embeddings.cuda()


    
"""
TRAIN
"""
#src_embeddings.volatile = True
val_acc_best = -1e10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None
#index_pad =word2id['<p>']

In [37]:
test['s1']

array([ list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       ...,
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 'in', 'a', 'classroom', '.', '</s>']),
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 'in', 'a', 'classroom', '.', '</s>']),
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 

In [38]:
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
#     print("val acc best, lr, stop_training, adam_stop: ", val_acc_best, lr, stop_training, adam_stop)
    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    
    s1    = valid['s1']    if eval_type == 'valid' else test['s1']
    s2    = valid['s2']    if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()
        k = s1_batch.size(1)  # actual batch size
#         print("yo, I am here")
        first_arg = (s1_batch, s1_len)
        second_arg = (s2_batch, s2_len)
        # model forward
        output = nli_net(first_arg, second_arg )
        
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        print(pred)
        
    # save model
    eval_acc  = round(100 * correct / len(s1),2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))
    
    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net, os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc, s1,s2

In [40]:
epoch = 1
# Run best model on test set.
del nli_net
nli_net = torch.load(os.path.join(params.outputdir, params.outputmodelname))

print('\nTEST : Epoch {0}'.format(epoch))
# evaluate(1e6, 'valid', True)
_, s1, s2 = evaluate(0, 'test', True)



TEST : Epoch 1

    2
    0
    2
    1
    1
    1
    0
    2
    2
    1
    0
    0
    2
    0
    2
    2
    0
    1
    0
    2
    0
    2
    0
    1
    1
    2
    0
    0
    1
    2
    2
    1
    0
    0
    2
    2
    2
    0
    2
    0
    1
    2
    2
    1
    1
    2
    1
    0
    1
    2
    1
    2
    2
    2
    2
    0
    0
    2
    0
    0
    2
    1
    1
    0
[torch.cuda.LongTensor of size 64x1 (GPU 0)]


    2
    1
    2
    2
    2
    1
    0
    2
    1
    1
    2
    0
    1
    0
    0
    0
    0
    0
    0
    0
    0
    0
    2
    1
    0
    1
    2
    1
    2
    0
    2
    2
    0
    1
    1
    0
    2
    1
    0
    2
    0
    2
    0
    2
    1
    0
    0
    2
    1
    0
    2
    1
    0
    2
    0
    0
    0
    1
    2
    1
    0
    2
    1
    0
[torch.cuda.LongTensor of size 64x1 (GPU 0)]


    2
    2
    1
    0
    1
    0
    1
    1
    2
    1
    0
    2
    1
    1
    0
    2
    1
    2
    0
    1
 


    0
    1
    1
    2
    2
    0
    0
    2
    2
    1
    0
    1
    0
    1
    2
    0
    1
    2
    1
    0
    2
    1
    2
    0
    1
    0
    2
    1
    0
    2
    1
    0
    1
    2
    0
    1
    2
    0
    2
    2
    1
    0
    0
    2
    2
    2
    0
    1
    0
    1
    2
    0
    1
    2
    0
    0
    0
    0
    0
    2
    0
    2
    1
    2
[torch.cuda.LongTensor of size 64x1 (GPU 0)]


    0
    2
    1
    0
    0
    2
    2
    1
    2
    0
    0
    2
    2
    2
    0
    0
    1
    2
    0
    0
    0
    1
    2
    0
    0
    1
    2
    0
    0
    2
    2
    0
    2
    1
    0
    2
    1
    2
    1
    0
    2
    0
    0
    2
    0
    0
    2
    1
    1
    2
    0
    2
    0
    2
    1
    0
    2
    2
    1
    0
    2
    0
    2
    1
[torch.cuda.LongTensor of size 64x1 (GPU 0)]


    1
    2
    0
    0
    2
    2
    2
    0
    1
    1
    2
    0
    0
    2
    0
    1
    0
    2
    2
    2
    0
    0
    2

KeyboardInterrupt: 

In [42]:
test['s1']

array([ list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       list(['<s>', 'This', 'church', 'choir', 'sings', 'to', 'the', 'masses', 'as', 'they', 'sing', 'joyous', 'songs', 'from', 'the', 'book', 'at', 'a', 'church', '.', '</s>']),
       ...,
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 'in', 'a', 'classroom', '.', '</s>']),
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 'in', 'a', 'classroom', '.', '</s>']),
       list(['<s>', 'A', 'man', 'in', 'a', 'black', 'leather', 'jacket', 'and', 'a', 'book', 'in', 'his', 'hand', 'speaks', 

In [None]:
#continue to generate multinli csvs