In [1]:
import os
import sys
import time
import argparse
import pickle
import numpy as np
import re
import inspect
import time 
import torch
from torchqrnn import QRNN
import torch
from torch import optim
from torch.autograd import Variable
import torch.nn as nn
from MakeData import MakeData

#from data import get_nli, get_batch, build_vocab
#from mutils import get_optimizer
#from models import NLINet

start_time = time.time()
W2V_PATH = "/home/dc/cs230_project/dataset/GloVe/glove.840B.300d.txt"


parser = argparse.ArgumentParser(description='Training')
# paths
parser.add_argument("--outputdir", type=str, default='savedir/', help="Output directory")
parser.add_argument("--outputmodelname", type=str, default='3layernonlinear_2features_adam_small.pickle')
parser.add_argument("--modeldir", type=str, default='rocandmodel/', help="roc and model directory")
parser.add_argument("--rocdir", type=str, default='rocandmodel/', help="roc and model directory")


# training
parser.add_argument("--n_epochs", type=int, default=25)
parser.add_argument("--batch_size", type=int, default=128)
#this only works if num_layers>1
parser.add_argument("--dpout_model", type=float, default=.2, help="encoder dropout")
#this is only for the dropout after batchnorm in nonlinear
parser.add_argument("--dpout_fc", type=float, default=0.2, help="classifier dropout")
parser.add_argument("--nonlinear_fc", type=float, default=1, help="use nonlinearity in fc")
parser.add_argument("--optimizer", type=str, default="adam", help="adam or sgd,lr=0.1")
parser.add_argument("--lrshrink", type=float, default=5, help="shrink factor for sgd")
parser.add_argument("--weight_decay", type=float, default=5e-4, help="weight decay for sgd")

parser.add_argument("--decay", type=float, default=0.99, help="lr decay")
parser.add_argument("--minlr", type=float, default=1e-5, help="minimum lr")
parser.add_argument("--max_norm", type=float, default=5., help="max norm (grad clipping)")

# model
parser.add_argument("--encoder_type", type=str, default='InferSent', help="see list of encoders")
parser.add_argument("--enc_lstm_dim", type=int, default=2048, help="encoder nhid dimension")
parser.add_argument("--n_enc_layers", type=int, default=1, help="encoder num layers")
parser.add_argument("--fc_dim", type=int, default=256, help="nhid of fc layers")
parser.add_argument("--n_classes", type=int, default=2, help="duplicate/not duplicate")
parser.add_argument("--pool_type", type=str, default='max', help="max or mean")
parser.add_argument("--word_emb_dim", type=int, default='300', help="embedding dim")
parser.add_argument("--LSTM_num_layers", type=int, default='2', help="LSTM num layers")
parser.add_argument("--data_dir", type=str, default='/home/dc/cs230_project/dataset', help="store duplicate questions")

# gpu
parser.add_argument("--gpu_id", type=int, default=0, help="GPU ID")
parser.add_argument("--seed", type=int, default=4, help="seed")


params, _ = parser.parse_known_args()

# set gpu device
torch.cuda.set_device(params.gpu_id)

# print parameters passed, and all parameters
print('\ntogrep : {0}\n'.format(sys.argv[1:]))
print(params)

"""
SEED
"""
np.random.seed(params.seed)
torch.manual_seed(params.seed)
torch.cuda.manual_seed(params.seed)

make_data = MakeData()
train, valid, test,word_vec = make_data.quora(big=False,small=True,clean=False)

print(f"checkpoint after formatting: len(train[s1]):{len(train['s1'])} ,len(train[s2]):{len(train['s2'])} \
      ,len(train[label]):{len(train['label'])}, len(valid[s2]):{len(valid['s1'])} ,len(valid[s2]):{len(valid['s2'])}, \
      len(valid[label]):{len(valid['label'])},len(test[s2]):{len(test['s1'])}, len(test[s2]):{len(test['s2'])} \
      ,len(valid[label]):{len(valid['label'])},len(word_vec):{len(word_vec)}")


"""
MODEL
"""
# model config
config_nli_model = {
    'n_words'        :  300          ,
    'word_emb_dim'   :  params.word_emb_dim   ,
    'enc_lstm_dim'   :  params.enc_lstm_dim   ,
    'n_enc_layers'   :  params.n_enc_layers   ,
    'dpout_model'    :  params.dpout_model    ,
    'dpout_fc'       :  params.dpout_fc       ,
    'fc_dim'         :  params.fc_dim         ,
    'bsize'          :  params.batch_size     ,
    'n_classes'      :  params.n_classes      ,
    'pool_type'      :  params.pool_type      ,
    'nonlinear_fc'   :  params.nonlinear_fc   ,
    'encoder_type'   :  params.encoder_type   ,
    'use_cuda'       :  True                  ,

}


class InferSent(nn.Module):

    def __init__(self, config):
        super(InferSent, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']
        self.dpout_model = config['dpout_model']
        self.version = 1 if 'version' not in config else config['version']

        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, params.LSTM_num_layers,
                                bidirectional=True, dropout=self.dpout_model)

        assert self.version in [1, 2]
        if self.version == 1:
            self.bos = '<s>'
            self.eos = '</s>'
            self.max_pad = True
            self.moses_tok = False
        elif self.version == 2:
            self.bos = '<p>'
            self.eos = '</p>'
            self.max_pad = False
            self.moses_tok = True

    def is_cuda(self):
        # either all weights are on cpu or they are on gpu
        return self.enc_lstm.bias_hh_l0.data.is_cuda

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (bsize)
        # sent: Variable(seqlen x bsize x worddim)
        sent, sent_len = sent_tuple

        # Sort by length (keep idx) https://github.com/pytorch/pytorch/issues/3584
        sent_len_sorted, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        idx_unsort = np.argsort(idx_sort)
        idx_sort = torch.from_numpy(idx_sort).cuda() if self.is_cuda() else torch.from_numpy(idx_sort)
        sent = sent.index_select(1, Variable(idx_sort))

        # Padding perf increase
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len_sorted)
        sent_output = self.enc_lstm(sent_packed)[0]  # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

        # Un-sort by length
        idx_unsort = torch.from_numpy(idx_unsort).cuda() if self.is_cuda() \
            else torch.from_numpy(idx_unsort)
        sent_output = sent_output.index_select(1, Variable(idx_unsort))

        # Pooling
        if self.pool_type == "mean":
            sent_len = Variable(torch.FloatTensor(sent_len.copy())).unsqueeze(1).cuda()
            emb = torch.sum(sent_output, 0).squeeze(0)
            emb = emb / sent_len.expand_as(emb)
        elif self.pool_type == "max":
            if not self.max_pad:
                sent_output[sent_output == 0] = -1e9
            emb = torch.max(sent_output, 0)[0]
            if emb.ndimension() == 3:
                emb = emb.squeeze(0)
                assert emb.ndimension() == 2

        return emb

    def set_w2v_path(self, w2v_path):
        self.w2v_path = w2v_path

    def get_word_dict(self, sentences, tokenize=True):
        # create vocab of words
        word_dict = {}
        sentences = [s.split() if not tokenize else self.tokenize(s) for s in sentences]
        for sent in sentences:
            for word in sent:
                if word not in word_dict:
                    word_dict[word] = ''
        word_dict[self.bos] = ''
        word_dict[self.eos] = ''
        return word_dict

    def get_w2v(self, word_dict):
        assert hasattr(self, 'w2v_path'), 'w2v path not set'
        # create word_vec with w2v vectors
        word_vec = {}
        with open(self.w2v_path) as f:
            for line in f:
                word, vec = line.split(' ', 1)
                if word in word_dict:
                    word_vec[word] = np.fromstring(vec, sep=' ')
        print('Found %s(/%s) words with w2v vectors' % (len(word_vec), len(word_dict)))
        return word_vec

    def get_w2v_k(self, K):
        assert hasattr(self, 'w2v_path'), 'w2v path not set'
        # create word_vec with k first w2v vectors
        k = 0
        word_vec = {}
        with open(self.w2v_path) as f:
            for line in f:
                word, vec = line.split(' ', 1)
                if k <= K:
                    word_vec[word] = np.fromstring(vec, sep=' ')
                    k += 1
                if k > K:
                    if word in [self.bos, self.eos]:
                        word_vec[word] = np.fromstring(vec, sep=' ')

                if k > K and all([w in word_vec for w in [self.bos, self.eos]]):
                    break
        return word_vec

    def build_vocab(self, sentences, tokenize=True):
        assert hasattr(self, 'w2v_path'), 'w2v path not set'
        word_dict = self.get_word_dict(sentences, tokenize)
        self.word_vec = self.get_w2v(word_dict)
        print('Vocab size : %s' % (len(self.word_vec)))

    # build w2v vocab with k most frequent words
    def build_vocab_k_words(self, K):
        assert hasattr(self, 'w2v_path'), 'w2v path not set'
        self.word_vec = self.get_w2v_k(K)
        print('Vocab size : %s' % (K))

    def update_vocab(self, sentences, tokenize=True):
        assert hasattr(self, 'w2v_path'), 'warning : w2v path not set'
        assert hasattr(self, 'word_vec'), 'build_vocab before updating it'
        word_dict = self.get_word_dict(sentences, tokenize)

        # keep only new words
        for word in self.word_vec:
            if word in word_dict:
                del word_dict[word]

        # udpate vocabulary
        if word_dict:
            new_word_vec = self.get_w2v(word_dict)
            self.word_vec.update(new_word_vec)
        else:
            new_word_vec = []
        print('New vocab size : %s (added %s words)'% (len(self.word_vec), len(new_word_vec)))

    def get_batch(self, batch):
        # sent in batch in decreasing order of lengths
        # batch: (bsize, max_len, word_dim)
        embed = np.zeros((len(batch[0]), len(batch), self.word_emb_dim))

        for i in range(len(batch)):
            for j in range(len(batch[i])):
                embed[j, i, :] = self.word_vec[batch[i][j]]

        return torch.FloatTensor(embed)

    def tokenize(self, s):
        from nltk.tokenize import word_tokenize
        if self.moses_tok:
            s = ' '.join(word_tokenize(s))
            s = s.replace(" n't ", "n 't ")  # HACK to get ~MOSES tokenization
            return s.split()
        else:
            return word_tokenize(s)

    def prepare_samples(self, sentences, bsize, tokenize, verbose):
        sentences = [[self.bos] + s.split() + [self.eos] if not tokenize else
                     [self.bos] + self.tokenize(s) + [self.eos] for s in sentences]
        n_w = np.sum([len(x) for x in sentences])

        # filters words without w2v vectors
        for i in range(len(sentences)):
            s_f = [word for word in sentences[i] if word in self.word_vec]
            if not s_f:
                import warnings
                warnings.warn('No words in "%s" (idx=%s) have w2v vectors. \
                               Replacing by "</s>"..' % (sentences[i], i))
                s_f = [self.eos]
            sentences[i] = s_f

        lengths = np.array([len(s) for s in sentences])
        n_wk = np.sum(lengths)
        if verbose:
            print('Nb words kept : %s/%s (%.1f%s)' % (
                        n_wk, n_w, 100.0 * n_wk / n_w, '%'))

        # sort by decreasing length
        lengths, idx_sort = np.sort(lengths)[::-1], np.argsort(-lengths)
        sentences = np.array(sentences)[idx_sort]

        return sentences, lengths, idx_sort

    def encode(self, sentences, bsize=64, tokenize=True, verbose=False):
        tic = time.time()
        sentences, lengths, idx_sort = self.prepare_samples(
                        sentences, bsize, tokenize, verbose)

        embeddings = []
        for stidx in range(0, len(sentences), bsize):
            batch = Variable(self.get_batch(
                        sentences[stidx:stidx + bsize]), volatile=True)
            if self.is_cuda():
                batch = batch.cuda()
            batch = self.forward(
                (batch, lengths[stidx:stidx + bsize])).data.cpu().numpy()
            embeddings.append(batch)
        embeddings = np.vstack(embeddings)

        # unsort
        idx_unsort = np.argsort(idx_sort)
        embeddings = embeddings[idx_unsort]

        if verbose:
            print('Speed : %.1f sentences/s (%s mode, bsize=%s)' % (
                    len(embeddings)/(time.time()-tic),
                    'gpu' if self.is_cuda() else 'cpu', bsize))
        return embeddings

    def visualize(self, sent, tokenize=True):

        sent = sent.split() if not tokenize else self.tokenize(sent)
        sent = [[self.bos] + [word for word in sent if word in self.word_vec] + [self.eos]]

        if ' '.join(sent[0]) == '%s %s' % (self.bos, self.eos):
            import warnings
            warnings.warn('No words in "%s" have w2v vectors. Replacing \
                           by "%s %s"..' % (sent, self.bos, self.eos))
        batch = Variable(self.get_batch(sent), volatile=True)

        if self.is_cuda():
            batch = batch.cuda()
        output = self.enc_lstm(batch)[0]
        output, idxs = torch.max(output, 0)
        # output, idxs = output.squeeze(), idxs.squeeze()
        idxs = idxs.data.cpu().numpy()
        argmaxs = [np.sum((idxs == k)) for k in range(len(sent[0]))]

        # visualize model
        import matplotlib.pyplot as plt
        x = range(len(sent[0]))
        y = [100.0 * n / np.sum(argmaxs) for n in argmaxs]
        plt.xticks(x, sent[0], rotation=45)
        plt.bar(x, y)
        plt.ylabel('%')
        plt.title('Visualisation of words importance')
        plt.show()

        return output, idxs


class NLINet(nn.Module):
    def __init__(self, config):
        super(NLINet, self).__init__()

        # classifier
        self.nonlinear_fc = config['nonlinear_fc']
        self.fc_dim = config['fc_dim']
        self.n_classes = config['n_classes']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.encoder_type = config['encoder_type']
        self.dpout_fc = config['dpout_fc']

        self.encoder = eval(self.encoder_type)(config)
        self.inputdim = 2*2*self.enc_lstm_dim
        self.inputdim = 4*self.inputdim if self.encoder_type in \
                        ["ConvNetEncoder", "InnerAttentionMILAEncoder"] else self.inputdim
        self.inputdim = ((int)(self.inputdim/2)) if self.encoder_type == "LSTMEncoder" \
                                        else self.inputdim
        if self.nonlinear_fc:
            self.classifier = nn.Sequential(
                nn.Linear(self.inputdim, self.fc_dim),
                nn.BatchNorm1d(self.fc_dim),
                nn.ReLU(),
                nn.Dropout(p=self.dpout_fc),
                
                nn.Linear(self.fc_dim, self.fc_dim),
                nn.BatchNorm1d(self.fc_dim),
                nn.ReLU(),
                nn.Dropout(p=self.dpout_fc),
                
                nn.Linear(self.fc_dim, self.fc_dim),
                nn.BatchNorm1d(self.fc_dim),
                nn.ReLU(),
                nn.Dropout(p=self.dpout_fc),
                
                nn.Linear(self.fc_dim, self.n_classes),
                
                )
        else:
            print(f"self.inputdim:{self.inputdim}, self.fc_dim:{self.fc_dim}")
            print(type(self.inputdim),type(self.fc_dim))
            self.classifier = nn.Sequential(
                nn.Linear(self.inputdim, self.fc_dim),
                nn.Linear(self.fc_dim, self.fc_dim),
                nn.Linear(self.fc_dim, self.n_classes)
                )

    def forward(self, s1, s2):
        # s1 : (s1, s1_len)
        u = self.encoder(s1)
        v = self.encoder(s2)

        features = torch.cat((u, v), 1)
        output = self.classifier(features)
        return output

    def encode(self, s1):
        emb = self.encoder(s1)
        return emb


def get_optimizer(s):
    """
    Parse optimizer parameters.
    Input should be of the form:
        - "sgd,lr=0.01"
        - "adagrad,lr=0.1,lr_decay=0.05"
    """
    if "," in s:
        method = s[:s.find(',')]
        optim_params = {}
        for x in s[s.find(',') + 1:].split(','):
            split = x.split('=')
            assert len(split) == 2
            assert re.match("^[+-]?(\d+(\.\d*)?|\.\d+)$", split[1]) is not None
            optim_params[split[0]] = float(split[1])
    else:
        method = s
        optim_params = {}

    if method == 'adadelta':
        optim_fn = optim.Adadelta
    elif method == 'adagrad':
        optim_fn = optim.Adagrad
    elif method == 'adam':
        optim_fn = optim.Adam
    elif method == 'adamax':
        optim_fn = optim.Adamax
    elif method == 'asgd':
        optim_fn = optim.ASGD
    elif method == 'rmsprop':
        optim_fn = optim.RMSprop
    elif method == 'rprop':
        optim_fn = optim.Rprop
    elif method == 'sgd':
        optim_fn = optim.SGD
        assert 'lr' in optim_params
    else:
        raise Exception('Unknown optimization method: "%s"' % method)

    # check that we give good parameters to the optimizer
    expected_args = inspect.getargspec(optim_fn.__init__)[0]
    assert expected_args[:2] == ['self', 'params']
    if not all(k in expected_args[2:] for k in optim_params.keys()):
        raise Exception('Unexpected parameters: expected "%s", got "%s"' % (
            str(expected_args[2:]), str(optim_params.keys())))

    return optim_fn, optim_params




# model
encoder_types = ['InferSent', 'BLSTMprojEncoder', 'BGRUlastEncoder',
                 'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
                 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
assert params.encoder_type in encoder_types, "encoder_type must be in " + \
                                             str(encoder_types)
nli_net = NLINet(config_nli_model)
print(nli_net)


# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
#BCE next w2 categories
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()




"""
TRAIN
"""
val_acc_best = 10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None


def get_batch(batch, word_vec):
    # sent in batch in decreasing order of lengths (bsize, max_len, word_dim)
    lengths = np.array([len(x) for x in batch])
    max_len = np.max(lengths)
    embed = np.zeros((max_len, len(batch), 300))

    for i in range(len(batch)):
        for j in range(len(batch[i])):
            embed[j, i, :] = word_vec[batch[i][j]]

    return torch.from_numpy(embed).float(), lengths



def trainepoch(epoch):
    print('\nTRAINING : Epoch ' + str(epoch))
    nli_net.train()
    all_costs = []
    logs = []
    words_count = 0

    last_time = time.time()
    correct = 0.
    # shuffle the data
    permutation = np.random.permutation(len(train['s1']))
    #print(f"type(permutation):{type(permutation)}")
    #print(f"type(train['s1']):{type(train['s1'])}")
    
    s1 = train['s1'][permutation]
    s2 = train['s2'][permutation]
    target = train['label'][permutation]
    

    optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay if epoch>1\
        and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

    for stidx in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size],
                                     word_vec)
        s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size],
                                     word_vec)
        #print(type(s1_batch),type(s2_batch)) #should be list
        #print(f"s1_len:{s1_len},s2_len:{s2_len}")
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        target_batch=target[stidx:stidx + params.batch_size]
        #print(f"target_batch.shape:{target_batch.shape}")
        #print(f"target_batch:{target_batch}")
        #print(f"target shape:{target.shape}")
        #print(f"target:{target[stidx:stidx + params.batch_size]}")
        tgt_batch = Variable(torch.LongTensor(target[stidx:stidx + params.batch_size])).cuda()
        k = s1_batch.size(1)  # actual batch size
        #print(f"tgt_batch:{tgt_batch}")
        #print(f"k:{k}")
        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        #print(f"type(tgt_batch):{type(tgt_batch)}")
        #print(f"type(output):{type(output)}")
        #print(f"output size:{output.size()}")
        
        #print(f"output:{output}")
        #
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        assert len(pred) == len(s1[stidx:stidx + params.batch_size])

        # loss
        loss = loss_fn(output, tgt_batch)
        all_costs.append(loss.item())
        words_count += (s1_batch.nelement() + s2_batch.nelement()) / params.word_emb_dim

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient clipping (off by default)
        shrink_factor = 1
        total_norm = 0

        for p in nli_net.parameters():
            if p.requires_grad:
                p.grad.data.div_(k)  # divide by the actual batch size
                total_norm += p.grad.data.norm() ** 2
        total_norm = np.sqrt(total_norm)

        if total_norm > params.max_norm:
            shrink_factor = params.max_norm / total_norm
        current_lr = optimizer.param_groups[0]['lr'] # current lr (no external "lr", for adam)
        optimizer.param_groups[0]['lr'] = current_lr * shrink_factor # just for update

        # optimizer step
        optimizer.step()
        optimizer.param_groups[0]['lr'] = current_lr
        
        if len(all_costs) == 100:
            print(type(correct),correct,correct.item())
            #logs.append('{0} ; loss {1} accuracy:{2} ;'.format(stidx,round(np.mean(all_costs), 2),round(100.*correct.item()/(stidx+k), 2)))
            logs.append('{0} ; loss {1} ; sentence/s {2} ; words/s {3} ; accuracy train : {4}'.format(
                            stidx, round(np.mean(all_costs), 2),
                            int(len(all_costs) * params.batch_size / (time.time() - last_time)),
                            int(words_count * 1.0 / (time.time() - last_time)), 
                            round(100.*correct.item()/(stidx+k), 2)))
            print(logs[-1])
            last_time = time.time()
            words_count = 0
            all_costs = []
    train_acc = round(100 * correct.item()/len(s1), 2)
    print('results : epoch {0} ; mean accuracy train : {1}'
          .format(epoch, train_acc))
    return train_acc



def save_list(my_list,filename):
    with open(filename, 'wb') as f:
        pickle.dump(my_list,f)
        f.close()
    print("list saved to file!")

def read_list(filename):
    with open(filename,'rb') as f:
        my_list = pickle.load(f)
    return my_list

def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']
    
    predictions=[]
    targets=[]
    
    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()
        
        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        
        pred = output.data.max(1)[1]
        predictions.append(pred.cpu().data.numpy().tolist())
        targets.append(tgt_batch.cpu().data.numpy().tolist())
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
        
        
        
    # save model
    if eval_type == 'valid':
        save_list(predictions,"valid_"+params.outputmodelname+"_predict.pkl")
        save_list(targets,"valid_"+params.outputmodelname+"_targets.pkl")
    else:
        save_list(predictions,"test_"+params.outputmodelname+"_predict.pkl")
        save_list(targets,"test_"+params.outputmodelname+"_targets.pkl")
    
    
    
    
    
    eval_acc = round(100 * correct.item() / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} :\
              {2}'.format(epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir,
                       params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink,
                              optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc


"""
Train model on Natural Language Inference task
"""
epoch = 1
print(f"total num epochs:{params.n_epochs}")

while not stop_training and epoch <= params.n_epochs:
    train_acc = trainepoch(epoch)
    eval_acc = evaluate(epoch, 'valid')
    epoch += 1
    
#nli_net.save_state_dict(os.path.join(params.outputdir, params.outputmodelname))
# Run best model on test set.
#nli_net.load_state_dict(os.path.join(params.outputdir, params.outputmodelname))
print("saving state dict")
torch.save(nli_net.state_dict,os.path.join(params.outputdir, params.outputmodelname + "_statedict.pt"))
print("done saving state dict")


print('\nTEST : Epoch {0}'.format(epoch))
print('calculating validation error')
evaluate(1e6, 'valid', True)
print('calculating test error')
evaluate(0, 'test', True)

# Save full model
torch.save(nli_net, os.path.join(params.outputdir, params.outputmodelname + '_fullmodel.pt'))
#save encoder
torch.save(nli_net.encoder.state_dict(), os.path.join(params.outputdir, params.outputmodelname + '.encoder.pt'))

elapsed_time = time.time() - start_time

print("fin",elapsed_time)


Namespace(data_dir='/home/dc/cs230_project/dataset')

togrep : ['-f', '/run/user/1000/jupyter/kernel-84c6d9f5-f2b2-4c60-97a8-bf9b044efa8d.json']

Namespace(LSTM_num_layers=2, batch_size=128, data_dir='/home/dc/cs230_project/dataset', decay=0.99, dpout_fc=0.2, dpout_model=0.2, enc_lstm_dim=2048, encoder_type='InferSent', fc_dim=256, gpu_id=0, lrshrink=5, max_norm=5.0, minlr=1e-05, n_classes=2, n_enc_layers=1, n_epochs=25, nonlinear_fc=1, optimizer='adam', outputdir='savedir/', outputmodelname='3layernonlinear_2features_adam_small.pickle', pool_type='max', seed=4, weight_decay=0.0005, word_emb_dim=300)
loading small
quora checkpoint len(train[s1]):60623,len(train[s2]):60623,          len(train[label]):60623
len(valid['s1']):20208, len(valid[s2]):20208,           len(valid['label']):20208
len(test['s1']):20208,len(test['s2']):20208,           len(test['label']):20208
Found 47877(/106290) words with glove vectors
Vocab size : 47877
checkpoint after formatting: len(train[s1]):60623 ,len(tra



total num epochs:25

TRAINING : Epoch 1
Learning rate : 0.001
<class 'torch.Tensor'> tensor(8290) 8290
12672 ; loss 0.62 ; sentence/s 177 ; words/s 13200 ; accuracy train : 64.77
<class 'torch.Tensor'> tensor(17106) 17106
25472 ; loss 0.59 ; sentence/s 173 ; words/s 13353 ; accuracy train : 66.82
<class 'torch.Tensor'> tensor(25990) 25990
38272 ; loss 0.58 ; sentence/s 176 ; words/s 13156 ; accuracy train : 67.68
<class 'torch.Tensor'> tensor(34914) 34914
51072 ; loss 0.58 ; sentence/s 176 ; words/s 13107 ; accuracy train : 68.19
results : epoch 1 ; mean accuracy train : 68.56

VALIDATION : Epoch 1
list saved to file!
list saved to file!
togrep : results : epoch 1 ; mean accuracy valid :              70.95
saving model at epoch 1

TRAINING : Epoch 2
Learning rate : 0.001
<class 'torch.Tensor'> tensor(9147) 9147
12672 ; loss 0.55 ; sentence/s 176 ; words/s 12968 ; accuracy train : 71.46
<class 'torch.Tensor'> tensor(18243) 18243
25472 ; loss 0.56 ; sentence/s 177 ; words/s 13065 ; accur

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


done saving state dict

TEST : Epoch 5
calculating validation error

VALIDATION : Epoch 1000000.0
list saved to file!
list saved to file!
finalgrep : accuracy valid : 73.29
calculating test error
list saved to file!
list saved to file!
finalgrep : accuracy test : 73.16
fin 1604.5672342777252


In [None]:
Namespace(data_dir='/home/dc/cs230_project/dataset')

togrep : ['-f', '/run/user/1000/jupyter/kernel-84c6d9f5-f2b2-4c60-97a8-bf9b044efa8d.json']

Namespace(LSTM_num_layers=1, batch_size=128, data_dir='/home/dc/cs230_project/dataset', decay=0.99, dpout_fc=0.0, dpout_model=0.0, enc_lstm_dim=2048, encoder_type='InferSent', fc_dim=512, gpu_id=0, lrshrink=5, max_norm=5.0, minlr=1e-05, n_classes=2, n_enc_layers=1, n_epochs=25, nonlinear_fc=1, optimizer='sgd,lr=0.1', outputdir='savedir/', outputmodelname='3layernonlinear_small.pickle', pool_type='max', seed=4, weight_decay=0.0005, word_emb_dim=300)
loading small
quora checkpoint len(train[s1]):60623,len(train[s2]):60623,          len(train[label]):60623
============
len(valid['s1']):20208, len(valid[s2]):20208,           len(valid['label']):20208
============
len(test['s1']):20208,len(test['s2']):20208,           len(test['label']):20208
Found 47877(/106290) words with glove vectors
Vocab size : 47877
checkpoint after formatting: len(train[s1]):60623 ,len(train[s2]):60623       ,len(train[label]):60623, len(valid[s2]):20208 ,len(valid[s2]):20208,       len(valid[label]):20208,len(test[s2]):20208, len(test[s2]):20208       ,len(valid[label]):20208,len(word_vec):47877
NLINet(
  (encoder): InferSent(
    (enc_lstm): LSTM(300, 2048, bidirectional=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=8192, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.0)
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.0)
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.0)
    (12): Linear(in_features=512, out_features=2, bias=True)
  )
)
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/ipykernel_launcher.py:451: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() or inspect.getfullargspec()
total num epochs:25

TRAINING : Epoch 1
Learning rate : 0.1
<class 'torch.Tensor'> tensor(7675) 7675
12672 ; loss 0.66 ; sentence/s 489 ; words/s 36377 ; accuracy train : 59.96
<class 'torch.Tensor'> tensor(16018) 16018
25472 ; loss 0.63 ; sentence/s 478 ; words/s 36814 ; accuracy train : 62.57
<class 'torch.Tensor'> tensor(24485) 24485
38272 ; loss 0.61 ; sentence/s 476 ; words/s 35583 ; accuracy train : 63.76
<class 'torch.Tensor'> tensor(33070) 33070
51072 ; loss 0.6 ; sentence/s 482 ; words/s 35757 ; accuracy train : 64.59
results : epoch 1 ; mean accuracy train : 65.17

VALIDATION : Epoch 1
list saved to file!
list saved to file!
togrep : results : epoch 1 ; mean accuracy valid :              69.05
saving model at epoch 1

TRAINING : Epoch 2
Learning rate : 0.099
<class 'torch.Tensor'> tensor(8939) 8939
12672 ; loss 0.58 ; sentence/s 481 ; words/s 35322 ; accuracy train : 69.84
<class 'torch.Tensor'> tensor(17858) 17858
25472 ; loss 0.58 ; sentence/s 484 ; words/s 35645 ; accuracy train : 69.76
<class 'torch.Tensor'> tensor(26873) 26873
38272 ; loss 0.57 ; sentence/s 482 ; words/s 35780 ; accuracy train : 69.98
<class 'torch.Tensor'> tensor(36008) 36008
51072 ; loss 0.56 ; sentence/s 484 ; words/s 35955 ; accuracy train : 70.33
results : epoch 2 ; mean accuracy train : 70.46

VALIDATION : Epoch 2
list saved to file!
list saved to file!
togrep : results : epoch 2 ; mean accuracy valid :              70.65
saving model at epoch 2

TRAINING : Epoch 3
Learning rate : 0.09801
<class 'torch.Tensor'> tensor(9261) 9261
12672 ; loss 0.55 ; sentence/s 480 ; words/s 35803 ; accuracy train : 72.35
<class 'torch.Tensor'> tensor(18541) 18541
25472 ; loss 0.55 ; sentence/s 497 ; words/s 34506 ; accuracy train : 72.43
<class 'torch.Tensor'> tensor(27806) 27806
38272 ; loss 0.54 ; sentence/s 487 ; words/s 35496 ; accuracy train : 72.41
<class 'torch.Tensor'> tensor(37223) 37223
51072 ; loss 0.54 ; sentence/s 471 ; words/s 36489 ; accuracy train : 72.7
results : epoch 3 ; mean accuracy train : 72.78

VALIDATION : Epoch 3
list saved to file!
list saved to file!
togrep : results : epoch 3 ; mean accuracy valid :              72.23
saving model at epoch 3

TRAINING : Epoch 4
Learning rate : 0.0970299
<class 'torch.Tensor'> tensor(9467) 9467
12672 ; loss 0.52 ; sentence/s 473 ; words/s 36619 ; accuracy train : 73.96
<class 'torch.Tensor'> tensor(19088) 19088
25472 ; loss 0.51 ; sentence/s 475 ; words/s 35497 ; accuracy train : 74.56
<class 'torch.Tensor'> tensor(28571) 28571
38272 ; loss 0.52 ; sentence/s 484 ; words/s 35587 ; accuracy train : 74.4
<class 'torch.Tensor'> tensor(38079) 38079
51072 ; loss 0.51 ; sentence/s 488 ; words/s 34877 ; accuracy train : 74.37
results : epoch 4 ; mean accuracy train : 74.39

VALIDATION : Epoch 4
list saved to file!
list saved to file!
togrep : results : epoch 4 ; mean accuracy valid :              72.59
saving model at epoch 4

TRAINING : Epoch 5
Learning rate : 0.096059601
<class 'torch.Tensor'> tensor(9709) 9709
12672 ; loss 0.5 ; sentence/s 484 ; words/s 35618 ; accuracy train : 75.85
<class 'torch.Tensor'> tensor(19515) 19515
25472 ; loss 0.49 ; sentence/s 481 ; words/s 35667 ; accuracy train : 76.23
<class 'torch.Tensor'> tensor(29224) 29224
38272 ; loss 0.5 ; sentence/s 482 ; words/s 35829 ; accuracy train : 76.1
<class 'torch.Tensor'> tensor(39029) 39029
51072 ; loss 0.49 ; sentence/s 482 ; words/s 35157 ; accuracy train : 76.23
results : epoch 5 ; mean accuracy train : 76.28

VALIDATION : Epoch 5
list saved to file!
list saved to file!
togrep : results : epoch 5 ; mean accuracy valid :              73.88
saving model at epoch 5

TRAINING : Epoch 6
Learning rate : 0.09509900499
<class 'torch.Tensor'> tensor(9864) 9864
12672 ; loss 0.48 ; sentence/s 479 ; words/s 35937 ; accuracy train : 77.06
<class 'torch.Tensor'> tensor(19741) 19741
25472 ; loss 0.48 ; sentence/s 481 ; words/s 35389 ; accuracy train : 77.11
<class 'torch.Tensor'> tensor(29756) 29756
38272 ; loss 0.46 ; sentence/s 481 ; words/s 35559 ; accuracy train : 77.49
<class 'torch.Tensor'> tensor(39700) 39700
51072 ; loss 0.47 ; sentence/s 481 ; words/s 35704 ; accuracy train : 77.54
results : epoch 6 ; mean accuracy train : 77.58

VALIDATION : Epoch 6
list saved to file!
list saved to file!
togrep : results : epoch 6 ; mean accuracy valid :              74.18
saving model at epoch 6

TRAINING : Epoch 7
Learning rate : 0.0941480149401
<class 'torch.Tensor'> tensor(10180) 10180
12672 ; loss 0.44 ; sentence/s 480 ; words/s 36437 ; accuracy train : 79.53
<class 'torch.Tensor'> tensor(20326) 20326
25472 ; loss 0.45 ; sentence/s 477 ; words/s 35162 ; accuracy train : 79.4
<class 'torch.Tensor'> tensor(30446) 30446
38272 ; loss 0.45 ; sentence/s 484 ; words/s 35598 ; accuracy train : 79.29
<class 'torch.Tensor'> tensor(40536) 40536
51072 ; loss 0.45 ; sentence/s 485 ; words/s 35429 ; accuracy train : 79.17
results : epoch 7 ; mean accuracy train : 79.25

VALIDATION : Epoch 7
list saved to file!
list saved to file!
togrep : results : epoch 7 ; mean accuracy valid :              74.98
saving model at epoch 7

TRAINING : Epoch 8
Learning rate : 0.093206534790699
<class 'torch.Tensor'> tensor(10340) 10340
12672 ; loss 0.42 ; sentence/s 486 ; words/s 35450 ; accuracy train : 80.78
<class 'torch.Tensor'> tensor(20750) 20750
25472 ; loss 0.42 ; sentence/s 475 ; words/s 35651 ; accuracy train : 81.05
<class 'torch.Tensor'> tensor(31102) 31102
38272 ; loss 0.42 ; sentence/s 489 ; words/s 35027 ; accuracy train : 80.99
<class 'torch.Tensor'> tensor(41372) 41372
51072 ; loss 0.43 ; sentence/s 483 ; words/s 35428 ; accuracy train : 80.8
results : epoch 8 ; mean accuracy train : 80.74

VALIDATION : Epoch 8
list saved to file!
list saved to file!
togrep : results : epoch 8 ; mean accuracy valid :              75.1
saving model at epoch 8

TRAINING : Epoch 9
Learning rate : 0.09227446944279201
<class 'torch.Tensor'> tensor(10552) 10552
12672 ; loss 0.4 ; sentence/s 483 ; words/s 35580 ; accuracy train : 82.44
<class 'torch.Tensor'> tensor(21099) 21099
25472 ; loss 0.4 ; sentence/s 474 ; words/s 35980 ; accuracy train : 82.42
<class 'torch.Tensor'> tensor(31640) 31640
38272 ; loss 0.4 ; sentence/s 482 ; words/s 35343 ; accuracy train : 82.4
<class 'torch.Tensor'> tensor(42162) 42162
51072 ; loss 0.4 ; sentence/s 489 ; words/s 35740 ; accuracy train : 82.35
results : epoch 9 ; mean accuracy train : 82.36

VALIDATION : Epoch 9
list saved to file!
list saved to file!
togrep : results : epoch 9 ; mean accuracy valid :              74.53
Shrinking lr by : 5. New lr = 0.0184548938885584

TRAINING : Epoch 10
Learning rate : 0.018270344949672817
<class 'torch.Tensor'> tensor(10821) 10821
12672 ; loss 0.37 ; sentence/s 479 ; words/s 35889 ; accuracy train : 84.54
<class 'torch.Tensor'> tensor(21626) 21626
25472 ; loss 0.37 ; sentence/s 488 ; words/s 35286 ; accuracy train : 84.48
<class 'torch.Tensor'> tensor(32452) 32452
38272 ; loss 0.37 ; sentence/s 473 ; words/s 35806 ; accuracy train : 84.51
<class 'torch.Tensor'> tensor(43231) 43231
51072 ; loss 0.37 ; sentence/s 485 ; words/s 35219 ; accuracy train : 84.44
results : epoch 10 ; mean accuracy train : 84.42

VALIDATION : Epoch 10
list saved to file!
list saved to file!
togrep : results : epoch 10 ; mean accuracy valid :              75.76
saving model at epoch 10

TRAINING : Epoch 11
Learning rate : 0.01808764150017609
<class 'torch.Tensor'> tensor(10892) 10892
12672 ; loss 0.36 ; sentence/s 475 ; words/s 36440 ; accuracy train : 85.09
<class 'torch.Tensor'> tensor(21746) 21746
25472 ; loss 0.36 ; sentence/s 487 ; words/s 35107 ; accuracy train : 84.95
<class 'torch.Tensor'> tensor(32658) 32658
38272 ; loss 0.36 ; sentence/s 486 ; words/s 35541 ; accuracy train : 85.05
<class 'torch.Tensor'> tensor(43529) 43529
51072 ; loss 0.36 ; sentence/s 481 ; words/s 35812 ; accuracy train : 85.02
results : epoch 11 ; mean accuracy train : 84.94

VALIDATION : Epoch 11
list saved to file!
list saved to file!
togrep : results : epoch 11 ; mean accuracy valid :              75.67
Shrinking lr by : 5. New lr = 0.003617528300035218

TRAINING : Epoch 12
Learning rate : 0.003581353017034866
<class 'torch.Tensor'> tensor(10935) 10935
12672 ; loss 0.35 ; sentence/s 480 ; words/s 35747 ; accuracy train : 85.43
<class 'torch.Tensor'> tensor(21850) 21850
25472 ; loss 0.36 ; sentence/s 486 ; words/s 35195 ; accuracy train : 85.35
<class 'torch.Tensor'> tensor(32808) 32808
38272 ; loss 0.36 ; sentence/s 482 ; words/s 35719 ; accuracy train : 85.44
<class 'torch.Tensor'> tensor(43794) 43794
51072 ; loss 0.35 ; sentence/s 474 ; words/s 36091 ; accuracy train : 85.54
results : epoch 12 ; mean accuracy train : 85.52

VALIDATION : Epoch 12
list saved to file!
list saved to file!
togrep : results : epoch 12 ; mean accuracy valid :              75.78
saving model at epoch 12

TRAINING : Epoch 13
Learning rate : 0.0035455394868645173
<class 'torch.Tensor'> tensor(10940) 10940
12672 ; loss 0.35 ; sentence/s 487 ; words/s 35684 ; accuracy train : 85.47
<class 'torch.Tensor'> tensor(21907) 21907
25472 ; loss 0.35 ; sentence/s 474 ; words/s 35940 ; accuracy train : 85.57
<class 'torch.Tensor'> tensor(32776) 32776
38272 ; loss 0.36 ; sentence/s 479 ; words/s 35793 ; accuracy train : 85.35
<class 'torch.Tensor'> tensor(43692) 43692
51072 ; loss 0.35 ; sentence/s 481 ; words/s 35401 ; accuracy train : 85.34
results : epoch 13 ; mean accuracy train : 85.44

VALIDATION : Epoch 13
list saved to file!
list saved to file!
togrep : results : epoch 13 ; mean accuracy valid :              75.76
Shrinking lr by : 5. New lr = 0.0007091078973729035

TRAINING : Epoch 14
Learning rate : 0.0007020168183991745
<class 'torch.Tensor'> tensor(10923) 10923
12672 ; loss 0.35 ; sentence/s 480 ; words/s 36041 ; accuracy train : 85.34
<class 'torch.Tensor'> tensor(21887) 21887
25472 ; loss 0.35 ; sentence/s 472 ; words/s 36071 ; accuracy train : 85.5
<class 'torch.Tensor'> tensor(32837) 32837
38272 ; loss 0.35 ; sentence/s 484 ; words/s 35422 ; accuracy train : 85.51
<class 'torch.Tensor'> tensor(43839) 43839
51072 ; loss 0.35 ; sentence/s 486 ; words/s 35501 ; accuracy train : 85.62
results : epoch 14 ; mean accuracy train : 85.61

VALIDATION : Epoch 14
list saved to file!
list saved to file!
togrep : results : epoch 14 ; mean accuracy valid :              75.74
Shrinking lr by : 5. New lr = 0.00014040336367983488

TRAINING : Epoch 15
Learning rate : 0.00013899933004303652
<class 'torch.Tensor'> tensor(11011) 11011
12672 ; loss 0.35 ; sentence/s 480 ; words/s 35403 ; accuracy train : 86.02
<class 'torch.Tensor'> tensor(21945) 21945
25472 ; loss 0.35 ; sentence/s 470 ; words/s 36422 ; accuracy train : 85.72
<class 'torch.Tensor'> tensor(32890) 32890
38272 ; loss 0.35 ; sentence/s 477 ; words/s 35892 ; accuracy train : 85.65
<class 'torch.Tensor'> tensor(43856) 43856
51072 ; loss 0.35 ; sentence/s 484 ; words/s 35567 ; accuracy train : 85.66
results : epoch 15 ; mean accuracy train : 85.59

VALIDATION : Epoch 15
list saved to file!
list saved to file!
togrep : results : epoch 15 ; mean accuracy valid :              75.84
saving model at epoch 15

TRAINING : Epoch 16
Learning rate : 0.00013760933674260615
<class 'torch.Tensor'> tensor(11014) 11014
12672 ; loss 0.35 ; sentence/s 482 ; words/s 35737 ; accuracy train : 86.05
<class 'torch.Tensor'> tensor(21987) 21987
25472 ; loss 0.35 ; sentence/s 478 ; words/s 36395 ; accuracy train : 85.89
<class 'torch.Tensor'> tensor(32961) 32961
38272 ; loss 0.35 ; sentence/s 476 ; words/s 35849 ; accuracy train : 85.84
<class 'torch.Tensor'> tensor(43896) 43896
51072 ; loss 0.36 ; sentence/s 485 ; words/s 35720 ; accuracy train : 85.73
results : epoch 16 ; mean accuracy train : 85.77

VALIDATION : Epoch 16
list saved to file!
list saved to file!
togrep : results : epoch 16 ; mean accuracy valid :              75.73
Shrinking lr by : 5. New lr = 2.7521867348521228e-05

TRAINING : Epoch 17
Learning rate : 2.7246648675036015e-05
<class 'torch.Tensor'> tensor(10983) 10983
12672 ; loss 0.35 ; sentence/s 479 ; words/s 35459 ; accuracy train : 85.8
<class 'torch.Tensor'> tensor(21887) 21887
25472 ; loss 0.35 ; sentence/s 478 ; words/s 35508 ; accuracy train : 85.5
<class 'torch.Tensor'> tensor(32873) 32873
38272 ; loss 0.35 ; sentence/s 485 ; words/s 35416 ; accuracy train : 85.61
<class 'torch.Tensor'> tensor(43913) 43913
51072 ; loss 0.35 ; sentence/s 477 ; words/s 35435 ; accuracy train : 85.77
results : epoch 17 ; mean accuracy train : 85.75

VALIDATION : Epoch 17
list saved to file!
list saved to file!
togrep : results : epoch 17 ; mean accuracy valid :              75.72
Shrinking lr by : 5. New lr = 5.449329735007203e-06
saving state dict
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/torch/serialization.py:241: UserWarning: Couldn't retrieve source code for container of type NLINet. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/torch/serialization.py:241: UserWarning: Couldn't retrieve source code for container of type InferSent. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
done saving state dict

TEST : Epoch 18
calculating validation error

VALIDATION : Epoch 1000000.0
list saved to file!
list saved to file!
finalgrep : accuracy valid : 75.72
calculating test error
list saved to file!
list saved to file!
finalgrep : accuracy test : 75.16
fin 2433.8374314308167

1
​


In [None]:
make learning rate bigger. improves training error faster but it also makes difference between train and validation error
bigger faster also. try clean dataset. should see less overfitting with clean dataset. stop when difference between
validation error and train error is 10. 

THIS IS WORSE!!!!

Stuck in local minima. VE doesnt move past 69.xx over epochs. CHANGE TO ADAM FROM SGD

Namespace(data_dir='/home/dc/cs230_project/dataset')

togrep : ['-f', '/run/user/1000/jupyter/kernel-84c6d9f5-f2b2-4c60-97a8-bf9b044efa8d.json']

Namespace(LSTM_num_layers=2, batch_size=128, data_dir='/home/dc/cs230_project/dataset', decay=0.99, dpout_fc=0.2, dpout_model=0.2, enc_lstm_dim=2048, encoder_type='InferSent', fc_dim=256, gpu_id=0, lrshrink=5, max_norm=5.0, minlr=1e-05, n_classes=2, n_enc_layers=1, n_epochs=25, nonlinear_fc=1, optimizer='sgd,lr=0.3', outputdir='savedir/', outputmodelname='3layernonlinear_small.pickle', pool_type='max', seed=4, weight_decay=0.0005, word_emb_dim=300)
loading small
quora checkpoint len(train[s1]):60623,len(train[s2]):60623,          len(train[label]):60623
============
len(valid['s1']):20208, len(valid[s2]):20208,           len(valid['label']):20208
============
len(test['s1']):20208,len(test['s2']):20208,           len(test['label']):20208
Found 47877(/106290) words with glove vectors
Vocab size : 47877
checkpoint after formatting: len(train[s1]):60623 ,len(train[s2]):60623       ,len(train[label]):60623, len(valid[s2]):20208 ,len(valid[s2]):20208,       len(valid[label]):20208,len(test[s2]):20208, len(test[s2]):20208       ,len(valid[label]):20208,len(word_vec):47877
NLINet(
  (encoder): InferSent(
    (enc_lstm): LSTM(300, 2048, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=8192, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.2)
    (4): Linear(in_features=256, out_features=256, bias=True)
    (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.2)
    (8): Linear(in_features=256, out_features=256, bias=True)
    (9): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.2)
    (12): Linear(in_features=256, out_features=2, bias=True)
  )
)
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/ipykernel_launcher.py:451: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() or inspect.getfullargspec()
total num epochs:25

TRAINING : Epoch 1
Learning rate : 0.3
<class 'torch.Tensor'> tensor(7724) 7724
12672 ; loss 0.66 ; sentence/s 182 ; words/s 13521 ; accuracy train : 60.34
<class 'torch.Tensor'> tensor(15976) 15976
25472 ; loss 0.63 ; sentence/s 178 ; words/s 13709 ; accuracy train : 62.41
<class 'torch.Tensor'> tensor(24324) 24324
38272 ; loss 0.62 ; sentence/s 180 ; words/s 13447 ; accuracy train : 63.34
<class 'torch.Tensor'> tensor(32860) 32860
51072 ; loss 0.61 ; sentence/s 181 ; words/s 13423 ; accuracy train : 64.18
results : epoch 1 ; mean accuracy train : 64.72

VALIDATION : Epoch 1
list saved to file!
list saved to file!
togrep : results : epoch 1 ; mean accuracy valid :              66.25
saving model at epoch 1

TRAINING : Epoch 2
Learning rate : 0.297
<class 'torch.Tensor'> tensor(8740) 8740
12672 ; loss 0.59 ; sentence/s 180 ; words/s 13240 ; accuracy train : 68.28
<class 'torch.Tensor'> tensor(17463) 17463
25472 ; loss 0.59 ; sentence/s 181 ; words/s 13371 ; accuracy train : 68.21
<class 'torch.Tensor'> tensor(26365) 26365
38272 ; loss 0.58 ; sentence/s 181 ; words/s 13430 ; accuracy train : 68.66
<class 'torch.Tensor'> tensor(35268) 35268
51072 ; loss 0.58 ; sentence/s 181 ; words/s 13474 ; accuracy train : 68.88
results : epoch 2 ; mean accuracy train : 69.0

VALIDATION : Epoch 2
list saved to file!
list saved to file!
togrep : results : epoch 2 ; mean accuracy valid :              66.92
saving model at epoch 2

TRAINING : Epoch 3
Learning rate : 0.29402999999999996
<class 'torch.Tensor'> tensor(9016) 9016
12672 ; loss 0.57 ; sentence/s 180 ; words/s 13451 ; accuracy train : 70.44
<class 'torch.Tensor'> tensor(17996) 17996
25472 ; loss 0.57 ; sentence/s 185 ; words/s 12877 ; accuracy train : 70.3
<class 'torch.Tensor'> tensor(27052) 27052
38272 ; loss 0.56 ; sentence/s 182 ; words/s 13273 ; accuracy train : 70.45
<class 'torch.Tensor'> tensor(36128) 36128
51072 ; loss 0.56 ; sentence/s 178 ; words/s 13782 ; accuracy train : 70.56
results : epoch 3 ; mean accuracy train : 70.6

VALIDATION : Epoch 3
list saved to file!
list saved to file!
togrep : results : epoch 3 ; mean accuracy valid :              66.43
Shrinking lr by : 5. New lr = 0.05880599999999999

TRAINING : Epoch 4
Learning rate : 0.05821793999999999
<class 'torch.Tensor'> tensor(9125) 9125
12672 ; loss 0.55 ; sentence/s 177 ; words/s 13757 ; accuracy train : 71.29
<class 'torch.Tensor'> tensor(18423) 18423
25472 ; loss 0.55 ; sentence/s 180 ; words/s 13430 ; accuracy train : 71.96
<class 'torch.Tensor'> tensor(27569) 27569
38272 ; loss 0.55 ; sentence/s 181 ; words/s 13338 ; accuracy train : 71.79
<class 'torch.Tensor'> tensor(36777) 36777
51072 ; loss 0.55 ; sentence/s 183 ; words/s 13084 ; accuracy train : 71.83
results : epoch 4 ; mean accuracy train : 71.75

VALIDATION : Epoch 4
list saved to file!
list saved to file!
togrep : results : epoch 4 ; mean accuracy valid :              70.16
saving model at epoch 4

TRAINING : Epoch 5
Learning rate : 0.05763576059999999
<class 'torch.Tensor'> tensor(9195) 9195
12672 ; loss 0.55 ; sentence/s 181 ; words/s 13350 ; accuracy train : 71.84
<class 'torch.Tensor'> tensor(18383) 18383
25472 ; loss 0.55 ; sentence/s 180 ; words/s 13397 ; accuracy train : 71.81
<class 'torch.Tensor'> tensor(27510) 27510
38272 ; loss 0.55 ; sentence/s 181 ; words/s 13490 ; accuracy train : 71.64
<class 'torch.Tensor'> tensor(36632) 36632
51072 ; loss 0.55 ; sentence/s 181 ; words/s 13213 ; accuracy train : 71.55
results : epoch 5 ; mean accuracy train : 71.65

VALIDATION : Epoch 5
list saved to file!
list saved to file!
togrep : results : epoch 5 ; mean accuracy valid :              69.7
Shrinking lr by : 5. New lr = 0.011527152119999998

TRAINING : Epoch 6
Learning rate : 0.011411880598799998
<class 'torch.Tensor'> tensor(9211) 9211
12672 ; loss 0.54 ; sentence/s 179 ; words/s 13467 ; accuracy train : 71.96
<class 'torch.Tensor'> tensor(18398) 18398
25472 ; loss 0.55 ; sentence/s 182 ; words/s 13372 ; accuracy train : 71.87
<class 'torch.Tensor'> tensor(27667) 27667
38272 ; loss 0.54 ; sentence/s 180 ; words/s 13345 ; accuracy train : 72.05
<class 'torch.Tensor'> tensor(36835) 36835
51072 ; loss 0.55 ; sentence/s 182 ; words/s 13495 ; accuracy train : 71.94
results : epoch 6 ; mean accuracy train : 71.93

VALIDATION : Epoch 6
list saved to file!
list saved to file!
togrep : results : epoch 6 ; mean accuracy valid :              69.74
Shrinking lr by : 5. New lr = 0.0022823761197599997

TRAINING : Epoch 7
Learning rate : 0.0022595523585623996
<class 'torch.Tensor'> tensor(9264) 9264
12672 ; loss 0.54 ; sentence/s 180 ; words/s 13662 ; accuracy train : 72.38
<class 'torch.Tensor'> tensor(18452) 18452
25472 ; loss 0.55 ; sentence/s 180 ; words/s 13283 ; accuracy train : 72.08
<class 'torch.Tensor'> tensor(27661) 27661
38272 ; loss 0.55 ; sentence/s 181 ; words/s 13376 ; accuracy train : 72.03
<class 'torch.Tensor'> tensor(36859) 36859
51072 ; loss 0.54 ; sentence/s 181 ; words/s 13275 ; accuracy train : 71.99
results : epoch 7 ; mean accuracy train : 72.13

VALIDATION : Epoch 7
list saved to file!
list saved to file!
togrep : results : epoch 7 ; mean accuracy valid :              69.45
Shrinking lr by : 5. New lr = 0.0004519104717124799

TRAINING : Epoch 8
Learning rate : 0.0004473913669953551
<class 'torch.Tensor'> tensor(9227) 9227
12672 ; loss 0.54 ; sentence/s 182 ; words/s 13296 ; accuracy train : 72.09
<class 'torch.Tensor'> tensor(18526) 18526
25472 ; loss 0.54 ; sentence/s 179 ; words/s 13446 ; accuracy train : 72.37
<class 'torch.Tensor'> tensor(27761) 27761
38272 ; loss 0.54 ; sentence/s 184 ; words/s 13172 ; accuracy train : 72.29
<class 'torch.Tensor'> tensor(36894) 36894
51072 ; loss 0.55 ; sentence/s 181 ; words/s 13316 ; accuracy train : 72.06
results : epoch 8 ; mean accuracy train : 72.18

VALIDATION : Epoch 8
list saved to file!
list saved to file!
togrep : results : epoch 8 ; mean accuracy valid :              69.71
Shrinking lr by : 5. New lr = 8.947827339907103e-05

TRAINING : Epoch 9
Learning rate : 8.858349066508032e-05
<class 'torch.Tensor'> tensor(9242) 9242
12672 ; loss 0.54 ; sentence/s 181 ; words/s 13385 ; accuracy train : 72.2
<class 'torch.Tensor'> tensor(18388) 18388
25472 ; loss 0.54 ; sentence/s 178 ; words/s 13567 ; accuracy train : 71.83
<class 'torch.Tensor'> tensor(27586) 27586
38272 ; loss 0.54 ; sentence/s 181 ; words/s 13301 ; accuracy train : 71.84
<class 'torch.Tensor'> tensor(36838) 36838
51072 ; loss 0.54 ; sentence/s 183 ; words/s 13382 ; accuracy train : 71.95
results : epoch 9 ; mean accuracy train : 72.03

VALIDATION : Epoch 9
list saved to file!
list saved to file!
togrep : results : epoch 9 ; mean accuracy valid :              69.83
Shrinking lr by : 5. New lr = 1.7716698133016062e-05

TRAINING : Epoch 10
Learning rate : 1.7539531151685903e-05
<class 'torch.Tensor'> tensor(9265) 9265
12672 ; loss 0.54 ; sentence/s 181 ; words/s 13567 ; accuracy train : 72.38
<class 'torch.Tensor'> tensor(18519) 18519
25472 ; loss 0.54 ; sentence/s 183 ; words/s 13264 ; accuracy train : 72.34
<class 'torch.Tensor'> tensor(27786) 27786
38272 ; loss 0.54 ; sentence/s 179 ; words/s 13558 ; accuracy train : 72.36
<class 'torch.Tensor'> tensor(36979) 36979
51072 ; loss 0.54 ; sentence/s 182 ; words/s 13262 ; accuracy train : 72.22
results : epoch 10 ; mean accuracy train : 72.27

VALIDATION : Epoch 10
list saved to file!
list saved to file!
togrep : results : epoch 10 ; mean accuracy valid :              69.34
Shrinking lr by : 5. New lr = 3.507906230337181e-06
saving state dict
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/torch/serialization.py:241: UserWarning: Couldn't retrieve source code for container of type NLINet. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/dc/anaconda3/envs/cs230/lib/python3.6/site-packages/torch/serialization.py:241: UserWarning: Couldn't retrieve source code for container of type InferSent. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
done saving state dict

TEST : Epoch 11
calculating validation error

VALIDATION : Epoch 1000000.0
list saved to file!
list saved to file!
finalgrep : accuracy valid : 69.34
calculating test error
list saved to file!
list saved to file!
finalgrep : accuracy test : 69.22
fin 3793.925459384918