In [None]:
import torch
from torch import nn
from torch.nn import functional as func
from torch.nn.utils import rnn
from torch.autograd import Variable
import numpy
import pickle
from sklearn.metrics import accuracy_score
from utils import Dataset, get_script_short_name
import time

# Hyper-parameters shared by the model definition and the training loop.
hparams = dict(
    learning_rate=0.001,  # step size passed to the Adam optimizer
    max_epoch=20,         # number of passes over the training set
    display_step=20,      # evaluate on dev / print every this many batches
    emb_dim=300,          # LSTM input size (word-embedding width)
    conv_win=3,           # not referenced by the code visible in this file
    repr_dim=300,         # size of each sentence representation
    fc1_dim=300,          # width of the hidden fully-connected layer
    n_classes=2,          # number of output classes
    batch_size=400,       # examples per batch for training and evaluation
)


class LSTM(nn.Module):
    """Bidirectional LSTM encoder that maps each sequence to one vector.

    The final cell states of the forward and backward directions are
    combined into a single ``hparams['repr_dim']``-wide row per sequence.
    """

    def __init__(self, emb_layer):
        """Build the encoder around an existing embedding layer.

        Args:
            emb_layer: module applied to the token input before the LSTM;
                its output width must equal ``hparams['emb_dim']``.

        Raises:
            ValueError: if ``hparams['repr_dim']`` is odd, because the two
                directions could not be reassembled into ``repr_dim`` columns.
        """
        super(LSTM, self).__init__()
        if hparams['repr_dim'] % 2:
            raise ValueError("hparams['repr_dim'] must be even for a bidirectional LSTM")
        self.emb_layer = emb_layer
        # Each direction gets half of repr_dim so the concatenation is repr_dim wide.
        self.lstm_layer = nn.LSTM(input_size=hparams['emb_dim'], hidden_size=hparams['repr_dim'] // 2,
                                  batch_first=True, bidirectional=True)

    def forward(self, x, lengths):
        """Encode a padded batch of sequences.

        Args:
            x: batch-first padded input for the embedding layer
                (presumably token indices of shape (batch, seq_len) -- confirm
                against the ``Dataset`` helper).
            lengths: true length of every sequence in ``x``, in batch order.

        Returns:
            Tensor with one ``repr_dim``-sized row per sequence, in the same
            order as the rows of ``x``.
        """
        embs = self.emb_layer(x)
        # pack_padded_sequence is fed sequences sorted by decreasing length.
        embs_sort, lengths_sort, unsort_idx = self.sort_batch(embs, lengths)
        seq = rnn.pack_padded_sequence(embs_sort, lengths_sort.cpu().numpy(), batch_first=True)
        hs, (hn, cn) = self.lstm_layer(seq)
        # cn is (directions, batch, hidden); move batch first and flatten the rest.
        out = cn.permute(1, 2, 0).contiguous().view(-1, hparams['repr_dim'])
        # Undo the length sort so rows line up with the caller's batch order.
        return out[unsort_idx]

    @staticmethod
    def sort_batch(x, l):
        """Sort a batch by decreasing sequence length.

        The lengths tensor is created on the same device as ``x`` (instead of
        unconditionally on CUDA), so the encoder also works on CPU.

        Args:
            x: tensor whose first dimension indexes the batch.
            l: sequence lengths (list, numpy array, ...), one per row of ``x``.

        Returns:
            Tuple ``(x_sorted, l_sorted, unsort_idx)`` where
            ``x_sorted[unsort_idx]`` restores the original row order.
        """
        l = torch.from_numpy(numpy.asarray(l)).to(x.device)
        l_sorted, sidx = l.sort(0, descending=True)
        x_sorted = x[sidx]
        # Sorting the permutation yields its inverse.
        _, unsort_idx = sidx.sort()
        return x_sorted, l_sorted, unsort_idx


class Model(nn.Module):
    """Sentence-pair classifier built on a shared LSTM encoder.

    Both inputs are encoded with the same ``LSTM`` instance; the two
    representations are merged into interaction features and passed
    through two linear layers to produce class scores.
    """

    def __init__(self, emb_layer):
        """Create the shared encoder and the two-layer classification head.

        Args:
            emb_layer: embedding module handed to the ``LSTM`` encoder.
        """
        super().__init__()
        repr_dim = hparams['repr_dim']
        hidden_dim = hparams['fc1_dim']
        self.lstm = LSTM(emb_layer)
        # The joint feature vector is two repr_dim-wide pieces side by side.
        self.fc1 = nn.Linear(2 * repr_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hparams['n_classes'])

    def forward(self, q1, q2, q1_len, q2_len):
        """Score a batch of sentence pairs.

        Args:
            q1: padded batch for the first sentence of each pair.
            q2: padded batch for the second sentence of each pair.
            q1_len: sequence lengths for ``q1``.
            q2_len: sequence lengths for ``q2``.

        Returns:
            Un-normalised class scores (the output of the last linear layer),
            one row per pair.
        """
        left = self.lstm(q1, q1_len)
        right = self.lstm(q2, q2_len)
        abs_diff = torch.abs(left - right)
        product = left * right
        features = torch.cat([abs_diff, product], dim=1)
        hidden = self.fc1(features).tanh()
        return self.fc2(hidden)


def run_batch(b_data, b_lengths, model, optimizer=None):
    """Run one batch through ``model``, training it when an optimizer is given.

    Inputs are moved to the device the model's parameters live on (rather
    than unconditionally to CUDA), and inference runs under
    ``torch.no_grad()`` so no autograd graph is built during evaluation.

    Args:
        b_data: mapping with numpy arrays under ``'q1'`` and ``'q2'``, plus
            ``'y'`` (class labels) when training.
        b_lengths: mapping with the sequence lengths under ``'q1'``/``'q2'``.
        model: callable as ``model(q1, q2, q1_len, q2_len)`` returning
            per-class scores of shape (batch, n_classes).
        optimizer: if not ``None``, one optimisation step is performed.

    Returns:
        Training mode: the batch loss as a Python float.
        Inference mode: tuple ``(predicted, prob_1)`` with the arg-max class
        per example and the softmax probability of class 1.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        # Parameter-free model: keep the historical preference for CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    q1 = torch.from_numpy(b_data['q1']).to(device)
    q2 = torch.from_numpy(b_data['q2']).to(device)

    if optimizer is not None:
        y = torch.from_numpy(b_data['y']).to(device)
        outputs = model(q1, q2, b_lengths['q1'], b_lengths['q2'])
        optimizer.zero_grad()
        loss = func.cross_entropy(outputs, y)
        loss.backward()
        optimizer.step()
        return loss.item()

    with torch.no_grad():
        outputs = model(q1, q2, b_lengths['q1'], b_lengths['q2'])
        _, predicted = outputs.max(1)
        prob = func.softmax(outputs, dim=1)
    return predicted, prob[:, 1]


def run_epoch_eval(dataset, model, output_file=''):
    """Evaluate ``model`` on every batch of ``dataset`` and return accuracy.

    Args:
        dataset: object providing ``get_batches(batch_size, keys)`` and
            ``get_data_item('y')``.
        model: model forwarded to ``run_batch`` in inference mode.
        output_file: optional path; when non-empty, the class-1 probability
            of every example is written there, one ``%.4f`` value per line.
            The file is overwritten on each call so it always holds exactly
            one set of predictions (it was previously opened in append mode,
            which stacked the scores of successive evaluations).

    Returns:
        Accuracy of the predicted labels against ``dataset.get_data_item('y')``.
        NOTE(review): this assumes ``get_batches`` yields examples in the same
        order as ``get_data_item('y')`` -- confirm in ``utils.Dataset``.
    """
    all_plabels, all_pscores = [], []
    batches, batch_lengths = dataset.get_batches(hparams['batch_size'], ('q1', 'q2', 'y'))
    for b_data, b_lengths in zip(batches, batch_lengths):
        plabels, pscores = run_batch(b_data, b_lengths, model)
        all_plabels.extend(plabels.cpu().numpy().tolist())
        all_pscores.extend(pscores.cpu().numpy().tolist())
    if output_file:
        with open(output_file, 'w') as f:
            f.writelines(f'{s:.4f}\n' for s in all_pscores)
    return accuracy_score(dataset.get_data_item('y'), all_plabels)


def run():
    """Train the model and report dev/test accuracy along the way.

    Loads the pickled datasets and embedding matrix from ``data/data_emb``,
    trains with Adam for ``hparams['max_epoch']`` epochs, and every
    ``hparams['display_step']`` batches prints the mean training loss and
    the dev accuracy. Whenever the dev accuracy improves, the test set is
    evaluated and its scores are written to ``pred/bilstm.pred``.
    """
    print('Loading data...')
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('data/data_emb', 'rb') as f:
        all_sets, embs, _word2idx = pickle.load(f)

    # Embedding layer initialised from the pre-computed embedding matrix.
    vocab_size, emb_width = embs.shape[0], embs.shape[1]
    emb_layer = nn.Embedding(vocab_size, emb_width)
    emb_layer.weight = nn.Parameter(torch.from_numpy(embs))

    model = Model(emb_layer).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'])

    pad_keys = ('q1', 'q2')
    train_set = Dataset(all_sets[0], shuffle=True, pad_keys=pad_keys)
    dev_set = Dataset(all_sets[1], shuffle=False, pad_keys=pad_keys)
    test_set = Dataset(all_sets[2], shuffle=False, pad_keys=pad_keys)

    batch_size = hparams['batch_size']
    display_step = hparams['display_step']
    batch_keys = ('q1', 'q2', 'y')

    step = 0
    running_loss = 0
    dev_best = 0
    test_score = 0
    print("Starting training...")
    print(hparams)
    tick = time.time()
    for epoch in range(hparams['max_epoch']):
        batches, batch_lengths = train_set.get_batches(batch_size, batch_keys)
        for b_data, b_lengths in zip(batches, batch_lengths):
            running_loss += run_batch(b_data, b_lengths, model, optimizer)
            step += 1
            if step % display_step != 0:
                continue

            # Periodic report: mean loss since the last report + dev accuracy.
            avg_loss = running_loss / display_step
            running_loss = 0
            dev_score = run_epoch_eval(dev_set, model)
            elapsed = time.time() - tick
            report = [
                f'Epoc {epoch} iter {step} took {elapsed:.1f}s\n',
                f'loss:\t{avg_loss:.5f}\tdev score:\t{dev_score:.4f}',
            ]
            if dev_score > dev_best:
                # New best on dev: score the test set and dump its predictions.
                dev_best = dev_score
                test_score = run_epoch_eval(test_set, model, 'pred/bilstm.pred')
                report.append(f'\t*** New best dev ***\ttest score:\t{test_score:.4f}')
            print(''.join(report))
            tick = time.time()
    print(f'Best model on dev: dev:{dev_best:.4f}\ttest:{test_score:.4f}')


# Script entry point: start training only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    run()


Loading data...
Starting training...
{'learning_rate': 0.001, 'max_epoch': 20, 'display_step': 20, 'emb_dim': 300, 'conv_win': 3, 'repr_dim': 300, 'fc1_dim': 300, 'n_classes': 2, 'batch_size': 400}
Epoc 0 iter 20 took 9.6s
loss:	0.60698	dev score:	0.7035	*** New best dev ***	test score:	0.6971
Epoc 0 iter 40 took 7.4s
loss:	0.54456	dev score:	0.7164	*** New best dev ***	test score:	0.7099
Epoc 0 iter 60 took 7.8s
loss:	0.53002	dev score:	0.7535	*** New best dev ***	test score:	0.7446
Epoc 0 iter 80 took 7.5s
loss:	0.49929	dev score:	0.7596	*** New best dev ***	test score:	0.7535
Epoc 0 iter 100 took 7.6s
loss:	0.47761	dev score:	0.7741	*** New best dev ***	test score:	0.7709
Epoc 0 iter 120 took 7.4s
loss:	0.45796	dev score:	0.7554
Epoc 0 iter 140 took 7.3s
loss:	0.45583	dev score:	0.7860	*** New best dev ***	test score:	0.7784
Epoc 0 iter 160 took 7.2s
loss:	0.44923	dev score:	0.7920	*** New best dev ***	test score:	0.7858
Epoc 0 iter 180 took 7.2s
loss:	0.43700	dev score:	0.7908
Epoc

Epoc 2 iter 2180 took 7.5s
loss:	0.18681	dev score:	0.8560
Epoc 2 iter 2200 took 7.6s
loss:	0.18184	dev score:	0.8605	*** New best dev ***	test score:	0.8579
Epoc 2 iter 2220 took 7.2s
loss:	0.18395	dev score:	0.8613	*** New best dev ***	test score:	0.8594
Epoc 2 iter 2240 took 7.3s
loss:	0.19042	dev score:	0.8598
Epoc 2 iter 2260 took 7.3s
loss:	0.18868	dev score:	0.8591
Epoc 2 iter 2280 took 7.8s
loss:	0.18416	dev score:	0.8553
Epoc 2 iter 2300 took 7.2s
loss:	0.19111	dev score:	0.8597
Epoc 2 iter 2320 took 7.2s
loss:	0.18322	dev score:	0.8592
Epoc 2 iter 2340 took 7.4s
loss:	0.18866	dev score:	0.8573
Epoc 2 iter 2360 took 7.2s
loss:	0.18959	dev score:	0.8586
Epoc 2 iter 2380 took 7.5s
loss:	0.19189	dev score:	0.8587
Epoc 2 iter 2400 took 7.4s
loss:	0.19749	dev score:	0.8610
Epoc 2 iter 2420 took 7.6s
loss:	0.19186	dev score:	0.8602
Epoc 3 iter 2440 took 8.8s
loss:	0.13311	dev score:	0.8608
Epoc 3 iter 2460 took 7.4s
loss:	0.09949	dev score:	0.8574
Epoc 3 iter 2480 took 7.4s
loss:	0.