In [1]:
from torchtext.data import TabularDataset, Field, BucketIterator
from vectors import MultiCCA, VectorVocabField
from utils import pathify, Checkpoint, load_model
from models import SiameseDAN
from torch.autograd import Variable
from tqdm import tqdm

import time
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch.optim as optim

In [2]:
tqdm.monitor_interval = 0 

In [3]:
vectors = MultiCCA(cache=pathify('data'))

In [4]:
def preprocess(l, lang = 'en'):
    return [lang + ':' + s for s in l]

sentence_text = VectorVocabField(lower=True, preprocessing=preprocess)
label_field = Field(sequential=False, use_vocab=False, tensor_type=torch.FloatTensor)

train, val, test = TabularDataset.splits(
    path = pathify('data/sick'), format='tsv', skip_header=True,
    train = 'train.txt', test = 'test.txt', validation = 'trial.txt',
    fields = [('', None), ('', None), ('s1', sentence_text), ('s2', sentence_text), ('', None), ('score', label_field)]
)

In [5]:
sentence_text.build_vocab(train, vectors=vectors)

In [6]:
train_iter, val_iter, test_iter = BucketIterator.splits(
    datasets=(train, test, val), batch_size=32, sort_key = lambda x: len(x.s1), repeat=False)

In [7]:
def run_epoch(model, loss, iterable, training=True):
    batch_accs, batch_losses = [], []
    epoch_start = time.time()
    
    for batch in tqdm(iterable, total=len(iterable)):
        d, q, y = batch.s1.t(), batch.s2.t(), (batch.score > 3).long()
        
        if training:
            model.zero_grad()

        out = model(d, q)
        _, preds = torch.max(out, 1)
        
        accuracy = torch.mean(torch.eq(preds, y).float())
        batch_loss = loss(out, y)

        if training:
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), .25)
            opt.step()

        batch_accs.append(accuracy.data[0])
        batch_losses.append(batch_loss.data[0])

        del d, q, y
    
    epoch_end = time.time()
    return np.mean(batch_accs), np.mean(batch_losses), epoch_end - epoch_start

In [8]:
vocab_size, embeddings_dim = sentence_text.vocab.vectors.shape

params = {
    'vocab_size': vocab_size, 
    'embedding_dim': embeddings_dim, 
    'hidden_dim': 100, 
    'num_classes': 2
}

clf = SiameseDAN(**params)
clf.load_pretrained(sentence_text.vocab.vectors, mode='static')

In [9]:
opt = optim.Adam(filter(lambda p: p.requires_grad, clf.parameters()), lr=5e-2)
loss = nn.NLLLoss()
scheduler = optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.1)
checkpointer = Checkpoint(clf, params, pathify('data/models/sickentest'))

save_every = 10

init_acc, _, _ = run_epoch(clf, loss, train_iter, training=False)
best_acc, _, _ = run_epoch(clf, loss, test_iter, training=False)

trn_losses, trn_accs = [0.], [init_acc]
val_losses, val_accs = [0.], [best_acc]

print(best_acc)

for epoch in range(10):
    scheduler.step()
    
    clf.train()
    trn_acc, trn_loss, trn_time = run_epoch(clf, loss, train_iter, training=True)
    trn_losses.append(trn_loss)
    trn_accs.append(trn_acc)
        
    y_onehot = torch.FloatTensor(32, 2)
    clf.eval()
    val_acc, val_loss, val_time = run_epoch(clf, loss, val_iter, training=False)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    
    checkpointer.update(val_acc)
    print(checkpointer.best)

100%|██████████| 139/139 [00:00<00:00, 354.51it/s]
100%|██████████| 16/16 [00:00<00:00, 426.17it/s]
  8%|▊         | 11/139 [00:00<00:01, 105.65it/s]

0.5243489593267441


100%|██████████| 139/139 [00:01<00:00, 135.75it/s]
100%|██████████| 154/154 [00:00<00:00, 361.12it/s]
  6%|▌         | 8/139 [00:00<00:01, 74.70it/s]

0.7454139609615524


100%|██████████| 139/139 [00:01<00:00, 98.64it/s]
100%|██████████| 154/154 [00:00<00:00, 270.66it/s]
  6%|▌         | 8/139 [00:00<00:01, 75.24it/s]

0.7685470778446692


100%|██████████| 139/139 [00:01<00:00, 93.96it/s]
100%|██████████| 154/154 [00:00<00:00, 366.19it/s]
  6%|▌         | 8/139 [00:00<00:01, 78.85it/s]

0.7709821427797342


100%|██████████| 139/139 [00:01<00:00, 88.17it/s]
100%|██████████| 154/154 [00:00<00:00, 274.34it/s]
  4%|▍         | 6/139 [00:00<00:02, 59.68it/s]

0.7895292206244036


100%|██████████| 139/139 [00:01<00:00, 101.70it/s]
100%|██████████| 154/154 [00:00<00:00, 239.23it/s]
  7%|▋         | 10/139 [00:00<00:01, 98.83it/s]

0.7895292206244036


100%|██████████| 139/139 [00:00<00:00, 149.03it/s]
100%|██████████| 154/154 [00:00<00:00, 376.95it/s]
 10%|█         | 14/139 [00:00<00:00, 139.58it/s]

0.7895292206244036


100%|██████████| 139/139 [00:01<00:00, 129.17it/s]
100%|██████████| 154/154 [00:00<00:00, 326.94it/s]
  5%|▌         | 7/139 [00:00<00:01, 68.88it/s]

0.7945616883890969


100%|██████████| 139/139 [00:01<00:00, 93.42it/s]
100%|██████████| 154/154 [00:00<00:00, 288.56it/s]
  6%|▋         | 9/139 [00:00<00:01, 89.96it/s]

0.7945616883890969


100%|██████████| 139/139 [00:00<00:00, 144.26it/s]
100%|██████████| 154/154 [00:00<00:00, 387.98it/s]
  5%|▌         | 7/139 [00:00<00:02, 64.74it/s]

0.8004464286488372


100%|██████████| 139/139 [00:01<00:00, 91.27it/s]
100%|██████████| 154/154 [00:00<00:00, 335.33it/s]


0.8004464286488372


In [10]:
the_model = load_model(SiameseDAN, pathify('data/models/sickentest'))

In [23]:
a = sentence_text.process(
    [['en:the', 'en:man', 'en:went', 'en:for', 'en:a', 'en:jog'], 
     ['en:he', 'en:went', 'en:jogging']], train=False, device=-1)

In [24]:
the_model.eval()

SiameseDAN(
  (dan): DAN(
    (embedding): Embedding(390271, 512)
    (hidden): Linear(in_features=512, out_features=100, bias=True)
    (norm_hidden): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True)
  )
  (out): Linear(in_features=200, out_features=2, bias=True)
  (norm_out): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True)
)

In [25]:
prob, lbl = the_model(a[:, 0].contiguous().view(1, -1), a[:, 1].contiguous().view(1, -1)).max(dim=1)
lbl.data[0], np.exp(prob.data.numpy())[0]

(1, 0.7740681)