In [1]:
# Print the working directory; the relative paths used in later cells resolve against it.
! pwd

/home/jupyter/compexp/nli/code


In [1]:
import os
import torch
import torch.optim as optim
import torch.nn as nn

from torch.utils.data import DataLoader
from data.snli import SNLI, pad_collate
from contextlib import nullcontext
from tqdm import tqdm
import numpy as np
from collections import defaultdict


import models
import util

In [2]:
def run(split, epoch, model, optimizer, criterion, dataloaders, args):
    """
    Make one pass over ``dataloaders[split]`` and report loss and accuracy.

    When ``split`` is "train" the model is put in training mode and the
    optimizer takes one step per batch. For any other split the model is put
    in eval mode and the forward pass runs under ``torch.no_grad``.

    Batches are moved to the GPU when ``args.cuda`` is truthy. The running
    loss and accuracy are shown in the tqdm progress-bar description.

    Returns a dict with keys "loss" and "acc" holding the ``.avg`` of the two
    ``util.AverageMeter`` instances (each update is passed the batch size).
    """
    is_train = split == "train"
    if is_train:
        model.train()
        grad_ctx = nullcontext
    else:
        model.eval()
        grad_ctx = torch.no_grad

    progress = tqdm(dataloaders[split], desc=f"{split} epoch {epoch}")

    losses = util.AverageMeter()
    accs = util.AverageMeter()
    for batch in progress:
        s1, s1len, s2, s2len, targets = batch

        if args.cuda:
            s1, s1len, s2, s2len, targets = (
                t.cuda() for t in (s1, s1len, s2, s2len, targets)
            )

        n_examples = targets.shape[0]

        # Gradients are only tracked for the training split.
        with grad_ctx():
            logits = model(s1, s1len, s2, s2len)
            loss = criterion(logits, targets)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Fraction of examples whose highest-scoring class matches the target.
        hits = logits.argmax(1) == targets
        batch_acc = hits.float().mean()

        losses.update(loss.item(), n_examples)
        accs.update(batch_acc.item(), n_examples)

        progress.set_description(
            f"{split} epoch {epoch} loss {losses.avg:.3f} acc {accs.avg:.3f}"
        )

    return {"loss": losses.avg, "acc": accs.avg}


def build_model(vocab_size, model_type, vocab=None, bert=False, embedding_dim=300, hidden_dim=512):
    """
    Build an SNLI entailment classifier (bowman-style unless told otherwise).

    Parameters
    ----------
    vocab_size
        Forwarded to ``models.TextEncoder``. Unused when ``bert`` is true.
    model_type
        "minimal" selects ``models.EntailmentClassifier``; any other value
        selects ``models.BowmanEntailmentClassifier``. Unused when ``bert``
        is true.
    vocab
        Forwarded to ``models.BertEntailmentClassifier``. Required when
        ``bert`` is true.
    bert
        When true, return a ``models.BertEntailmentClassifier`` built with
        ``freeze_bert=True`` and skip the text-encoder path entirely.
    embedding_dim, hidden_dim
        Forwarded to ``models.TextEncoder``. Unused when ``bert`` is true.

    Raises
    ------
    ValueError
        If ``bert`` is true and ``vocab`` is None.
    """
    if bert:
        if vocab is None:
            # ValueError is a subclass of Exception, so existing
            # `except Exception` handlers keep working.
            raise ValueError('Bert model requires passing the datasets vocab field')
        return models.BertEntailmentClassifier(vocab=vocab, freeze_bert=True)

    enc = models.TextEncoder(
        vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim
    )
    if model_type == "minimal":
        return models.EntailmentClassifier(enc)
    # Every other model_type falls back to the Bowman-style classifier.
    return models.BowmanEntailmentClassifier(enc)

def serialize(model, dataset):
    """
    Bundle what is needed to save a checkpoint into a single dict.

    The result has the model's ``state_dict()`` under "state_dict" and the
    dataset's ``stoi`` / ``itos`` attributes under "stoi" / "itos".
    """
    checkpoint = {"state_dict": model.state_dict()}
    checkpoint["stoi"] = dataset.stoi
    checkpoint["itos"] = dataset.itos
    return checkpoint


In [3]:
# max_data is passed to both SNLI splits below.
max_data = 1000
train = SNLI("../data/snli_1.0/", "train", max_data=max_data)
# The dev split is given the training split's stoi/itos mappings as its vocab.
val = SNLI("../data/snli_1.0/", "dev", max_data=max_data, vocab=(train.stoi, train.itos))

# One DataLoader per split; only `shuffle` and `pin_memory` differ between them.
dataloaders = {
    split: DataLoader(
        dataset,
        batch_size=100,
        shuffle=shuffle,
        pin_memory=pin_memory,
        num_workers=0,
        collate_fn=pad_collate,
    )
    for split, dataset, shuffle, pin_memory in (
        ("train", train, True, False),
        ("val", val, False, True),
    )
}


train: 1000it [00:04, 205.32it/s]
dev: 1000it [00:04, 207.84it/s]


In [4]:
# Smoke test: build the BERT-based classifier, push a single training batch
# through it, and print the resulting shapes.

# Fall back to the CPU so the cell does not crash on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = build_model(
    len(train.stoi),
    "doesnt matter",  # build_model ignores model_type when bert=True
    vocab={"stoi": train.stoi, "itos": train.itos},
    bert=True,
)
model = model.to(device)

# Only the first batch is needed, so fetch it directly instead of looping.
s1, s1len, s2, s2len, targets = (
    t.to(device) for t in next(iter(dataloaders["train"]))
)
print(s1.shape, s2.shape)

# A shape check needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    outputs = model(s1, s1len, s2, s2len)

print(outputs.shape)  # This should work without CUDA errors

torch.Size([33, 100]) torch.Size([20, 100])
torch.Size([100, 3])


In [9]:
# Load a saved checkpoint, mapping its tensors onto the CPU. The path is
# relative to the current working directory.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider passing weights_only=True if the installed PyTorch
# supports it and the stored objects allow it.
ckpt = torch.load('models/snli/0.pth', map_location="cpu")

In [11]:
# Display the top-level entries stored in the loaded checkpoint.
ckpt.keys()

dict_keys(['state_dict', 'stoi', 'itos'])