In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install git+https://github.com/PetrochukM/PyTorch-NLP.git

Collecting git+https://github.com/PetrochukM/PyTorch-NLP.git
  Cloning https://github.com/PetrochukM/PyTorch-NLP.git to /tmp/pip-req-build-w6n7grv9
  Running command git clone --filter=blob:none --quiet https://github.com/PetrochukM/PyTorch-NLP.git /tmp/pip-req-build-w6n7grv9
  Resolved https://github.com/PetrochukM/PyTorch-NLP.git to commit 53d7edcb8e0c099efce7c2ddf8cd7c44157fcac3
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: pytorch-nlp
  Building wheel for pytorch-nlp (setup.py) ... [?25l- done
[?25h  Created wheel for pytorch-nlp: filename=pytorch_nlp-0.5.0-py3-none-any.whl size=88718 sha256=59474a195c94750fb1a8fa10dfe6eeadd25ac8868d85bd629c30db2f4f480ba1
  Stored in directory: /tmp/pip-ephem-wheel-cache-56b4w9af/wheels/a5/93/b0/9f0138afb1271281613a5af71272c5b246fdd2d421c6fbdf88
Successfully built pytorch-nlp
[0mInstalling collected packages: pytorch-nlp
Successfully installed pytorch-nlp-0.5.0


In [3]:
!nvidia-smi

Sat Jun 22 16:51:38 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla T4                       Off | 00000000:00:0

In [4]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class Bottle(nn.Module):

    def forward(self, input):
        if len(input.size()) <= 2:
            return super(Bottle, self).forward(input)
        size = input.size()[:2]
        out = super(Bottle, self).forward(input.view(size[0]*size[1], -1))
        return out.view(size[0], size[1], -1)


class Linear(Bottle, nn.Linear):
    pass

class Encoder(nn.Module):

    def __init__(self):
        super(Encoder, self).__init__()
        input_size = 300
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=300,
                        num_layers=1, dropout=0.2,
                        bidirectional=True)

    def forward(self, inputs):
        batch_size = inputs.size()[1]
        state_shape = 2, batch_size, 300
        h0 = c0 = Variable(inputs.data.new(*state_shape).zero_())
        outputs, (ht, ct) = self.rnn(inputs, (h0, c0))
        return ht[-2:].transpose(0, 1).contiguous().view(batch_size, -1)

class SNLIClassifier(nn.Module):

    def __init__(self, n_embed, d_out):
        super(SNLIClassifier, self).__init__()
        self.embed = nn.Embedding(n_embed, 100)
        self.projection = Linear(100, 300)
        self.encoder = Encoder()
        self.dropout = nn.Dropout(p=0.2)
        self.relu = nn.ReLU()
        seq_in_size = 4*300
        lin_config = [seq_in_size]*2
        self.out = nn.Sequential(
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(*lin_config),
            self.relu,
            self.dropout,
            Linear(seq_in_size, d_out))

    def forward(self, premise, hypothesis):
        prem_embed = self.embed(premise)
        hypo_embed = self.embed(hypothesis)
        prem_embed = Variable(prem_embed.data)
        hypo_embed = Variable(hypo_embed.data)  
        prem_embed = self.relu(self.projection(prem_embed))
        hypo_embed = self.relu(self.projection(hypo_embed))
        premise = self.encoder(prem_embed)
        hypothesis = self.encoder(hypo_embed)
        scores = self.out(torch.cat([premise, hypothesis], 1))
        return scores

In [5]:
from functools import partial

import glob
import itertools
import os
import time

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SequentialSampler

import torch
import torch.optim as optim
import torch.nn as nn

from torchnlp.samplers import BucketBatchSampler
from torchnlp.datasets import snli_dataset
from torchnlp.encoders.text import WhitespaceEncoder, stack_and_pad_tensors
from torchnlp.encoders import LabelEncoder
from torchnlp import word_to_vector

torch.cuda.set_device(0)

def collate_fn(batch, train=True):
    """ list of tensors to a batch tensors """
    premise_batch, _ = stack_and_pad_tensors([row['premise'] for row in batch])
    hypothesis_batch, _ = stack_and_pad_tensors([row['hypothesis'] for row in batch])
    label_batch = torch.stack([row['label'] for row in batch])

    # PyTorch RNN requires batches to be transposed for speed and integration with CUDA
    transpose = (lambda b: b.t_().squeeze(0).contiguous())

    return (transpose(premise_batch), transpose(hypothesis_batch), transpose(label_batch))

def makedirs(name):
    """helper function for python 2 and 3 to call os.makedirs()
       avoiding an error if the directory to be created already exists"""

    import os, errno

    try:
        os.makedirs(name)
    except OSError as ex:
        if ex.errno == errno.EEXIST and os.path.isdir(name):
            # ignore existing directory
            pass
        else:
            # a different error happened
            raise
            
# load dataset
train, dev, test = snli_dataset(train=True, dev=True, test=True)

# Preprocess
for row in itertools.chain(train, dev, test):
    row['premise'] = row['premise'].lower()
    row['hypothesis'] = row['hypothesis'].lower()

# Make Encoders
sentence_corpus = [row['premise'] for row in itertools.chain(train, dev, test)]
sentence_corpus += [row['hypothesis'] for row in itertools.chain(train, dev, test)]
sentence_encoder = WhitespaceEncoder(sentence_corpus)

label_corpus = [row['label'] for row in itertools.chain(train, dev, test)]
label_encoder = LabelEncoder(label_corpus)

# Encode
for row in itertools.chain(train, dev, test):
    row['premise'] = sentence_encoder.encode(row['premise'])
    row['hypothesis'] = sentence_encoder.encode(row['hypothesis'])
    row['label'] = label_encoder.encode(row['label'])

n_embed = sentence_encoder.vocab_size
d_out = label_encoder.vocab_size
n_cells = 1

# double the number of cells for bidirectional networks
if True:
    n_cells *= 2


model = SNLIClassifier(n_embed, d_out)

# Load word vectors
word_vectors = word_to_vector.aliases['glove.6B.100d']()
for i, token in enumerate(sentence_encoder.vocab):
    model.embed.weight.data[i] = word_vectors[token]

model.cpu()

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=0.001)

iterations = 0
start = time.time()
best_dev_acc = -1
header = '  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy'
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{:8.6f},{:12.4f},{:12.4f}'
    .split(','))
log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>5.0f}/{:<5.0f} {:>7.0f}%,{:>8.6f},{},{:12.4f},{}'.split(','))
makedirs('results')
print(header)

for epoch in range(25):
    n_correct, n_total = 0, 0

    train_sampler = SequentialSampler(train)
    train_batch_sampler = BucketBatchSampler(
        train_sampler, 128, True, sort_key=lambda r: len(train[r]['premise']))
    train_iterator = DataLoader(
        train,
        batch_sampler=train_batch_sampler,
        collate_fn=collate_fn,
        pin_memory=torch.cuda.is_available(),
        num_workers=0)
    for batch_idx, (premise_batch, hypothesis_batch, label_batch) in enumerate(train_iterator):

        # switch model to training mode, clear gradient accumulators
        model.train()
        torch.set_grad_enabled(True)
        opt.zero_grad()

        iterations += 1

        # forward pass
        answer = model(premise_batch, hypothesis_batch)

        # calculate accuracy of predictions in the current batch
        n_correct += (torch.max(answer, 1)[1].view(label_batch.size()) == label_batch).sum()
        n_total += premise_batch.size()[1]
        train_acc = 100. * n_correct / n_total

        # calculate loss of the network output with respect to training labels
        loss = criterion(answer, label_batch)

        # backpropagate and update optimizer learning rate
        loss.backward()
        opt.step()

        # checkpoint model periodically
        if iterations % 1000 == 0:
            snapshot_prefix = os.path.join('results', 'snapshot')
            snapshot_path = snapshot_prefix + '_acc_{:.4f}_loss_{:.6f}_iter_{}_model.pt'.format(
                train_acc, loss.item(), iterations)
            torch.save(model, snapshot_path)
            for f in glob.glob(snapshot_prefix + '*'):
                if f != snapshot_path:
                    os.remove(f)

        # evaluate performance on validation set periodically
        if iterations % 1000 == 0:

            # switch model to evaluation mode
            model.eval()
            torch.set_grad_enabled(False)

            # calculate accuracy on validation set
            n_dev_correct, dev_loss = 0, 0

            dev_sampler = SequentialSampler(dev)
            dev_batch_sampler = BucketBatchSampler(
                dev_sampler, 128, True, sort_key=lambda r: len(dev[r]['premise']))
            dev_iterator = DataLoader(
                dev,
                batch_sampler=dev_batch_sampler,
                collate_fn=partial(collate_fn, train=False),
                pin_memory=torch.cuda.is_available(),
                num_workers=0)
            for dev_batch_idx, (premise_batch, hypothesis_batch,
                                label_batch) in enumerate(dev_iterator):
                answer = model(premise_batch, hypothesis_batch)
                n_dev_correct += (torch.max(answer,
                                            1)[1].view(label_batch.size()) == label_batch).sum()
                dev_loss = criterion(answer, label_batch)
            dev_acc = 100. * n_dev_correct / len(dev)

            print(
                dev_log_template.format(time.time() - start, epoch, iterations, 1 + batch_idx,
                                        len(train_sampler),
                                        100. * (1 + batch_idx) / len(train_sampler), loss.item(),
                                        dev_loss.item(), train_acc, dev_acc))

            # update best validation set accuracy
            if dev_acc > best_dev_acc:

                # found a model with better validation set accuracy

                best_dev_acc = dev_acc
                snapshot_prefix = os.path.join('results', 'best_snapshot')
                snapshot_path = snapshot_prefix + '_devacc_{}_devloss_{}__iter_{}_model.pt'.format(
                    dev_acc, dev_loss.item(), iterations)

                # save model, delete previous 'best_snapshot' files
                torch.save(model, snapshot_path)
                for f in glob.glob(snapshot_prefix + '*'):
                    if f != snapshot_path:
                        os.remove(f)

        elif iterations % 1000 == 0:

            # print progress message
            print(
                log_template.format(time.time() - start, epoch, iterations, 1 + batch_idx,
                                    len(train_sampler), 100. * (1 + batch_idx) / len(train_sampler),
                                    loss.item(), ' ' * 8, n_correct / n_total * 100, ' ' * 12))

snli_1.0.zip: 94.6MB [00:07, 13.4MB/s]
glove.6B.zip: 862MB [02:39, 5.42MB/s]                           
100%|██████████| 400000/400000 [00:27<00:00, 14564.25it/s]


  Time Epoch Iteration Progress    (%Epoch)   Loss   Dev/Loss     Accuracy  Dev/Accuracy




   298     0      1000  1000/550152       0% 0.830741 0.841028      53.6742      60.9200
   610     0      2000  2000/550152       0% 0.889661 0.723325      57.8809      63.3500
   928     0      3000  3000/550152       1% 0.757285 0.815956      59.9208      65.1300
  1246     0      4000  4000/550152       1% 0.808628 0.956239      61.3703      65.7500
  1563     1      5000   702/550152       0% 0.715099 0.845020      66.9783      67.1500
  1881     1      6000  1702/550152       0% 0.829306 0.716270      67.1985      67.8200
  2199     1      7000  2702/550152       0% 0.613150 0.674699      67.5197      68.4700
  2519     1      8000  3702/550152       1% 0.633610 0.706520      67.7771      68.6100
  2836     2      9000   404/550152       0% 0.696915 0.802008      69.4249      69.1400
  3151     2     10000  1404/550152       0% 0.695678 0.795504      69.4673      69.0700
  3466     2     11000  2404/550152       0% 0.799542 0.838689      69.6905      69.2300
  3782     2     1200

In [6]:
!pip install torch
import torch
torch.__version__

[0m

'2.1.2'