In [2]:
import os
import sys
import torch

# Root of the tutorial project, resolved from the WORKSPACE environment
# variable (indexing os.environ raises KeyError when it is not set).
PROJ_DIR = os.path.join(os.environ['WORKSPACE'], 'tutorial/')

# Put the project root on sys.path so the `src.*` imports in later cells
# resolve; the membership check keeps re-running this cell from adding
# duplicate entries.
if PROJ_DIR not in sys.path:
    sys.path.append(PROJ_DIR)

# Prepare the data

In [3]:
import pickle
from src.dataset import IMDBDatset
from src.utilities import flatten, get_dataloader

# Load the pre-built corpus. NOTE: pickle.load can execute arbitrary code
# while unpickling, so only ever point this at a trusted file.
with open('data.pickle', 'rb') as fp:
    corpus = pickle.load(fp)

# One loader per split, built from (split, batch_size, shuffle) rows:
# only the training split is shuffled; dev/test use a larger batch size.
dataloaders = {
    split: get_dataloader(corpus[split], batch_size=batch_size, shuffle=shuffle)
    for split, batch_size, shuffle in (
        ('train', 32, True),
        ('dev', 128, False),
        ('test', 128, False),
    )
}

# CNN Layer

In [12]:
import torch
import torch.nn as nn


class CNNLayer(nn.Module):
    """N-gram feature extractor: parallel 1-D convolutions with max-over-time pooling.

    One ``Conv1d`` + ``BatchNorm1d`` pair is created per kernel width. In
    ``forward`` every branch convolves along the time axis, is batch-normalised,
    max-pooled over time and passed through a ReLU; the pooled branch outputs
    are concatenated along the feature axis.

    Args:
        input_dim: size of each input embedding vector (``Conv1d`` in_channels).
        channels: number of output channels of each convolution.
        kernels: kernel width of each convolution (the "n" of the n-gram);
            must have the same length as ``channels``.
        maxlen: maximum number of time steps fed to the convolutions; longer
            inputs are truncated to their first ``maxlen`` steps.
    """

    def __init__(self, input_dim, channels, kernels, maxlen):
        super().__init__()

        assert len(kernels) == len(channels)

        self.input_dim = input_dim
        self.maxlen = maxlen      # maximum sequence length
        self.kernels = kernels    # playing the role of n-gram of different orders
        self.channels = channels  # the number of output channels per convolution layer

        # ModuleDicts (rather than plain dicts) so the sub-layers are registered
        # as sub-modules and their parameters are visible to the optimizer.
        self.cnn = nn.ModuleDict()
        self.bn = nn.ModuleDict()

        for kernel, out_channels in zip(kernels, channels):
            self.cnn[f'{kernel}_gram'] = nn.Conv1d(self.input_dim, out_channels, kernel)
            self.bn[f'{kernel}_gram'] = nn.BatchNorm1d(out_channels)

    def forward(self, embeddings):
        """Extract pooled n-gram features from a batch of embedded sequences.

        Args:
            embeddings: tensor of shape (batch, seq_length, input_dim).

        Returns:
            dict with a single key ``'output'`` holding a tensor of shape
            (batch, sum(channels)).

        Note:
            The convolutions are built without padding, so the (possibly
            truncated) sequence must be at least as long as the widest kernel.
        """
        # Enforce the maximum length. Previously `maxlen` only entered the
        # expected-shape computation, so any input longer than `maxlen`
        # tripped the shape assertion below instead of being truncated.
        embeddings = embeddings[:, :self.maxlen]

        batch_size = embeddings.size(0)
        seq_length = embeddings.size(1)

        # Prepare for sliding the Conv1d across time
        embeddings = embeddings.transpose(1, 2)  # -> (batch, input_dim, seq_length)

        convs = []
        for kernel, channels in zip(self.kernels, self.channels):
            cnn_key = f'{kernel}_gram'

            convolved = self.cnn[cnn_key](embeddings)   # -> (batch, channels, seq_length - kernel + 1)

            # Cheap sanity check on the convolution output shape.
            curr_shape = convolved.size()
            expt_shape = (batch_size, channels, seq_length - kernel + 1)

            assert curr_shape == expt_shape, "Wrong size: {}. Expected {}".format(curr_shape, expt_shape)

            convolved = self.bn[cnn_key](convolved)     # -> (batch, channels, seq_length - kernel + 1)
            convolved, _ = torch.max(convolved, dim=2)  # max over time -> (batch, channels)
            convolved = torch.nn.functional.relu(convolved)
            convs.append(convolved)

        # Concatenate the branches along the feature axis -> (batch, sum(channels))
        convs = torch.cat(convs, dim=1)

        return {'output': convs}

# CNN Classifier

In [19]:
class CNNClassifier(nn.Module):
    """Binary classifier: embedder -> feature extractor -> single-logit linear head.

    Args:
        embedder: module whose forward returns a dict with an ``'output'`` entry.
        extractor: module exposing a ``channels`` sequence (its sum is the
            feature width) and whose forward returns a dict with ``'output'``.
    """

    def __init__(self, embedder, extractor):
        super().__init__()
        self.embedder = embedder
        self.extractor = extractor

        # The head consumes the concatenated extractor features.
        n_features = sum(extractor.channels)
        self.classifier = nn.Linear(n_features, 1)
        self.xentropy = nn.BCEWithLogitsLoss()

    def forward(self, tokens, targets=None):
        """Score a batch and, when targets are supplied, compute the loss.

        Returns:
            dict with ``'output'`` (raw logits) and ``'loss'``. Without targets
            the logits keep the linear head's shape and the loss is ``None``;
            with targets the logits are flattened to 1-D and the loss is the
            BCE-with-logits loss against the targets cast to float.
        """
        features = self.extractor(self.embedder(tokens)['output'])['output']
        scores = self.classifier(features)

        # Inference path: nothing to compare against.
        if targets is None:
            return {'output': scores, 'loss': None}

        scores = scores.view(-1)
        return {'output': scores, 'loss': self.xentropy(scores, targets.float())}

In [20]:
from src.nets.embedder import WordEmbedder

# Vocabulary: every distinct token in the train and dev splits
# (the test split is deliberately not included here).
vocab = set(flatten(corpus['train'].tokens + corpus['dev'].tokens))

def create_cnn_classifier(channels=(32, 64), kernels=(2, 3), maxlen=64, glove_path=None):
    """Build a fresh CNNClassifier on top of a WordEmbedder and a CNNLayer.

    Called with no arguments this builds exactly the original configuration;
    the keyword arguments make the hyper-parameters tunable without editing
    the function.

    Args:
        channels: output channels of each convolution in the CNNLayer.
        kernels: kernel width of each convolution; same length as ``channels``.
        maxlen: maximum sequence length passed to the CNNLayer.
        glove_path: embeddings file handed to WordEmbedder; defaults to
            ``glove.6B/glove.6B.100d.txt`` under ``PROJ_DIR``.

    Returns:
        A newly constructed CNNClassifier.
    """
    if glove_path is None:
        glove_path = os.path.join(PROJ_DIR, 'glove.6B/glove.6B.100d.txt')

    # Uses the notebook-level `vocab` built above.
    embedder = WordEmbedder(vocab, glove_path)
    extractor = CNNLayer(embedder.emb_dim, channels=list(channels), kernels=list(kernels), maxlen=maxlen)
    return CNNClassifier(embedder, extractor)

model = create_cnn_classifier()
model

CNNClassifier(
  (embedder): WordEmbedder(
    (embeddings): Embedding(21695, 100)
  )
  (extractor): CNNLayer(
    (cnn): ModuleDict(
      (2_gram): Conv1d(100, 32, kernel_size=(2,), stride=(1,))
      (3_gram): Conv1d(100, 64, kernel_size=(3,), stride=(1,))
    )
    (bn): ModuleDict(
      (2_gram): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3_gram): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (classifier): Linear(in_features=96, out_features=1, bias=True)
  (xentropy): BCEWithLogitsLoss()
)

# Training the CNN model

In [21]:
from src.utilities import train
import torch.optim as optim

# Training settings. `lr` and `momentum` configure the SGD optimizer below;
# the whole dict is also handed to `train` (presumably it reads `epochs` and
# `checkpoint` -- confirm against src.utilities.train).
config = dict(
    lr=1e-2,
    momentum=0.99,
    epochs=10,
    checkpoint='cnn_model.pt',
)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=config['lr'], momentum=config['momentum'])
model = train(model, dataloaders, optimizer, config)

E001 [TRAIN] Loss: 0.6406, Acc: 0.6442 [DEV] Loss: 0.5253, Acc: 0.7400 [TEST] Loss: 0.5271, Acc: 0.7308 * 
E002 [TRAIN] Loss: 0.5478, Acc: 0.7209 [DEV] Loss: 0.5075, Acc: 0.7442 [TEST] Loss: 0.5114, Acc: 0.7447 * 
E003 [TRAIN] Loss: 0.5038, Acc: 0.7507 [DEV] Loss: 0.5009, Acc: 0.7472 [TEST] Loss: 0.4998, Acc: 0.7554 * 
E004 [TRAIN] Loss: 0.4881, Acc: 0.7603 [DEV] Loss: 0.6079, Acc: 0.6618 [TEST] Loss: 0.5931, Acc: 0.6702
E005 [TRAIN] Loss: 0.5009, Acc: 0.7507 [DEV] Loss: 0.5663, Acc: 0.7106 [TEST] Loss: 0.5564, Acc: 0.7081
E006 [TRAIN] Loss: 0.4443, Acc: 0.7915 [DEV] Loss: 0.5137, Acc: 0.7456 [TEST] Loss: 0.5240, Acc: 0.7445
E007 [TRAIN] Loss: 0.4071, Acc: 0.8114 [DEV] Loss: 0.5381, Acc: 0.7484 [TEST] Loss: 0.5500, Acc: 0.7456 * 
E008 [TRAIN] Loss: 0.3776, Acc: 0.8299 [DEV] Loss: 0.5451, Acc: 0.7464 [TEST] Loss: 0.5561, Acc: 0.7399
E009 [TRAIN] Loss: 0.3588, Acc: 0.8390 [DEV] Loss: 0.5985, Acc: 0.7412 [TEST] Loss: 0.6113, Acc: 0.7449
E010 [TRAIN] Loss: 0.3394, Acc: 0.8506 [DEV] Loss: 0