In [1]:
import os
import json
from data.dataset import NERDataset
from models.networks import GlobalContextualDeepTransition
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, Callback
import torch
from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence, pack_padded_sequence

In [2]:
# Read the hyperparameters from config.json, echo them, and forward them
# unchanged as keyword arguments to the model constructor.
with open('config.json', 'r') as file:
    kwargs = json.loads(file.read())
print("Init model params =", json.dumps(kwargs, indent=4))
model = GlobalContextualDeepTransition(**kwargs)

Init model params = {
    "numChars": 100,
    "charEmbedding": 128,
    "numWords": 21388,
    "wordEmbedding": 300,
    "contextOutputUnits": 128,
    "contextTransitionNumber": 4,
    "encoderUnits": 256,
    "decoderUnits": 256,
    "transitionNumber": 4,
    "numTags": 17
}


In [3]:
# Total element count over every model parameter that has requires_grad set.
# NOTE(review): an earlier inline note on this cell read 7,443,753, while the
# output recorded for this cell is 7,443,797 -- confirm which figure is expected.
numParams = sum(
    tensor.numel() for tensor in model.parameters() if tensor.requires_grad
)
print(f"Trainable parameters: {numParams:,}")

Trainable parameters: 7,443,797


In [4]:
# All dataset artefacts are looked up under `base`.
base = 'data/conll03'
gloveFile = f'{base}/trimmed.300d.Cased.txt'
symbFile = f'{base}/sym.glove'
sourceName = f'{base}/eng.train.src'
targetName = f'{base}/eng.train.trg'

data = NERDataset(sourceName, targetName, gloveFile, symbFile)
# NOTE(review): 10 is presumably the batch size -- confirm against NERDataset.getLoader.
loader = data.getLoader(10, shuffle=False)
# Keep the first batch around for the checks in the following cells.
batch = next(iter(loader))

In [5]:
# Smoke test: run a single training step on the first batch with autograd
# disabled, then print the resulting scalar loss.
with torch.no_grad():
    loss = model.training_step(batch, 0)
print(loss.item())

3.008681297302246


In [9]:
import torch.nn.functional as F

class LabelSmoothingLoss(torch.nn.Module):
    """Cross-entropy loss with uniform label smoothing.

    In training mode the loss is
    ``epsilon * (-sum_c log p_c) / C + (1 - epsilon) * NLL``, where ``C`` is the
    size of the last dimension of ``preds``. In evaluation mode smoothing is
    switched off and plain cross-entropy is returned. Both modes apply the
    configured ``reduction``.

    Args:
        smoothing: weight ``epsilon`` given to the uniform term.
        reduction: ``"mean"``, ``"sum"`` or ``"none"``.
        weight: optional per-class weight tensor. It is forwarded to the
            NLL / cross-entropy term only; the uniform term is not weighted.
    """

    def __init__(self, smoothing: float = 0.1, reduction="mean", weight=None):
        super(LabelSmoothingLoss, self).__init__()
        self.epsilon = smoothing
        self.reduction = reduction
        self.weight = weight

    def reduce_loss(self, loss):
        """Reduce a per-sample loss tensor according to ``self.reduction``.

        Any value other than ``"mean"`` or ``"sum"`` returns ``loss`` unreduced.
        """
        if self.reduction == "mean":
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:
            return loss

    def linear_combination(self, x, y):
        """Return ``epsilon * x + (1 - epsilon) * y``."""
        return self.epsilon * x + (1 - self.epsilon) * y

    def forward(self, preds, target):
        """Compute the loss for raw logits ``preds`` against class indices ``target``.

        NOTE(review): assumes ``preds`` is ``(N, C)`` and ``target`` is ``(N,)``;
        the softmax is taken over the last dimension -- confirm for other shapes.
        """
        if self.weight is not None:
            # Keep the class weights on the same device as the predictions.
            self.weight = self.weight.to(preds.device)

        if not self.training:
            # Evaluation: no smoothing, but the configured reduction still
            # applies so train and eval outputs have the same shape and scale.
            return F.cross_entropy(
                preds, target, weight=self.weight, reduction=self.reduction
            )

        n = preds.size(-1)
        log_preds = F.log_softmax(preds, dim=-1)
        # Uniform-target term: negative log-probability summed over all classes.
        loss = self.reduce_loss(-log_preds.sum(dim=-1))
        nll = F.nll_loss(
            log_preds, target, reduction=self.reduction, weight=self.weight
        )
        return self.linear_combination(loss / n, nll)

In [11]:
# Compare the model's own smoothing loss with the standalone
# LabelSmoothingLoss defined in this notebook on the first eight batches;
# each printed value is the difference between the two.
fn = LabelSmoothingLoss()
for i, batch in enumerate(loader):
    words, chars, charMask, targets = batch
    with torch.no_grad():
        # Global representation combined with the word and character
        # representations (as described by the original note on this step).
        wcg = model.contextEncoder(words, chars, charMask)

        # Encode the concatenated input and decode logits with the sequence
        # labeller; the targets are passed in alongside the encoded input.
        logits = model.sequenceLabeller.enforced_logits(wcg, targets)

        loss = model.smoothingLoss(logits.data, targets.data)
        other = fn(logits.data, targets.data)
    print(loss.item() - other.item())
    if i == 7:
        break

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
