In [1]:
import os
import json
from data.dataset import NERDataset
from models.networks import GlobalContextualDeepTransition
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, Callback

In [2]:
# Read the model hyper-parameters from config.json and build the network.
# The encoding is pinned to UTF-8 so the file is decoded the same way on
# every platform instead of depending on the locale default.
with open('config.json', 'r', encoding='utf-8') as configFile:
    kwargs = json.load(configFile)
# Echo the parameters so the recorded notebook output documents the run.
print("Init model params =", json.dumps(kwargs, indent=4))
# Each key in the JSON object is forwarded as a keyword argument.
model = GlobalContextualDeepTransition(**kwargs)

Init model params = {
    "numChars": 100,
    "charEmbedding": 128,
    "numWords": 21388,
    "wordEmbedding": 300,
    "contextOutputUnits": 128,
    "contextTransitionNumber": 4,
    "encoderUnits": 256,
    "decoderUnits": 256,
    "transitionNumber": 4,
    "numTags": 17
}


In [3]:
# Total number of scalar weights that will receive gradients; parameters
# with requires_grad=False are left out of the count.
numParams = 0
for param in model.parameters():
    if param.requires_grad:
        numParams += param.numel()
print(f"Trainable parameters: {numParams:,}")

Trainable parameters: 7,443,753


In [4]:
# Print one row per entry of the context encoder's state dict:
# the entry name (dots shown as slashes), its shape, and its element count.
for name, tensor in model.contextEncoder.state_dict().items():
    # tuple(tensor.shape) prints exactly like a NumPy shape tuple, but does
    # not go through tensor.numpy(), which raises for tensors that are not
    # stored on the CPU.
    shapeText = str(tuple(tensor.shape))
    print(name.ljust(63).replace('.', '/'), 'shape', shapeText.ljust(12), tensor.numel())

gloveBias                                                       shape (1, 300)     300
cnn/embedding/weight                                            shape (100, 128)   12800
cnn/conv1d/weight                                               shape (128, 128, 3) 49152
cnn/conv1d/bias                                                 shape (128,)       128
outerCnn/embedding/weight                                       shape (100, 128)   12800
outerCnn/conv1d/weight                                          shape (128, 128, 3) 49152
outerCnn/conv1d/bias                                            shape (128,)       128
glove/weight                                                    shape (21388, 300) 6416400
forwardEncoder/linearGRU/reset_gate                             shape (556, 128)   71168
forwardEncoder/linearGRU/update_gate                            shape (556, 128)   71168
forwardEncoder/linearGRU/linear_gate                            shape (556, 128)   71168
forwardEncoder/linearGR