In [1]:
%connect_info

{
  "shell_port": 62950,
  "iopub_port": 62951,
  "stdin_port": 62952,
  "control_port": 62954,
  "hb_port": 62953,
  "ip": "127.0.0.1",
  "key": "dc3a9179-acb91e638e46135670ee05ca",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-fd5039f1-171c-48e8-9399-eff02f485d98.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [None]:
%matplotlib inline

In [None]:
import sys
sys.argv = sys.argv[:1]

In [None]:
import os
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext.data import BucketIterator

from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix, classification_report


from ignite.engine import Engine, Events
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.handlers import ProgressBar

from models import CNNClassifier
from preprocess import load_tokenized_data, SentenceDataset, SkorchBucketIterator
from utils import build_model_name, convert_flags_to_dict, define_cnn_flags

from transformers import AutoModel

In [5]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [6]:
bert_type = 'distilbert-base-multilingual-cased'
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
METRIC = "F1"

In [7]:
(x_train, y_train), (x_val, y_val), (x_dev, y_dev) = load_tokenized_data(
    datafile='{}/data/{}.tokenized.pkl'.format(os.getcwd(), bert_type),
    language_codes=['DE', 'GA', 'HI', 'PT', 'ZH'],
    seed=SEED)

175
228
99
338
204


In [8]:
len(y_train)

22579

In [9]:
transformer = AutoModel.from_pretrained(bert_type)

In [10]:
config = {
    'nfilters': 128,
    'kernels': [1, 2, 3, 4, 5],
    'pool_stride': 3,
    'dropout': 0.2,
    'output_activation': 'sigmoid',
    'transformer_device': 'cpu',
    'bert': transformer
}

In [11]:
model = CNNClassifier(config)
# model.to(DEVICE)   # pylint: disable=no-member
model.freeze_transformer()

In [12]:
from skorch import NeuralNetClassifier
from skorch.callbacks import Freezer
from skorch.callbacks import ProgressBar, EpochScoring, EarlyStopping, Checkpoint
from skorch.helper import predefined_split

In [13]:
net = NeuralNetClassifier(
    module=model,
    #
    iterator_train=SkorchBucketIterator,    
    iterator_train__batch_size=32,
    iterator_train__sort_key=lambda x: len(x.sentence),
    iterator_train__shuffle=True,
    iterator_train__device=DEVICE,
    #
    iterator_valid=SkorchBucketIterator, 
    iterator_valid__batch_size=32,
    iterator_valid__sort_key=lambda x: len(x.sentence),
    iterator_valid__shuffle=True,
    iterator_valid__device=DEVICE,
    
    train_split=predefined_split(SentenceDataset(data=(x_val, y_val))),
    
    optimizer=torch.optim.Adam,
    criterion=nn.BCELoss,
    callbacks=[
        ProgressBar(batches_per_epoch=len(x_train) // 128 + 1),
        EpochScoring(METRIC, lower_is_better=False),
        EarlyStopping(monitor=METRIC, patience=5),
        Checkpoint(monitor=METRIC)
        
    ],

    device=DEVICE
)

In [14]:
net.fit(SentenceDataset(data=(x_train, y_train)), y=None)

HBox(children=(FloatProgress(value=0.0, max=177.0), HTML(value='')))

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=CNNClassifier(
    (transformer): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(119547, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0): TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_features=768, bias=True)
            )
            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=

In [None]:
model = net.module

In [None]:
test_iterator = SkorchBucketIterator(
    dataset=SentenceDataset(data=(x_dev, y_dev)),
    batch_size=1,
    sort_key=lambda x: len(x.sentence),
    shuffle=False,
    device=DEVICE)

In [None]:
model.eval()
preds = []
for i, (x, y) in enumerate(test_iterator):
    if i >= 10:
        break
    y_pred = model(x).squeeze()
    y_pred = torch.argmax(y_pred, dim=1)
    y_pred = y_pred.detach()
    y_pred = [1 if i == 0 else 2 for i in y_pred.tolist()]
    preds.append(y_pred)

In [None]:
len(preds[0])

In [None]:
x, y = next(iter(test_iterator))

In [None]:
len(y[0])

In [None]:
[1 if i == 0 else 2 for i in torch.argmax(pred, dim=1).detach()]

In [None]:
len(y_dev)

In [None]:
y_dev = np.concatenate(y_dev[0:10])

In [None]:
y_dev.shape

In [None]:
y_dev

In [None]:
y_pred = np.concatenate(preds)
y_pred.shape

In [None]:
print(classification_report(y_dev, y_pred))

In [None]:
next(model.fully_connected.parameters()).device.type

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
to_categorical(y_train[3])[:, 1:].shape

In [None]:
len(y_train[3])

In [None]:
x = torch.randn(4, 2)

In [None]:
m = (x > 0).int()

In [None]:
m

In [None]:
x * m