In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
!nvidia-smi

Wed Jul  8 13:05:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P0    58W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
!pip install skorch transformers 
!pip install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
# !CUDA_LAUNCH_BLOCKING=1

Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [4]:
%matplotlib inline

In [5]:
import sys
# Keep only the program name so that the parser.parse_args() call further
# down does not see the command-line flags the notebook kernel was started with.
sys.argv = sys.argv[:1]

In [6]:
import argparse
import os

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

from skorch.callbacks import ProgressBar, EarlyStopping, Checkpoint
from skorch.helper import predefined_split
from transformers import AutoModel, AutoTokenizer

In [7]:
from eval_scripts.evaluate import Main

# Class id -> label name. The position in the list is the class id; id 0 is
# the '*' label that marks a token without an annotation.
ID_LABELS = dict(enumerate([
    '*',
    'IAV',
    'IRV',
    'LVC.cause',
    'LVC.full',
    'LS.ICV',
    'MVC',
    'VID',
    'VPC.full',
    'VPC.semi',
]))

def evaluate_model(net, test_iterator, tokenizer, args):
    """Predict labels for ``test_iterator`` and write them out as .cupt files.

    The function works in three stages:

    1. Run ``net.predict`` on every item of ``test_iterator``, map the input
       ids back to tokens with ``tokenizer`` and fold continuation pieces
       (tokens containing ``#``) into the preceding token, so that there is
       one prediction per merged token.
    2. Copy ``args.dev_file`` to a sibling ``temp.cupt`` file, replacing the
       last column of every token line with the predicted label name.
    3. Rewrite ``temp.cupt`` into ``system.cupt``, turning the label names
       into numbered ``<index>:<label>`` / ``<index>`` annotations.

    If ``args.eval`` is truthy, ``_run_sript(args)`` is called at the end.

    Args:
        net: Object exposing ``predict(x)`` that returns a tensor of label ids.
        test_iterator: Iterable of ``(x, y)`` pairs; ``y`` is not used here.
        tokenizer: Object exposing ``convert_ids_to_tokens``.
        args: Namespace providing ``labels``, ``dev_file`` and ``eval``.
            The output paths are derived with
            ``dev_file.replace('dev.cupt', ...)``.

    Returns:
        None. The results are the two files written next to ``args.dev_file``.
    """
    preds = []
    sents = []
    i = 0
    for x, y in test_iterator:
        y_pred = net.predict(x)
    #     i += 1
        # Progress trace: print the running item index every 40 items.
        if i % 40 == 0:
            print(i)
        i += 1
        sub_tokens = []
        sub_preds = []
        text = []
        predictions = []
        # NOTE(review): x is flattened, so several sentences in one batch would
        # be concatenated into a single token list — presumably the iterator
        # is expected to yield one sentence at a time; confirm.
        tokens = tokenizer.convert_ids_to_tokens(x.detach().cpu().numpy().reshape(-1))
        # tokens = tokens
        y_pred = y_pred.cpu().detach().reshape(-1).tolist()
        for t, p in zip(tokens, y_pred):
            # Any token containing '#' is treated as a continuation piece of
            # the previous token; all '#' characters are stripped from it.
            if '#' in t:
                sub_tokens.append(t.replace('#', ''))
                sub_preds.append(p)
            else:
                if sub_tokens:
                    # Flush the pending pieces into the previously emitted
                    # token before handling the current one.
                    old_token = ''.join([text[-1]] + sub_tokens)
                    old_pred = sum(sub_preds)
                    text = text[0:-1]
                    text.append(old_token)
                    # The merged token's label is taken from the pieces only
                    # (the head token's own prediction is replaced): 0 when all
                    # pieces are 0, else the first piece's label if positive,
                    # else the largest piece label.
                    predictions = predictions[0:-1]
                    predictions.append(old_pred
                                       if old_pred == 0
                                       else (sub_preds[0]
                                             if sub_preds[0] > 0
                                             else max(sub_preds)))
                    old_token = t
                    old_pred = p
                    sub_tokens = []
                    sub_preds = []
                else:
                    old_token = t
                    old_pred = p
                text.append(old_token)
                predictions.append(old_pred)
                assert len(text[1:-1]) == len(predictions[1:-1])
        # Pieces still pending here are never flushed; they are only merged
        # when a token without '#' follows them.
        # The first and last merged tokens are dropped — presumably the
        # tokenizer's start/end special tokens; TODO confirm.
        sents.append(text[1:-1])
        preds += predictions[1:-1]

    # NOTE: `binary` is computed but not used below.
    binary = args.labels == 'binary'
    output_count = 0
    # Stage 2: write temp.cupt with the predicted label name in the last column.
    with open(args.dev_file, 'r') as dev:
        with open(args.dev_file.replace('dev.cupt', 'temp.cupt'), 'w') as test:
            for line in dev:
                # Whitespace split: assumes no column value contains a space.
                feats = line.split()
                # Token lines only: skip '#' comment lines, blank lines and
                # lines whose first column contains '-' (presumably
                # multi-word-token ranges; confirm against the file format).
                if not line.startswith('#') and line != '\n' and '-' not in feats[0]:
                    # Assumes there is exactly one prediction per token line,
                    # in file order; an IndexError here means they diverged.
                    prediction = preds[output_count]
                    if prediction == 0:
                        label = '*'
                    else:
                        label = ID_LABELS.get(prediction, '*')
                        # label = 1
                    # The trailing '\n' element makes the line end in '\t\n'.
                    new_line = '\t'.join(
                        [str(f) for f in feats[0:-1]] + [str(label)] + ['\n'])
                    test.write(new_line)
                    output_count += 1
                else:
                    test.write(line)

    # post-process the file to get the predictions into cupt format
    with open(args.dev_file.replace('dev.cupt', 'temp.cupt'), 'r') as temp:
        with open(args.dev_file.replace('dev.cupt', 'system.cupt'), 'w') as test:
            # [running expression index within the sentence, label of the
            # expression currently being numbered (None before the first one)]
            current_prediction = [1, None]
            # Whether the expression being built already contains a VERB token.
            verb_found = False
            for line in temp:
                feats = line.split('\t')
                if not line.startswith('#') and line != '\n' and '-' not in feats[0]:

                    # feats[10] is the label written in stage 2 and feats[3]
                    # is compared against 'VERB' — this assumes the 11-column
                    # layout produced above (feats[11] is the trailing '\n').
                    if feats[10] == '*':
                        test.write(line)
                        # print(line)
                    else:

                        if current_prediction[1] is None:

                            # First labelled token of the sentence opens
                            # expression number 1 as '<index>:<label>'.
                            label = '{}:{}'.format(current_prediction[0], feats[10])
                            # label = str(current_prediction[0])
                            verb_found = True if feats[3] == 'VERB' else False
                            current_prediction[1] = feats[10]

                        else:

                            if feats[10] == current_prediction[1]:

                                # Same label as the open expression and a verb
                                # was already seen: a non-verb continues it
                                # (bare index) ...
                                if verb_found and feats[3] != 'VERB':
                                    label = current_prediction[0]

                                # ... while a second verb opens a new expression.
                                elif verb_found and feats[3] == 'VERB':
                                    current_prediction[0] = current_prediction[0] + 1
                                    current_prediction[1] = feats[10]
                                    label = '{}:{}'.format(current_prediction[0], feats[10])
                                    # label = str(current_prediction[0])

                                # No verb seen yet: the token continues the
                                # open expression, whatever its POS.
                                elif not verb_found:
                                    label = current_prediction[0]
                                    verb_found = True if feats[3] == 'VERB' else False

                            else:
                                # A different label always opens a new expression.
                                current_prediction[0] = current_prediction[0] + 1
                                current_prediction[1] = feats[10]
                                label = '{}:{}'.format(current_prediction[0], feats[10])
                                # label = str(current_prediction[0])
                                verb_found = True if feats[3] == 'VERB' else False
                        # Drop the old label and the trailing '\n' element,
                        # then re-append the numbered label and the newline.
                        new_line = '\t'.join(feats[0:-2] + [str(label)] + ['\n'])
                        test.write(new_line)
                        # print(new_line)
                else:
                    # A blank line ends the sentence: restart the numbering.
                    if line == '\n':
                        current_prediction = [1, None]
                        verb_found = False
                    test.write(line)
                    # print(line)

    if args.eval:
        _run_sript(args)

def _run_sript(args):
    """Run the shared-task evaluation (``Main``) on the written predictions.

    The gold, prediction and training files are derived from ``args.dev_file``
    by swapping the ``dev.cupt`` suffix, opened for reading and attached to
    ``args`` as ``gold_file``, ``prediction_file`` and ``train_file``.
    ``args.debug`` is set to ``False`` and ``args.combinatorial`` to ``True``
    before ``Main(args).run()`` is called.

    The three files are closed when the evaluation finishes or raises, so the
    handles left on ``args`` afterwards are closed file objects.

    Args:
        args: Namespace with a ``dev_file`` path; mutated in place.
    """
    args.debug = False
    args.combinatorial = True

    gold_path = args.dev_file
    prediction_path = args.dev_file.replace('dev.cupt', 'system.cupt')
    train_path = args.dev_file.replace('dev.cupt', 'train.cupt')

    # Context managers guarantee the handles are released even if the
    # evaluation script fails half-way through.
    with open(gold_path, 'r') as gold_file, \
            open(prediction_path, 'r') as prediction_file, \
            open(train_path, 'r') as train_file:
        args.gold_file = gold_file
        args.prediction_file = prediction_file
        args.train_file = train_file
        print('\n\nRunning shared-task eval script\n\n')
        Main(args).run()

In [8]:
import pickle

import numpy as np
from sklearn.model_selection import train_test_split

# Label name -> class id. The position in the list is the class id; '*' (id 0)
# marks a token without an annotation.
features = {
    label: index
    for index, label in enumerate([
        '*',
        'IAV',
        'IRV',
        'LVC.cause',
        'LVC.full',
        'LS.ICV',
        'MVC',
        'VID',
        'VPC.full',
        'VPC.semi',
    ])
}


def load_tokenized_data(datafile,
                        language_codes,
                        percent=1.0,
                        seed=42,
                        binary=False,
                        split=True):
    """Load the pickled, pre-tokenized corpus and build train/val/dev sets.

    For every language code the training sentences are divided into
    "positive" sentences (at least one non-zero label) and "negative"
    sentences (all labels zero). All positives are kept. Negatives are only
    eligible when they are shorter than the longest positive sentence, and
    ``int(percent * n_positives)`` of them are drawn *with replacement*.

    Args:
        datafile: Path to a pickle holding
            ``{code: {'x_train', 'y_train', 'x_dev', 'y_dev'}}``.
        language_codes: Iterable of keys of that mapping to load.
        percent: Number of sampled negatives as a fraction of the positives.
        seed: Seed for the negative sampling (NumPy's global RNG is re-seeded
            for every language) and for the train/validation split.
        binary: If true, every non-zero label of a positive sentence is
            mapped to 1.
        split: If true, 15% of the training data is held out as validation
            data; otherwise the concatenated dev sets are used for validation.

    Returns:
        ``(x_train, y_train), (x_val, y_val), (x_dev, y_dev)`` where the
        train/val members are lists of sequences and the dev members are
        dicts keyed by language code.

    Raises:
        ValueError: If negatives are requested for a language that has no
            eligible negative sentence.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # data files produced by this project.
    with open(datafile, 'rb') as f:
        data = pickle.load(f)

    x_train, y_train = [], []
    x_dev, y_dev = {}, {}
    for code in language_codes:
        lang_x = data[code]['x_train']
        lang_y = data[code]['y_train']

        true_x, true_y = [], []
        for xsample, ysample in zip(lang_x, lang_y):
            if sum(ysample) > 0:
                true_x.append(xsample)
                if binary:
                    ysample = [0 if y == 0 else 1 for y in ysample]
                true_y.append(ysample)

        # default=0: a language without positives simply has no eligible
        # negatives instead of crashing on max() of an empty sequence.
        max_len = max((len(y) for y in true_y), default=0)

        false_x, false_y = [], []
        for xsample, ysample in zip(lang_x, lang_y):
            if sum(ysample) == 0 and len(ysample) < max_len:
                false_x.append(xsample)
                false_y.append(ysample)

        np.random.seed(seed)
        n_negatives = int(percent * len(true_y))
        if n_negatives > 0:
            idx = np.random.randint(len(false_y), size=n_negatives)
        else:
            idx = []
        # Index the Python lists directly: the sentences have different
        # lengths, so converting them to an ndarray would build a ragged
        # array. Copies keep repeated draws independent of each other.
        sampled_x = [list(false_x[i]) for i in idx]
        sampled_y = [list(false_y[i]) for i in idx]

        x_train += true_x + sampled_x
        y_train += true_y + sampled_y

        x_dev[code] = data[code]["x_dev"]
        y_dev[code] = data[code]["y_dev"]

    if split:
        x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                          y_train,
                                                          random_state=seed,
                                                          test_size=0.15,
                                                          shuffle=True)
    else:
        x_val, y_val = [], []
        for code in language_codes:
            x_val += data[code]["x_dev"]
            y_val += data[code]["y_dev"]

    return (x_train, y_train), (x_val, y_val), (x_dev, y_dev)


In [9]:
""" Utilities module. """


def build_model_name(args, model='rnn-cnn'):
    """Build a descriptive run name from the hyper-parameters in ``args``.

    Args:
        args: Namespace holding the hyper-parameters used by the selected
            architecture (``bert_type``, ``metric``, ``batch_size``,
            ``max_epochs`` plus the architecture-specific ones).
        model: Architecture key: ``'rnn'``, ``'cnn'``, ``'nn'`` or
            ``'cnn-rnn'``. ``'rnn-cnn'`` (the default) is accepted as an alias
            of ``'cnn-rnn'``; it previously matched no branch, so the default
            call silently produced an empty name.

    Returns:
        The name string, or ``''`` for an unknown ``model`` key.
    """
    name = ''
    if model == 'rnn':
        # NOTE(review): there is no '.' between "{6}activation" and
        # "{7}clipnorm". Kept as is so names of existing runs still match.
        name = ("{0}.{1}.{2}layers.{3}lstm.{4}dropout.{5}init.{6}activation"
                "{7}clipnorm.{8}batch.{9}epochs".format(
                    args.bert_type, args.metric, args.nlayers, args.lstm_size,
                    args.dropout, args.initrange, args.output_activation,
                    args.clipnorm, args.batch_size, args.max_epochs
                ))
    elif model == 'cnn':
        name = ("{0}.{1}.{2}filters.{3}kernels.{4}poolstride.{5}dropout."
                "{6}activation.{7}batch.{8}epochs".format(
                    args.bert_type, args.metric, args.nfilters, args.kernels,
                    args.pool_stride, args.dropout, args.output_activation,
                    args.batch_size, args.max_epochs
                ))
    elif model == 'nn':
        name = ("{0}.{1}.{2}hidden.{3}dropout.{4}activation.{5}batch.{6}epochs".format(
                    args.bert_type, args.metric, args.hidden_size, args.dropout,
                    args.output_activation, args.batch_size, args.max_epochs
                ))
    elif model in ('cnn-rnn', 'rnn-cnn'):
        name = ("{0}.{1}.{2}filters.{3}kernels.{4}poolstride.{5}layers."
                "{6}lstm.{7}dropout.{8}init.{9}activation.{10}batch."
                "{11}epochs".format(
                    args.bert_type, args.metric, args.nfilters, args.kernels,
                    args.pool_stride, args.nlayers, args.lstm_size, args.dropout,
                    args.initrange, args.output_activation, args.batch_size,
                    args.max_epochs
                ))
    return name

In [10]:
import torch
import skorch
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from torchtext.data import Dataset, Field, Example, BucketIterator
from tensorflow.keras.utils import to_categorical


class SkorchBucketIterator(BucketIterator):
    """``BucketIterator`` that yields ``(X, y)`` pairs instead of batch objects.

    Each batch produced by the parent iterator is unpacked into
    ``(batch.sentence, batch.labels)``. The labels are either one-hot encoded
    with ``num_classes`` columns (``one_hot=True``) or cast to float.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 sort_key=None,
                 device=None,
                 batch_size_fn=None,
                 train=True,
                 repeat=False,
                 shuffle=None,
                 sort=None,
                 sort_within_batch=None,
                 one_hot=True,
                 num_classes=2):
        """Store the label-encoding options and delegate the rest to the parent."""
        self.one_hot = one_hot
        self.num_classes = num_classes
        super().__init__(dataset, batch_size, sort_key, device, batch_size_fn,
                         train, repeat, shuffle, sort, sort_within_batch)

    def __iter__(self):
        """Yield ``(sentence, labels)`` for every batch of the parent iterator."""
        for batch in super().__iter__():
            if not self.one_hot:
                batch.labels = batch.labels.float()
            else:
                # to_categorical is called on a CPU copy of the labels; the
                # encoded result is then moved to the iterator's device.
                cpu_labels = batch.labels.to('cpu')
                encoded = to_categorical(cpu_labels,
                                         num_classes=self.num_classes)
                batch.labels = torch.tensor(encoded).to(self.device)
            yield batch.sentence, batch.labels


class SentenceDataset(Dataset):
    """Dataset of ``(sentence, labels)`` examples built from id sequences.

    Both fields are numeric (``use_vocab=False``) and batch-first. Sentences
    are padded with ``0`` and labels with ``-1``.

    Args:
        data: Pair ``(sentences, labels)`` of parallel sequences.
        min_len: Sentences shorter than this are right-padded up to this
            length (labels likewise, with ``-1``).
        **kwargs: Forwarded to ``Dataset.__init__``.
    """

    def __init__(self, data, min_len=5, **kwargs):
        self.min_len = min_len
        text_field = Field(use_vocab=False, pad_token=0, batch_first=True)
        label_field = Field(use_vocab=False, pad_token=-1, batch_first=True)
        fields = [("sentence", text_field), ("labels", label_field)]
        examples = []
        for (x, y) in zip(data[0], data[1]):
            if len(x) < self.min_len:     # pad all sequences shorter than this
                # Pad up to min_len (not a hard-coded 5) and build new lists
                # so the caller's data is not modified as a side effect.
                x = list(x) + [0] * (self.min_len - len(x))
                y = list(y) + [-1] * (self.min_len - len(y))
            examples.append(Example.fromlist([x, y], fields))
        super().__init__(examples, fields, **kwargs)


class IdiomClassifier(skorch.NeuralNetClassifier):
    """skorch classifier for per-token labelling with padded label sequences.

    Compared to ``NeuralNetClassifier`` it
    * computes a per-element loss (``criterion__reduction='none'``), scales
      it with per-class weights and averages it,
    * returns torch tensors from ``predict_proba``/``predict``,
    * scores with F1 over all non-padding positions (labels ``>= 0``).

    Args:
        print_report: Print the confusion matrix and the classification
            report every time ``score`` is called.
        class_weights: Sequence whose element ``i`` is the loss weight of
            class ``i``; defaults to ``[1.0, 1.0]``.
        score_average: ``average`` argument passed to ``f1_score``.
        *args, **kwargs: Forwarded to ``NeuralNetClassifier``.
    """

    def __init__(self, print_report=True, class_weights=None, score_average='binary', *args, **kwargs):
        self.print_report = print_report
        self.class_weights = class_weights
        self.score_average = score_average
        if class_weights is None:
            self.class_weights = [1.0, 1.0]
        super(IdiomClassifier, self).__init__(*args, **kwargs)
        # Disable skorch's default accuracy callback and ask the criterion
        # for unreduced losses so they can be re-weighted in get_loss.
        self.set_params(callbacks__valid_acc=None)
        self.set_params(criterion__reduction='none')

    def get_loss(self, y_pred, y_true, X, *args, **kwargs):
        """Return the class-weighted mean loss of a batch.

        Args:
            y_pred: Module output; viewed as ``(-1,)`` for ``BCELoss`` and as
                ``(-1, noutputs)`` otherwise.
            y_true: Label tensor; padding positions are negative.
            X: Batch input, forwarded to the parent implementation.

        Returns:
            Scalar tensor: the mean of the weighted per-element losses.
        """
        if isinstance(self.criterion_, torch.nn.BCELoss):
            # NOTE(review): BCELoss expects probabilities; whether y_pred has
            # already been squashed depends on the module — confirm.
            loss = super().get_loss(
                y_pred.view(-1), y_true.view(-1), X, *args, **kwargs)
        else:
            if isinstance(self.criterion_, torch.nn.NLLLoss):
                activation = self.module.output_activation
                if callable(activation):
                    y_pred = activation(y_pred, dim=2)
                else:
                    # The modules in this file store the activation as a
                    # name, which cannot be called. NLLLoss needs
                    # log-probabilities, hence log-softmax over the classes.
                    y_pred = torch.log_softmax(y_pred, dim=2)
            loss = super().get_loss(
                y_pred.view(-1, self.module.noutputs),
                y_true.long().view(-1), X, *args, **kwargs)

        if self.class_weights is not None:
            # Start from the label values and replace every position whose
            # label equals class index w by that class's weight.
            weights = torch.ones_like(y_true) * y_true
            for w, weight in enumerate(self.class_weights):
                weights = torch.where(
                    y_true == w,
                    torch.tensor(weight).float().to(self.device),
                    weights)
            loss = (loss * weights.view(-1))
        if isinstance(self.criterion_, torch.nn.BCELoss):
            # Zero the loss of padding positions (negative labels).
            mask = (y_true >= 0).int()
            loss = (loss * mask.view(-1))
        return loss.mean()

    def predict_proba(self, X):
        """Return the activated module output for ``X`` as a tensor.

        Softmax over dim 2 is used when the module's ``output_activation`` is
        ``'softmax'``, the sigmoid otherwise. The forward pass runs in eval
        mode and without gradient tracking.
        """
        self.module.eval()
        with torch.no_grad():
            y_pred = self.module(X)

            if self.module.output_activation == 'softmax':
                y_pred = torch.softmax(y_pred, dim=2)
            else:
                y_pred = torch.sigmoid(y_pred)

        return y_pred

    def predict(self, X):
        """Return predicted labels for ``X`` as an integer tensor.

        With several outputs this is the argmax over dim 2; with a single
        output the probability is thresholded at 0.5.
        """
        y_pred = self.predict_proba(X)

        if self.module.noutputs > 1:
            y_pred = torch.argmax(y_pred, dim=2)
        else:
            y_pred = (y_pred > 0.5).int()

        return y_pred

    def score(self, X, y=None):
        """Return the F1 score of the predictions on dataset ``X``.

        Args:
            X: Data accepted by ``get_dataset``; the labels come from the
                iterator built on it.
            y: Ignored; present for scikit-learn API compatibility.

        Returns:
            ``f1_score`` with ``average=self.score_average``, computed over
            the positions whose true label is ``>= 0``.
        """
        self.module.eval()
        ds = self.get_dataset(X)
        target_iterator = self.get_iterator(ds, training=False)

        y_true = []
        y_pred = []
        for x, y_batch in target_iterator:
            preds = self.predict(x)
            y_pred.append(preds.view(-1))
            if len(y_batch.shape) > 2:
                # One-hot labels: recover the class index.
                y_batch = torch.argmax(y_batch, dim=2)
            y_true.append(y_batch.view(-1))
        y_true = torch.cat(y_true).cpu().view(-1).detach().numpy().tolist()
        y_pred = torch.cat(y_pred).cpu().view(-1).detach().numpy().tolist()

        # Drop the padding positions (negative true labels).
        kept = [(t, p) for t, p in zip(y_true, y_pred) if t >= 0]
        y_true = [t for t, _ in kept]
        y_pred = [p for _, p in kept]

        if self.print_report:
            print('Confusion matrix')
            print(confusion_matrix(y_true, y_pred))
            print(classification_report(y_true, y_pred))
        return f1_score(y_true, y_pred, average=self.score_average)


class CustomScorer(skorch.callbacks.EpochScoring):
    """Epoch-scoring callback that records ``net.score`` on the validation set."""

    def on_epoch_end(self, net, dataset_train, dataset_valid, **kwargs):
        """Score ``dataset_valid`` with ``net.score`` and record it in the history."""
        validation_score = net.score(dataset_valid)
        self._record_score(net.history, validation_score)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NNClassifier(nn.Module):
    """Per-token feed-forward classifier on top of a transformer encoder.

    Args:
        config: Namespace with ``dropout``, ``hidden_size``, ``labels``,
            ``output_activation`` and, for ``labels == 'multilabel'``,
            ``num_outputs``.
        transformer: Encoder exposing
            ``embeddings.word_embeddings.embedding_dim``; calling it with
            ``(ids, attention_mask=...)`` must return a sequence whose first
            item is the hidden-state tensor.
        transformer_device: Device the transformer is kept on.
    """

    def __init__(self, config, transformer, transformer_device):
        super().__init__()

        self.transformer_device = transformer_device
        self.model_device = transformer_device
        self.transformer = transformer
        self.dropout = nn.Dropout(config.dropout)
        self.ninputs = transformer.embeddings.word_embeddings.embedding_dim

        # Optional hidden layer in front of the output layer.
        has_hidden_layer = config.hidden_size > 0
        self.fully_connected1 = (
            nn.Linear(self.ninputs, config.hidden_size)
            if has_hidden_layer else None)
        classifier_width = (
            config.hidden_size if has_hidden_layer else self.ninputs)

        if config.labels == 'multilabel':
            self.noutputs = config.num_outputs
        elif config.output_activation == 'softmax':
            self.noutputs = 2
        else:
            self.noutputs = 1

        self.fully_connected = nn.Linear(classifier_width, self.noutputs)

        # Name of the activation the caller applies to the raw outputs.
        if self.noutputs != 1 and config.output_activation != 'sigmoid':
            self.output_activation = 'softmax'
        else:
            self.output_activation = 'sigmoid'

    def to(self, *args, **kwargs):
        """Move the head as requested while keeping the transformer in place."""
        moved = super().to(*args, **kwargs)
        moved.transformer = moved.transformer.to(
            torch.device(moved.transformer_device))
        head_parameter = next(moved.fully_connected.parameters())
        moved.model_device = head_parameter.device.type
        return moved

    def _set_transformer_trainable(self, trainable):
        """Set ``requires_grad`` on every transformer parameter."""
        for parameter in self.transformer.parameters():
            parameter.requires_grad = trainable

    def freeze_transformer(self):
        """Exclude the transformer weights from gradient computation."""
        self._set_transformer_trainable(False)

    def unfreeze_transformer(self):
        """Include the transformer weights in gradient computation again."""
        self._set_transformer_trainable(True)

    def forward(self, x):
        """Return the raw (un-activated) outputs for the token ids ``x``."""
        token_ids = x.to(self.transformer_device)
        # Id 0 is treated as padding and masked out of the attention.
        attention_mask = (token_ids > 0).int()
        hidden = self.transformer(token_ids, attention_mask=attention_mask)[0]

        if self.transformer_device != self.model_device:
            hidden = hidden.to(self.model_device)

        if self.fully_connected1 is not None:
            hidden = F.elu(self.fully_connected1(self.dropout(hidden)))

        return self.fully_connected(self.dropout(hidden))


class CNNClassifier(nn.Module):
    """Per-token classifier: transformer encoder, parallel 1-D convolutions,
    max-pooling over the filter axis and a linear output layer.

    Args:
        config: Namespace with ``nfilters``, ``kernels``, ``pool_stride``,
            ``dropout``, ``labels``, ``output_activation`` and, for
            ``labels == 'multilabel'``, ``num_outputs``.
        transformer: Encoder exposing
            ``embeddings.word_embeddings.embedding_dim``; calling it with
            ``(ids, attention_mask=...)`` must return a sequence whose first
            item is the hidden-state tensor.
        transformer_device: Device the transformer is kept on.
    """

    def __init__(self, config, transformer, transformer_device):
        super().__init__()

        self.transformer_device = transformer_device
        self.model_device = transformer_device
        self.transformer = transformer

        embedding_dim = transformer.embeddings.word_embeddings.embedding_dim
        # One convolution per kernel size, all reading the encoder output.
        branches = []
        for kernel_size in config.kernels:
            branches.append(nn.Conv1d(in_channels=embedding_dim,
                                      out_channels=config.nfilters,
                                      kernel_size=kernel_size,
                                      stride=1))
        self.convolutions = nn.ModuleList(branches)

        self.pool_stride = config.pool_stride
        self.dropout = nn.Dropout(config.dropout)

        # Every branch contributes nfilters // pool_stride features per token.
        pooled_width = config.nfilters // config.pool_stride
        ninputs = pooled_width * len(config.kernels)

        if config.labels == 'multilabel':
            self.noutputs = config.num_outputs
        elif config.output_activation == 'softmax':
            self.noutputs = 2
        else:
            self.noutputs = 1

        self.fully_connected = nn.Linear(ninputs, self.noutputs)

        # Name of the activation the caller applies to the raw outputs.
        if self.noutputs != 1 and config.output_activation != 'sigmoid':
            self.output_activation = 'softmax'
        else:
            self.output_activation = 'sigmoid'

    def to(self, *args, **kwargs):
        """Move the head as requested while keeping the transformer in place."""
        moved = super().to(*args, **kwargs)
        moved.transformer = moved.transformer.to(
            torch.device(moved.transformer_device))
        head_parameter = next(moved.fully_connected.parameters())
        moved.model_device = head_parameter.device.type
        return moved

    def _set_transformer_trainable(self, trainable):
        """Set ``requires_grad`` on every transformer parameter."""
        for parameter in self.transformer.parameters():
            parameter.requires_grad = trainable

    def freeze_transformer(self):
        """Exclude the transformer weights from gradient computation."""
        self._set_transformer_trainable(False)

    def unfreeze_transformer(self):
        """Include the transformer weights in gradient computation again."""
        self._set_transformer_trainable(True)

    def forward(self, x):
        """Return the raw (un-activated) outputs for the token ids ``x``."""
        token_ids = x.to(self.transformer_device)
        # Id 0 is treated as padding and masked out of the attention.
        attention_mask = (token_ids > 0).int()
        encoded = self.transformer(token_ids, attention_mask=attention_mask)[0]
        # Conv1d convolves over the last axis, so the sequence axis goes last.
        channels_first = encoded.transpose(1, 2)
        seq_len = channels_first.shape[-1]

        if self.transformer_device != self.model_device:
            channels_first = channels_first.to(self.model_device)

        pooled = []
        for conv in self.convolutions:
            feature_map = F.elu(conv(channels_first)).transpose(1, 2)
            # Right-pad the sequence axis back to seq_len.
            missing_steps = seq_len - feature_map.shape[1]
            feature_map = F.pad(feature_map, (0, 0, 0, missing_steps))
            # The pooling window slides over the filter axis.
            pooled.append(F.max_pool1d(feature_map, self.pool_stride))
        merged = torch.cat(pooled, dim=2)  # pylint: disable=no-member

        return self.fully_connected(self.dropout(merged))

class RNNClassifier(nn.Module):
    """Per-token classifier: transformer encoder, LSTM, linear output layer.

    Args:
        config: Namespace with ``lstm_size``, ``nlayers``, ``dropout``,
            ``initrange``, ``labels``, ``output_activation`` and, for
            ``labels == 'multilabel'``, ``num_outputs``.
        transformer: Encoder exposing
            ``embeddings.word_embeddings.embedding_dim``; calling it with
            ``(ids, attention_mask=...)`` must return a sequence whose first
            item is the hidden-state tensor.
        transformer_device: Device the transformer is kept on.
    """

    def __init__(self, config, transformer, transformer_device):
        super(RNNClassifier, self).__init__()

        self.transformer_device = transformer_device
        self.model_device = transformer_device
        self.transformer = transformer

        self.lstm = nn.LSTM(
            input_size=transformer.embeddings.word_embeddings.embedding_dim,
            hidden_size=config.lstm_size,
            num_layers=config.nlayers,
            batch_first=True,
            # nn.LSTM only applies dropout between stacked layers; passing a
            # non-zero value with a single layer has no effect other than a
            # warning, so it is zeroed in that case.
            dropout=config.dropout if config.nlayers > 1 else 0.0)

        self.dropout = nn.Dropout(config.dropout)
        self.noutputs = 1
        if config.labels == 'multilabel':
            self.noutputs = config.num_outputs
        else:
            if config.output_activation == 'softmax':
                self.noutputs = 2

        self.fully_connected = nn.Linear(config.lstm_size, self.noutputs)

        # Name of the activation the caller applies to the raw outputs.
        self.output_activation = ('sigmoid'  # pylint: disable=no-member
                                  if self.noutputs == 1
                                  else ('sigmoid'
                                        if config.output_activation == 'sigmoid'
                                        else 'softmax'))
        self.init_weights(config.initrange)

    def to(self, *args, **kwargs):
        """Move the head as requested while keeping the transformer in place."""
        self = super().to(*args, **kwargs)
        self.transformer = self.transformer.to(
            torch.device(self.transformer_device))
        self.model_device = next(self.fully_connected.parameters()).device.type
        return self

    def freeze_transformer(self):
        """Exclude the transformer weights from gradient computation."""
        for param in self.transformer.parameters():
            param.requires_grad = False

    def unfreeze_transformer(self):
        """Include the transformer weights in gradient computation again."""
        for param in self.transformer.parameters():
            param.requires_grad = True

    def forward(self, x):
        """Return the raw (un-activated) outputs for the token ids ``x``."""
        x = x.to(self.transformer_device)
        # Id 0 is treated as padding and masked out of the attention.
        m = (x > 0).int()
        x = self.transformer(x, attention_mask=m)[0]

        if self.transformer_device != self.model_device:
            x = x.to(self.model_device)

        x, _ = self.lstm(x)

        return self.fully_connected(self.dropout(x))

    def init_weights(self, initrange):
        """Initialise the LSTM and the output layer.

        LSTM weights and the output weights are drawn uniformly from
        ``[-initrange, initrange]``. The second quarter of every LSTM bias
        vector (the forget gate in PyTorch's gate layout) is set to 1, and
        the output bias to 0.
        """
        with torch.no_grad():
            for name, param in self.lstm.named_parameters():
                if "bias" in name:
                    n = param.size(0)
                    start, end = n // 4, n // 2
                    param[start:end].fill_(1.)
                elif "weight" in name:
                    param.uniform_(-initrange, initrange)

            self.fully_connected.bias.fill_(0)
            self.fully_connected.weight.uniform_(-initrange, initrange)


class CNNRNNClassifier(nn.Module):
    """Per-token classifier: transformer encoder, parallel 1-D convolutions
    with max-pooling over the filter axis, LSTM and a linear output layer.

    Args:
        config: Namespace with ``nfilters``, ``kernels``, ``pool_stride``,
            ``lstm_size``, ``nlayers``, ``dropout``, ``initrange``,
            ``labels``, ``output_activation`` and, for
            ``labels == 'multilabel'``, ``num_outputs``.
        transformer: Encoder exposing
            ``embeddings.word_embeddings.embedding_dim``; calling it with
            ``(ids, attention_mask=...)`` must return a sequence whose first
            item is the hidden-state tensor.
        transformer_device: Device the transformer is kept on.
    """

    def __init__(self, config, transformer, transformer_device):
        super().__init__()

        self.transformer_device = transformer_device
        self.model_device = transformer_device
        self.transformer = transformer

        embedding_dim = transformer.embeddings.word_embeddings.embedding_dim
        # One convolution per kernel size, all reading the encoder output.
        branches = []
        for kernel_size in config.kernels:
            branches.append(nn.Conv1d(in_channels=embedding_dim,
                                      out_channels=config.nfilters,
                                      kernel_size=kernel_size,
                                      stride=1))
        self.convolutions = nn.ModuleList(branches)

        self.pool_stride = config.pool_stride

        # Every branch contributes nfilters // pool_stride features per token.
        pooled_width = config.nfilters // config.pool_stride
        self.lstm = nn.LSTM(
            input_size=pooled_width * len(config.kernels),
            hidden_size=config.lstm_size,
            num_layers=config.nlayers,
            batch_first=True,
            dropout=config.dropout)

        self.dropout = nn.Dropout(config.dropout)

        if config.labels == 'multilabel':
            self.noutputs = config.num_outputs
        elif config.output_activation == 'softmax':
            self.noutputs = 2
        else:
            self.noutputs = 1
        self.fully_connected = nn.Linear(config.lstm_size, self.noutputs)

        # Name of the activation the caller applies to the raw outputs.
        if self.noutputs != 1 and config.output_activation != 'sigmoid':
            self.output_activation = 'softmax'
        else:
            self.output_activation = 'sigmoid'
        self.init_weights(config.initrange)

    def to(self, *args, **kwargs):
        """Move the head as requested while keeping the transformer in place."""
        moved = super().to(*args, **kwargs)
        moved.transformer = moved.transformer.to(
            torch.device(moved.transformer_device))
        head_parameter = next(moved.fully_connected.parameters())
        moved.model_device = head_parameter.device.type
        return moved

    def _set_transformer_trainable(self, trainable):
        """Set ``requires_grad`` on every transformer parameter."""
        for parameter in self.transformer.parameters():
            parameter.requires_grad = trainable

    def freeze_transformer(self):
        """Exclude the transformer weights from gradient computation."""
        self._set_transformer_trainable(False)

    def unfreeze_transformer(self):
        """Include the transformer weights in gradient computation again."""
        self._set_transformer_trainable(True)

    def forward(self, x):
        """Return the raw (un-activated) outputs for the token ids ``x``."""
        token_ids = x.to(self.transformer_device)
        # Id 0 is treated as padding and masked out of the attention.
        attention_mask = (token_ids > 0).int()
        encoded = self.transformer(token_ids, attention_mask=attention_mask)[0]
        # Conv1d convolves over the last axis, so the sequence axis goes last.
        channels_first = encoded.transpose(1, 2)
        seq_len = channels_first.shape[-1]
        if self.transformer_device != self.model_device:
            channels_first = channels_first.to(self.model_device)

        pooled = []
        for conv in self.convolutions:
            feature_map = F.elu(conv(channels_first)).transpose(1, 2)
            # Right-pad the sequence axis back to seq_len.
            missing_steps = seq_len - feature_map.shape[1]
            feature_map = F.pad(feature_map, (0, 0, 0, missing_steps))
            # The pooling window slides over the filter axis.
            pooled.append(F.max_pool1d(feature_map, self.pool_stride))
        merged = torch.cat(pooled, dim=2)  # pylint: disable=no-member
        merged = self.dropout(merged)

        recurrent, _ = self.lstm(merged)
        return self.fully_connected(self.dropout(recurrent))

    def init_weights(self, initrange):
        """Initialise the convolutions, the LSTM and the output layer.

        All weights are drawn uniformly from ``[-initrange, initrange]``.
        Convolution and output biases are zeroed, and the second quarter of
        every LSTM bias vector is set to 1.
        """
        for conv in self.convolutions:
            conv.weight.data.uniform_(-initrange, initrange)
            conv.bias.data.fill_(0)
        for layer_param_names in self.lstm._all_weights:
            for param_name in layer_param_names:
                if "bias" in param_name:
                    bias = getattr(self.lstm, param_name)
                    size = bias.size(0)
                    bias.data[size // 4:size // 2].fill_(1.)
                if "weight" in param_name:
                    weight = getattr(self.lstm, param_name)
                    weight.data.uniform_(-initrange, initrange)
        self.fully_connected.weight.data.uniform_(-initrange, initrange)
        self.fully_connected.bias.data.fill_(0)

In [12]:
# Reproducibility: seed NumPy and PyTorch (CPU and CUDA) and make cuDNN
# deterministic instead of auto-tuned.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Device for the classifier head: the GPU when one is visible, else the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = torch.device(_device_name)     # pylint: disable=no-member

# Language codes whose data is loaded.
# Smaller alternative subset: ['DE', 'GA', 'HI', 'PT', 'ZH']
LANGUAGE_CODES = 'DE EL EU FR GA HE HI IT PL PT RO SV TR ZH'.split()

CWD = os.getcwd()
# Data directory on the Google Drive mounted at /content/gdrive.
BASE_DIR = '/content/gdrive/My Drive/mwe_sharedtask/data'
# Sub-directory of BASE_DIR used for training artefacts.
TRAIN_DIR = "transformer/cnn"
DEVICE

device(type='cuda')

In [13]:
def _parse_kernel_sizes(value):
    """Convert the ``--kernels`` command-line value into a list of ints.

    Two spellings are accepted:
    * comma-separated sizes, e.g. ``"3,5,10"`` -> ``[3, 5, 10]`` (a trailing
      comma such as ``"10,"`` selects the single size 10);
    * the legacy undelimited form where every character is one size, e.g.
      ``"123"`` -> ``[1, 2, 3]``.

    Raises:
        ValueError: If a piece is not an integer; argparse reports this as a
            usage error.
    """
    if ',' in value:
        return [int(part) for part in value.split(',') if part.strip()]
    return [int(char) for char in value]


# Command-line interface of the experiment. Inside the notebook the defaults
# apply unless flags are present in sys.argv when the arguments are parsed.
parser = argparse.ArgumentParser(description='Classifier using CNNs')
parser.add_argument(
    '--bert_type',
    type=str,
    default='distilbert-base-multilingual-cased',
    help='transformer model [should be a multilingual model]')
parser.add_argument(
    '--bert_device',
    type=str,
    default='gpu',
    help='device to run the transformer model')
parser.add_argument(
    '--labels',
    type=str,
    default='multilabel',
    help='multilabel or binary classification')
parser.add_argument(
    '--metric',
    type=str,
    default='f1',
    help='sklearn metric to evaluate the model while training')
parser.add_argument(
    '--nfilters',
    type=int,
    default=768,
    help='number of convolution filters')
parser.add_argument(
    '--kernels',
    # type=list would split the argument into single characters, which makes
    # kernel sizes of two or more digits impossible to express.
    type=_parse_kernel_sizes,
    default=[1, 2, 3, 4, 5],
    help='convolution kernel sizes, comma-separated (e.g. 1,2,3)')
parser.add_argument(
    '--pool_stride',
    type=int,
    default=5,
    help='size of the stride for the pooling operation')
parser.add_argument(
    '--hidden_size',
    type=int,
    default=100,
    help='size of pre-classifier in case of feedforward')
parser.add_argument(
    '--nlayers',
    type=int,
    default=2,
    help='number of LSTM layers')
parser.add_argument(
    '--lstm_size',
    type=int,
    default=100,
    help='hidden size of the LSTM layers')
parser.add_argument(
    '--dropout',
    type=float,
    default=0.5,
    help='dropout probability for the dense layer')
parser.add_argument(
    '--initrange',
    type=float,
    default=0.1,
    help='range to initialize the lstm layers')
parser.add_argument(
    '--clipnorm',
    type=float,
    default=5.0,
    help='limit to clip the l2 norm of gradients')
parser.add_argument(
    '--output_activation',
    type=str,
    default='softmax',
    help='output activation')
parser.add_argument(
    '--batch_size',
    type=int,
    default=32,
    help='training batch size')
parser.add_argument(
    '--eval_batch_size',
    type=int,
    default=1,
    help='validation/evaluation batch size')
parser.add_argument(
    '--max_epochs',
    type=int,
    default=100,
    help='max number of epochs to train the model')
parser.add_argument(
    "--train_dir",
    type=str,
    default=os.path.join(BASE_DIR, TRAIN_DIR) + "/",
    help="Train dir")
parser.add_argument(
    "--eval",
    action="store_true",
    help="eval at the end of the training process")


_StoreTrueAction(option_strings=['--eval'], dest='eval', nargs=0, const=True, default=False, type=None, choices=None, help='eval at the end of the training process', metavar=None)

In [14]:
# Parse the command line into the experiment configuration.
args = parser.parse_args()
# Normalise the kernel sizes to ints, dropping any element that contains ','.
args.kernels = [int(i) for i in args.kernels if ',' not in str(i)]
# The transformer runs on the GPU only when one is available AND
# --bert_device is 'gpu'; otherwise it stays on the CPU.
transformer_device = torch.device(
    'cuda' if torch.cuda.is_available() and args.bert_device == 'gpu'
    else 'cpu')

# One-hot label encoding is switched off; the original condition is kept in
# the trailing comment for reference.
ONE_HOT_OUTPUT = False #args.output_activation == 'softmax' and args.labels == 'binary'
transformer_device

device(type='cuda')

In [15]:
# The pickle name depends on the transformer type and on the label scheme
# ('binary' has no suffix, anything else uses '.multilabel').
label_suffix = '' if args.labels == 'binary' else '.multilabel'
tokenized_path = '{}/{}{}.tokenized.all.pkl'.format(
    BASE_DIR, args.bert_type, label_suffix)

(x_train, y_train), (x_val, y_val), (x_dev, y_dev) = load_tokenized_data(
    datafile=tokenized_path,
    language_codes=LANGUAGE_CODES,
    percent=0.0,
    split=False,
    seed=SEED)

# Flatten every training label into one 1-D vector and derive 'balanced'
# weights, one per distinct label value.
targets = np.ravel(np.concatenate(y_train))
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(targets),
    y=targets)

# The model gets one output per weighted class.
args.num_outputs = len(class_weights)
args.num_outputs

10

In [16]:
# Display the balanced weights: one entry per distinct label in `targets`.
class_weights

array([1.11256854e-01, 1.98904153e+02, 8.01573662e+00, 3.47987189e+01,
       2.45680017e+00, 2.79995846e+03, 2.29504792e+01, 3.02331141e+00,
       3.44626586e+01, 2.35138630e+01])

In [17]:
# Display the distinct label ids present in the flattened training targets.
np.unique(targets)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
# Load the tokenizer and the pretrained transformer identified by
# `args.bert_type`.
# NOTE(review): force_download=True requests a fresh download of the model
# files on every run instead of reusing a cached copy -- presumably a
# workaround for a bad cache; confirm whether it is still needed.
tokenizer = AutoTokenizer.from_pretrained(args.bert_type)
transformer = AutoModel.from_pretrained(args.bert_type, force_download=True)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=541808922.0, style=ProgressStyle(descri…




In [19]:
# Wrap the pretrained transformer in the CNN-RNN classifier; the transformer
# device is passed separately from the device the classifier is moved to.
model = CNNRNNClassifier(args, transformer, transformer_device)
# Name derived from the arguments; later used for the checkpoint file names.
model_name = build_model_name(args, model='cnn-rnn')
model.to(DEVICE)     # pylint: disable=no-member
# NOTE(review): presumably disables gradient updates for the transformer
# weights -- confirm in CNNRNNClassifier.freeze_transformer.
model.freeze_transformer()
# Ends the cell on a statement, so no value is echoed as cell output.
print()




In [20]:
# Quantity that drives both early stopping and checkpointing.
metric = 'valid_loss'
# Only the validation loss is minimised; any other metric is treated as
# higher-is-better.
lower_is_better = metric == 'valid_loss'

# Ceiling division gives the number of training batches per epoch. The former
# `len // batch_size + 1` over-counted by one whenever the dataset size was an
# exact multiple of the batch size.
# NOTE(review): validation batches are not included in this total -- confirm
# whether the progress bar also advances during validation.
progress_bar = ProgressBar(
    batches_per_epoch=(len(x_train) + args.batch_size - 1) // args.batch_size)
scorer = CustomScorer(scoring=None, name='F1', lower_is_better=False, use_caching=False)
# Stop after 20 epochs without improvement of the monitored metric.
early_stopping = EarlyStopping(monitor=metric, patience=20, lower_is_better=lower_is_better)
# Save parameters, optimizer state and history under file names derived from
# `model_name`, triggered by the '<metric>_best' history entry.
checkpoint = Checkpoint(
    monitor='{}_best'.format(metric),
    dirname=args.train_dir,
    f_params='{}.params.pt'.format(model_name),
    f_optimizer='{}.optimizer.pt'.format(model_name),
    f_history='{}.history.json'.format(model_name))

In [21]:
# Choose the loss from the label scheme.
criterion_class = nn.BCELoss if args.labels == 'binary' else nn.CrossEntropyLoss
# Keyword arguments routed to the criterion via the `criterion__` prefix.
# `ignore_index` exists on nn.CrossEntropyLoss but not on nn.BCELoss, so it
# is only forwarded for the former; passing it unconditionally made the
# binary configuration fail when the criterion was instantiated.
criterion_params = {'criterion__reduction': 'none'}
if criterion_class is nn.CrossEntropyLoss:
    criterion_params['criterion__ignore_index'] = -1

net = IdiomClassifier(
    module=model,
    class_weights=class_weights,
    print_report=False,
    score_average='weighted',
    # Training batches: bucketed by sentence length, reshuffled.
    iterator_train=SkorchBucketIterator,
    iterator_train__batch_size=args.batch_size,
    iterator_train__sort_key=lambda x: len(x.sentence),
    iterator_train__shuffle=True,
    iterator_train__device=DEVICE,
    iterator_train__one_hot=ONE_HOT_OUTPUT,
    # Validation batches: same iterator class with the evaluation batch size.
    iterator_valid=SkorchBucketIterator,
    iterator_valid__batch_size=args.eval_batch_size,
    iterator_valid__sort_key=lambda x: len(x.sentence),
    iterator_valid__shuffle=True,
    iterator_valid__device=DEVICE,
    iterator_valid__one_hot=ONE_HOT_OUTPUT,
    # Validate on the fixed (x_val, y_val) set instead of an internal split.
    train_split=predefined_split(SentenceDataset(data=(x_val, y_val))),
    optimizer=torch.optim.SGD,
    criterion=criterion_class,
    # Drop `progress_bar` from this list to train without the progress bar.
    callbacks=[progress_bar, scorer, early_stopping, checkpoint],
    device=DEVICE,
    **criterion_params
)

In [None]:
# Train for at most `args.max_epochs` epochs. The labels travel inside the
# SentenceDataset, hence y=None.
net.fit(SentenceDataset(data=(x_train, y_train)), y=None, epochs=args.max_epochs)
# Ends the cell on a statement, so the fitted net is not echoed as output.
print()

  1%|          | 10/1198 [00:04<08:22,  2.37it/s, train_loss=0.545]

In [None]:
# Initialize the net, then load parameters through the Checkpoint callback
# object -- presumably so evaluation uses the best saved weights rather than
# the last-epoch ones; confirm against the checkpoint files.
net.initialize()
net.load_params(checkpoint=checkpoint)

In [None]:
# Turn on report printing for the net and mark the run as an evaluation run.
net.print_report = True
args.eval = True

banner = '#' * 20
# Every language in LANGUAGE_CODES is evaluated; rebind it beforehand
# (e.g. to ['DE', 'GA', 'HI', 'PT', 'ZH']) to restrict the run to a subset.
for code in LANGUAGE_CODES:
    print(banner, '# Evaluating Language: {}'.format(code), banner, sep='\n')

    # One sentence per batch, in dataset order (no sorting, no shuffling).
    dev_dataset = SentenceDataset(data=(x_dev[code], y_dev[code]))
    test_iterator = SkorchBucketIterator(
        dataset=dev_dataset,
        batch_size=1,
        sort=False,
        sort_key=lambda example: len(example.sentence),
        shuffle=False,
        train=False,
        one_hot=ONE_HOT_OUTPUT,
        device=DEVICE)

    # Point the evaluation at this language's dev file.
    args.dev_file = '{base}/{lang}/dev.cupt'.format(base=BASE_DIR, lang=code)
    evaluate_model(net, test_iterator, tokenizer, args)

In [None]:
# Closing banner: a rule of 20 '#', a blank line, then the completion message.
print("#" * 20, "\nTraining finished!!!", sep="\n")