# Model training for sequence labeling

## Imports

In [None]:
!pip3 install gdown spacy stanza torch==1.13.1 accelerate transformers evaluate seqeval --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.5/802.5 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m219.1/219.1 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m112.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m64.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m3.1 MB/s

In [None]:
from ast import literal_eval
from google.colab import drive
import locale
import os
import random
import numpy as np
import pandas as pd

import spacy
import stanza

import torch

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import TrainingArguments, Trainer
from transformers import pipeline

import evaluate

In [None]:
def getpreferredencoding(do_setlocale=True):
    '''
    Report UTF-8 as the preferred encoding; `do_setlocale` is accepted but ignored.
    '''
    return "UTF-8"


# Swap the stdlib lookup for the stub above so later callers in this kernel get "UTF-8".
# NOTE(review): presumably a workaround for the Colab runtime reporting a non-UTF-8 locale - confirm.
setattr(locale, 'getpreferredencoding', getpreferredencoding)

In [None]:
ner = evaluate.load('seqeval')

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [None]:
def seed_everything(seed=42) -> None:
    '''
    Seed every random number generator used in the notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and, when available, all
    CUDA devices) and switches cuDNN to deterministic kernels so that repeated
    runs start from the same state.

    Args:
        seed: integer seed shared by all generators.
    '''
    random.seed(seed)
    # Only inherited by processes started after this point; the hash seed of the
    # running interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # Seeds the CPU generator, which is used when new layers (e.g. the
    # classification head) are initialised before the model is moved to the GPU.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Auto-tuned (benchmark) kernels may differ between runs, so disable
        # them and request deterministic cuDNN algorithms instead.
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

In [None]:
seed_everything()

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# BIO tags for sentiment-bearing aspect spans. The position of a tag in this list is
# its integer label id (NERTrainPipeline builds id2label by enumerating the list),
# so the order must match the ids stored in the `sentiment_labels` column.
BIO_sent = ['B-POS', 'I-POS', 'B-NEG', 'I-NEG', 'B-NEUT', 'I-NEUT', 'O']

## Getting data

In [None]:
# !gdown 10e9c3EgaIqrxYkt_q69hZsjuc1E9sYu- -O aspects --folder

In [None]:
!cp -r '/content/drive/MyDrive/Summarization/aspects' '/content/'

Sample:

In [None]:
def generic(x):
    '''
    Parse the string form of a Python literal (here: a list stored in a TSV cell).
    '''
    return literal_eval(x)


# Columns that hold stringified lists and must be parsed while reading the TSV files.
converters = dict.fromkeys(
    ('sentence_tokens', 'aspect_labels', 'sentiment_labels'),
    generic,
)

In [None]:
train = pd.read_csv('/content/aspects/train_max_ner.tsv', delimiter='\t', converters=converters)

In [None]:
train.head()

Unnamed: 0,review_id,sentence_text,sentence_tokens,aspect_labels,sentiment_labels
0,10231,"Я несколько раз была в этом заведении,о кухне ...","[Я, несколько, раз, была, в, этом, заведении, ...","[2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2]","[6, 6, 6, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 6, 6]"
1,10231,"Потрясающая паста с лососем,очень вкусные супч...","[Потрясающая, паста, с, лососем, ,, очень, вку...","[2, 0, 1, 1, 2, 2, 0, 0, 2, 2, 0, 2]","[6, 0, 1, 1, 6, 6, 0, 0, 6, 6, 0, 6]"
2,10231,"Последний раз была с друзьями,все остались дов...","[Последний, раз, была, с, друзьями, ,, все, ос...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2]","[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 6, 0, 6]"
3,10231,"Интерьер не плохой, несколько залов на разный ...","[Интерьер, не, плохой, ,, несколько, залов, на...","[0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2]","[0, 6, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6]"
4,10231,Вообщем стоящее место для обеда и посиделок с ...,"[Вообщем, стоящее, место, для, обеда, и, посид...","[2, 2, 0, 2, 2, 2, 2, 2, 2, 2]","[6, 6, 0, 6, 6, 6, 6, 6, 6, 6]"


In [None]:
train['sentence_tokens'].values.tolist()[:5]

[['Я',
  'несколько',
  'раз',
  'была',
  'в',
  'этом',
  'заведении',
  ',',
  'о',
  'кухне',
  'могу',
  'сказать',
  'только',
  'самое',
  'хорошее',
  '.'],
 ['Потрясающая',
  'паста',
  'с',
  'лососем',
  ',',
  'очень',
  'вкусные',
  'супчики',
  ',',
  'отличные',
  'десерты',
  '.'],
 ['Последний',
  'раз',
  'была',
  'с',
  'друзьями',
  ',',
  'все',
  'остались',
  'довольны',
  'и',
  'обслуживанием',
  'и',
  'едой',
  '.'],
 ['Интерьер',
  'не',
  'плохой',
  ',',
  'несколько',
  'залов',
  'на',
  'разный',
  'вкус',
  'и',
  'компанию',
  '.'],
 ['Вообщем',
  'стоящее',
  'место',
  'для',
  'обеда',
  'и',
  'посиделок',
  'с',
  'друзьями',
  '.']]

In [None]:
def get_dataset(path: str) -> tuple:
    '''
    Read one tab-separated split and return its tokens and sentiment labels.

    Args:
        path: location of the TSV file; list-valued columns are parsed with
            the module-level `converters`.

    Returns:
        A pair `(data, labels)`: the `sentence_tokens` column and the
        `sentiment_labels` column, each converted to a plain Python list
        with one entry per row.
    '''
    frame = pd.read_csv(path, delimiter='\t', converters=converters)
    tokens, tags = (
        frame[column].values.tolist()
        for column in ('sentence_tokens', 'sentiment_labels')
    )
    return tokens, tags

In [None]:
class ReviewsDataset(torch.utils.data.Dataset):
    '''
    Map-style dataset pairing each sentence with its per-word labels.

    Items are returned untokenized: 'input_ids' holds whatever `data[idx]`
    contains and 'labels' holds `labels[idx]`.
    '''

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        # The dataset size is defined by the number of label sequences.
        return len(self.labels)

    def __getitem__(self, idx):
        return dict(input_ids=self.data[idx], labels=self.labels[idx])

In [None]:
train_data, train_labels = get_dataset('/content/aspects/train_max_ner.tsv')
eval_data, eval_labels = get_dataset('/content/aspects/eval_max_ner.tsv')
test_data, test_labels = get_dataset('/content/aspects/test_max_ner.tsv')

In [None]:
custom_train_dataset = ReviewsDataset(train_data, train_labels)
custom_eval_dataset = ReviewsDataset(eval_data, eval_labels)
custom_test_dataset = ReviewsDataset(test_data, test_labels)

In [None]:
custom_train_dataset[0]

{'input_ids': ['Я',
  'несколько',
  'раз',
  'была',
  'в',
  'этом',
  'заведении',
  ',',
  'о',
  'кухне',
  'могу',
  'сказать',
  'только',
  'самое',
  'хорошее',
  '.'],
 'labels': [6, 6, 6, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 6, 6]}

Restaurants:

In [None]:
# Each variable loads the split named in its file. The eval/test paths used to be
# swapped, so metrics recorded before this fix were computed on the eval file.
train_restaurants_data, train_restaurants_labels = get_dataset('/content/aspects/train_restaurants_max_ner.tsv')
eval_restaurants_data, eval_restaurants_labels = get_dataset('/content/aspects/eval_restaurants_max_ner.tsv')
test_restaurants_data, test_restaurants_labels = get_dataset('/content/aspects/test_restaurants_max_ner.tsv')

In [None]:
custom_train_restaurants_dataset = ReviewsDataset(train_restaurants_data, train_restaurants_labels)
custom_eval_restaurants_dataset = ReviewsDataset(eval_restaurants_data, eval_restaurants_labels)
custom_test_restaurants_dataset = ReviewsDataset(test_restaurants_data, test_restaurants_labels)

Automobiles:

In [None]:
# Each variable loads the split named in its file. The eval/test paths used to be
# swapped, so metrics recorded before this fix were computed on the eval file.
train_automobiles_data, train_automobiles_labels = get_dataset('/content/aspects/train_automobiles_max_ner.tsv')
eval_automobiles_data, eval_automobiles_labels = get_dataset('/content/aspects/eval_automobiles_max_ner.tsv')
test_automobiles_data, test_automobiles_labels = get_dataset('/content/aspects/test_automobiles_max_ner.tsv')

In [None]:
custom_train_automobiles_dataset = ReviewsDataset(train_automobiles_data, train_automobiles_labels)
custom_eval_automobiles_dataset = ReviewsDataset(eval_automobiles_data, eval_automobiles_labels)
custom_test_automobiles_dataset = ReviewsDataset(test_automobiles_data, test_automobiles_labels)

## Fine-tuning

In [None]:
class NERTrainPipeline:
    '''
    Fine-tune and evaluate a token-classification model for sequence labeling.

    The pipeline loads a pretrained checkpoint, aligns word-level labels with
    sub-word tokens, trains with `Trainer`, saves the result and scores the
    saved model on the test dataset with the module-level `ner` metric.
    '''

    def __init__(self, train_ds, val_ds, test_ds, model_checkpoint, new_model_checkpoint, labels):
        '''
        Args:
            train_ds, val_ds, test_ds: datasets whose items are dicts with
                'input_ids' (a sentence as a list of words) and 'labels'
                (one integer label id per word).
            model_checkpoint: name or path of the pretrained checkpoint.
            new_model_checkpoint: directory the fine-tuned model is saved to.
            labels: label names; a name's position in the list is its id.
        '''
        self.train_ds = train_ds
        self.val_ds = val_ds
        self.test_ds = test_ds

        self.model_checkpoint = model_checkpoint
        self.new_model_checkpoint = new_model_checkpoint

        self.num_labels = len(labels)
        self.label2id = {label: i for i, label in enumerate(labels)}
        self.id2label = {i: label for i, label in enumerate(labels)}

        self.model = AutoModelForTokenClassification.from_pretrained(
            self.model_checkpoint,
            num_labels=self.num_labels,
            id2label=self.id2label,
            label2id=self.label2id
            )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_checkpoint)

        self.training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=100,
        )

    def _align_labels(self, tokenized_inputs, batch_labels):
        '''
        Align word-level labels with sub-word tokens.

        Positions without a source word (special tokens, padding) get -100,
        the index ignored by the loss; every sub-word piece of a word gets
        that word's label. The result is stored under 'labels' in
        `tokenized_inputs`, which is also returned.
        '''
        labels = []
        for i, label in enumerate(batch_labels):
            label_ids = [
                -100 if word_idx is None else label[word_idx]
                for word_idx in tokenized_inputs.word_ids(i)
            ]
            labels.append(label_ids)

        tokenized_inputs['labels'] = labels
        return tokenized_inputs

    def _collate_fn(self, batch):
        '''
        Data collator: tokenize a batch of pre-split sentences, pad it and
        align the labels with the resulting tokens.

        Returns:
            Dict of tensors with 'input_ids', 'attention_mask' and 'labels'.
        '''
        inputs = self.tokenizer(
            [x['input_ids'] for x in batch],
            truncation=True, padding=True, max_length=100, is_split_into_words=True
        )
        inputs = self._align_labels(inputs, [x['labels'] for x in batch])
        return {
            'input_ids': torch.tensor(inputs['input_ids']),
            # Without the mask the model would attend to padding positions.
            'attention_mask': torch.tensor(inputs['attention_mask']),
            'labels': torch.tensor(inputs['labels'])
        }

    def train(self):
        '''
        Fine-tune model on the downstream task and save it to
        `new_model_checkpoint`.
        '''
        trainer = Trainer(
            model=self.model,
            args=self.training_args,
            data_collator=self._collate_fn,
            train_dataset=self.train_ds,
            eval_dataset=self.val_ds
        )

        trainer.train()
        trainer.save_model(self.new_model_checkpoint)

    def _align_predictions(self, word_ids, preds, num_words):
        '''
        Reduce token-level predictions to one label name per word.

        The prediction of a word's first sub-word piece is used. Words that
        have no token at all (cut off by truncation, or tokenized to nothing)
        are labelled 'O' so the output always has `num_words` entries and
        stays aligned with the reference labels.
        '''
        first_piece = {}
        for pos, word_idx in enumerate(word_ids):
            if word_idx is not None and word_idx not in first_piece:
                first_piece[word_idx] = preds[pos]

        return [
            self.id2label.get(first_piece[word], None) if word in first_piece else 'O'
            for word in range(num_words)
        ]

    def inference(self):
        '''
        Evaluate the saved fine-tuned model on the test dataset.

        Returns:
            The result of `ner.compute` over word-level predictions and
            references.
        '''
        new_ner_model = AutoModelForTokenClassification.from_pretrained(
            self.new_model_checkpoint,
            num_labels=self.num_labels,
            id2label=self.id2label,
            label2id=self.label2id
            ).to(device)
        new_ner_model.eval()

        references = []
        predictions = []

        # Gradients are not needed for scoring; skipping them saves memory.
        with torch.no_grad():
            for sent in self.test_ds:
                # Tokenize once; the same encoding supplies model inputs and word ids.
                encodings = self.tokenizer(
                    sent['input_ids'], truncation=True,
                    is_split_into_words=True, return_tensors='pt'
                )
                word_ids = encodings.word_ids()

                logits = new_ner_model(**encodings.to(device))[0]
                preds = torch.argmax(logits, dim=2)[0].tolist()

                references.append([self.id2label.get(i, None) for i in sent['labels']])
                predictions.append(self._align_predictions(word_ids, preds, len(sent['labels'])))

        return ner.compute(predictions=predictions, references=references)

    @staticmethod
    def _collect_entity_spans(token_predictions):
        '''
        Merge token-level pipeline output into character spans.

        A 'B-*' token that is not a '##' word piece opens a new span; 'I-*'
        tokens and '##' pieces extend the previous span. A continuation with
        no span to extend opens a new one instead of raising.

        Returns:
            List of dicts with 'start', 'end' and 'label' keys.
        '''
        ents = []
        for elem in token_predictions:
            tag = elem['entity']
            is_begin = tag.startswith('B')
            is_continuation = tag.startswith('I') or (is_begin and elem['word'].startswith('##'))

            if is_continuation and ents:
                ents[-1]['end'] = elem['end']
            elif is_begin or is_continuation:
                ents.append({
                    'start': elem['start'],
                    'end': elem['end'],
                    'label': tag.split('-', 1)[-1]
                })
        return ents

    @staticmethod
    def display_aspects(text, model, tokenizer) -> None:
        '''
        Display aspects in the sentence using spacy.displacy.

        Args:
            text: raw sentence to label.
            model: token-classification model (or checkpoint) for the pipeline.
            tokenizer: tokenizer matching `model`.
        '''
        classifier = pipeline('ner', model=model, tokenizer=tokenizer)
        ents = NERTrainPipeline._collect_entity_spans(classifier(text))

        render_data = {'text': text, 'ents': ents, 'title': 'Sentence', 'settings': {'lang': 'ru', 'direction': 'ltr'}}

        spacy.displacy.render(render_data, style='ent', manual=True, jupyter=True)

## ruBERT

### Both

In [None]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'ai-forever/ruBert-base', 'rubert-sentiment-seqlabeling_both', BIO_sent)

Downloading (…)lve/main/config.json:   0%|          | 0.00/590 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/716M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

In [None]:
ner_trainer_rubert.model

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(120138, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [None]:
ner_trainer_rubert.train()



Step,Training Loss
100,1.1908
200,0.6612
300,0.5448
400,0.4803
500,0.3822
600,0.3733
700,0.3521
800,0.3578
900,0.2672
1000,0.1842


In [None]:
print(ner_trainer_rubert.inference())

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'NEG': {'precision': 0.46167557932263814, 'recall': 0.591324200913242, 'f1': 0.5185185185185185, 'number': 438}, 'NEUT': {'precision': 0.4780600461893764, 'recall': 0.4847775175644028, 'f1': 0.4813953488372093, 'number': 427}, 'POS': {'precision': 0.671976401179941, 'recall': 0.7236340533672173, 'f1': 0.6968491893545428, 'number': 1574}, 'overall_precision': 0.5968761621420603, 'overall_recall': 0.6580565805658056, 'overall_f1': 0.62597503900156, 'overall_accuracy': 0.8859181069481499}


In [None]:
del ner_trainer_rubert

### Restaurants

In [None]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'ai-forever/ruBert-base', 'rubert-sentiment-seqlabeling_restaurants', BIO_sent)
ner_trainer_rubert.train()

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Step,Training Loss
100,1.0811
200,0.5832
300,0.4575
400,0.4224
500,0.3136
600,0.2624


In [None]:
print(ner_trainer_rubert.inference())
del ner_trainer_rubert

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'NEG': {'precision': 0.39928057553956836, 'recall': 0.44047619047619047, 'f1': 0.4188679245283019, 'number': 252}, 'NEUT': {'precision': 0.48295454545454547, 'recall': 0.44041450777202074, 'f1': 0.4607046070460705, 'number': 193}, 'POS': {'precision': 0.6464516129032258, 'recall': 0.7433234421364985, 'f1': 0.6915113871635611, 'number': 674}, 'overall_precision': 0.5671277461350691, 'overall_recall': 0.6228775692582663, 'overall_f1': 0.5936967632027257, 'overall_accuracy': 0.8861259586335115}


### Automobiles

In [None]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'ai-forever/ruBert-base', 'rubert-sentiment-seqlabeling_automobiles', BIO_sent)
ner_trainer_rubert.train()

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Step,Training Loss
100,1.1429
200,0.6654
300,0.4857
400,0.4357
500,0.3273
600,0.2974


In [None]:
print(ner_trainer_rubert.inference())
del ner_trainer_rubert

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'NEG': {'precision': 0.5657894736842105, 'recall': 0.6013986013986014, 'f1': 0.5830508474576271, 'number': 286}, 'NEUT': {'precision': 0.45222929936305734, 'recall': 0.398876404494382, 'f1': 0.42388059701492536, 'number': 178}, 'POS': {'precision': 0.6193771626297578, 'recall': 0.6605166051660517, 'f1': 0.6392857142857143, 'number': 542}, 'overall_precision': 0.5784408084696824, 'overall_recall': 0.5974155069582505, 'overall_f1': 0.5877750611246944, 'overall_accuracy': 0.88604788649128}


## mBERT

### Both

In [None]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'bert-base-multilingual-cased', 'mbert-sentiment-seqlabeling_both', BIO_sent)
ner_trainer_mbert.train()

Downloading (…)lve/main/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]



Step,Training Loss
100,1.2222
200,0.7886
300,0.7274
400,0.6717
500,0.5805
600,0.5636
700,0.5368
800,0.5123
900,0.4266
1000,0.3373


In [None]:
print(ner_trainer_mbert.inference())

{'NEG': {'precision': 0.3415061295971979, 'recall': 0.4452054794520548, 'f1': 0.38652130822596625, 'number': 438}, 'NEUT': {'precision': 0.4034229828850856, 'recall': 0.3864168618266979, 'f1': 0.39473684210526316, 'number': 427}, 'POS': {'precision': 0.6331325301204819, 'recall': 0.667725540025413, 'f1': 0.6499690785405071, 'number': 1574}, 'overall_precision': 0.5344696969696969, 'overall_recall': 0.5785157851578516, 'overall_f1': 0.5556211852726914, 'overall_accuracy': 0.8696786915671035}


In [None]:
del ner_trainer_mbert

### Restaurants

In [None]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'bert-base-multilingual-cased', 'mbert-sentiment-seqlabeling_restaurants', BIO_sent)
ner_trainer_mbert.train()

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Step,Training Loss
100,1.1804
200,0.7176
300,0.6044
400,0.5829
500,0.4714
600,0.4252


In [None]:
print(ner_trainer_mbert.inference())
del ner_trainer_mbert

{'NEG': {'precision': 0.3179916317991632, 'recall': 0.30158730158730157, 'f1': 0.30957230142566194, 'number': 252}, 'NEUT': {'precision': 0.49142857142857144, 'recall': 0.44559585492227977, 'f1': 0.46739130434782605, 'number': 193}, 'POS': {'precision': 0.5742331288343558, 'recall': 0.6943620178041543, 'f1': 0.628609805238415, 'number': 674}, 'overall_precision': 0.5126118795768918, 'overall_recall': 0.5630026809651475, 'overall_f1': 0.536626916524702, 'overall_accuracy': 0.8659075063908901}


### Automobiles

In [None]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'bert-base-multilingual-cased', 'mbert-sentiment-seqlabeling_automobiles', BIO_sent)
ner_trainer_mbert.train()

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Step,Training Loss
100,1.2606
200,0.8136
300,0.6718
400,0.6193
500,0.4944
600,0.4554


In [None]:
print(ner_trainer_mbert.inference())
del ner_trainer_mbert

{'NEG': {'precision': 0.3560830860534125, 'recall': 0.4195804195804196, 'f1': 0.38523274478330655, 'number': 286}, 'NEUT': {'precision': 0.4228187919463087, 'recall': 0.3539325842696629, 'f1': 0.38532110091743116, 'number': 178}, 'POS': {'precision': 0.519672131147541, 'recall': 0.584870848708487, 'f1': 0.5503472222222222, 'number': 542}, 'overall_precision': 0.4562043795620438, 'overall_recall': 0.4970178926441352, 'overall_f1': 0.4757373929590866, 'overall_accuracy': 0.8579663020987289}


## XLM-RoBERTa

### Both

In [None]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'xlm-roberta-base', 'xlmroberta-sentiment-seqlabeling_both', BIO_sent)
ner_trainer_xlmroberta.train()

Downloading (…)lve/main/config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-st

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]



Step,Training Loss
100,1.4961
200,0.7933
300,0.6222
400,0.5538
500,0.4677
600,0.4586
700,0.4383
800,0.4105
900,0.3431
1000,0.2775


In [None]:
print(ner_trainer_xlmroberta.inference())
del ner_trainer_xlmroberta

{'NEG': {'precision': 0.4262589928057554, 'recall': 0.541095890410959, 'f1': 0.4768611670020121, 'number': 438}, 'NEUT': {'precision': 0.43805309734513276, 'recall': 0.4637002341920375, 'f1': 0.4505119453924915, 'number': 427}, 'POS': {'precision': 0.6515065378055713, 'recall': 0.7280813214739518, 'f1': 0.6876687668766878, 'number': 1574}, 'overall_precision': 0.5713769425370437, 'overall_recall': 0.6482164821648216, 'overall_f1': 0.6073761044948137, 'overall_accuracy': 0.8838301821134439}


### Restaurants

In [None]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'xlm-roberta-base', 'xlmroberta-sentiment-seqlabeling_restaurants', BIO_sent)
ner_trainer_xlmroberta.train()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-st

Step,Training Loss
100,1.4005
200,0.7225
300,0.5832
400,0.5208
500,0.4169
600,0.3668


In [None]:
print(ner_trainer_xlmroberta.inference())
del ner_trainer_xlmroberta

{'NEG': {'precision': 0.417910447761194, 'recall': 0.4444444444444444, 'f1': 0.43076923076923074, 'number': 252}, 'NEUT': {'precision': 0.4662576687116564, 'recall': 0.39378238341968913, 'f1': 0.42696629213483145, 'number': 193}, 'POS': {'precision': 0.62125, 'recall': 0.7373887240356083, 'f1': 0.6743554952510176, 'number': 674}, 'overall_precision': 0.5564581640942323, 'overall_recall': 0.612153708668454, 'overall_f1': 0.5829787234042553, 'overall_accuracy': 0.8822914245874971}


### Automobiles

In [None]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'xlm-roberta-base', 'xlmroberta-sentiment-seqlabeling_automobiles', BIO_sent)
ner_trainer_xlmroberta.train()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-st

Step,Training Loss
100,1.4957
200,0.8762
300,0.6582
400,0.5362
500,0.4445
600,0.4023


In [None]:
print(ner_trainer_xlmroberta.inference())
del ner_trainer_xlmroberta

{'NEG': {'precision': 0.4845679012345679, 'recall': 0.548951048951049, 'f1': 0.5147540983606558, 'number': 286}, 'NEUT': {'precision': 0.47878787878787876, 'recall': 0.4438202247191011, 'f1': 0.4606413994169096, 'number': 178}, 'POS': {'precision': 0.606473594548552, 'recall': 0.6568265682656826, 'f1': 0.6306465899025686, 'number': 542}, 'overall_precision': 0.550185873605948, 'overall_recall': 0.588469184890656, 'overall_f1': 0.568683957732949, 'overall_accuracy': 0.8897428318060893}


## Saving

Move the models trained on the combined ("both") data to Google Drive.

In [None]:
!mv -f '/content/rubert-sentiment-seqlabeling_both' '/content/drive/MyDrive/models/'
!mv -f '/content/mbert-sentiment-seqlabeling_both' '/content/drive/MyDrive/models/'
!mv -f '/content/xlmroberta-sentiment-seqlabeling_both' '/content/drive/MyDrive/models/'