# Models evaluation for sequence labeling

## Imports

In [1]:
!pip3 install gdown spacy stanza torch==1.13.1 accelerate transformers evaluate seqeval --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.5/802.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m219.1/219.1 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m115.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m3.1 MB/s

In [2]:
from ast import literal_eval
from google.colab import drive
import locale
import os
import random
import numpy as np
import pandas as pd

import spacy
import stanza

import torch

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import TrainingArguments, Trainer
from transformers import pipeline

import evaluate

In [3]:
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

In [4]:
ner = evaluate.load('seqeval')

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [5]:
def seed_everything(seed=42) -> None:
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.deterministic = False

In [6]:
seed_everything()

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
BIO = ['B-ASPECT', 'I-ASPECT', 'O']

## Getting data

In [None]:
!gdown 10e9c3EgaIqrxYkt_q69hZsjuc1E9sYu- -O aspects --folder

Sample:

In [23]:
generic = lambda x: literal_eval(x)
converters = {'sentence_tokens': generic,
        'aspect_labels': generic,
        'sentiment_labels': generic}

In [24]:
train = pd.read_csv('/content/aspects/train_max_ner.tsv', delimiter='\t', converters=converters)

In [25]:
train.head()

Unnamed: 0,review_id,sentence_text,sentence_tokens,aspect_labels,sentiment_labels
0,10231,"Я несколько раз была в этом заведении,о кухне ...","[Я, несколько, раз, была, в, этом, заведении, ...","[2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2]","[6, 6, 6, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 6, 6]"
1,10231,"Потрясающая паста с лососем,очень вкусные супч...","[Потрясающая, паста, с, лососем, ,, очень, вку...","[2, 0, 1, 1, 2, 2, 0, 0, 2, 2, 0, 2]","[6, 0, 1, 1, 6, 6, 0, 0, 6, 6, 0, 6]"
2,10231,"Последний раз была с друзьями,все остались дов...","[Последний, раз, была, с, друзьями, ,, все, ос...","[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2]","[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 6, 0, 6]"
3,10231,"Интерьер не плохой, несколько залов на разный ...","[Интерьер, не, плохой, ,, несколько, залов, на...","[0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2]","[0, 6, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6]"
4,10231,Вообщем стоящее место для обеда и посиделок с ...,"[Вообщем, стоящее, место, для, обеда, и, посид...","[2, 2, 0, 2, 2, 2, 2, 2, 2, 2]","[6, 6, 0, 6, 6, 6, 6, 6, 6, 6]"


In [26]:
train['sentence_tokens'].values.tolist()[:5]

[['Я',
  'несколько',
  'раз',
  'была',
  'в',
  'этом',
  'заведении',
  ',',
  'о',
  'кухне',
  'могу',
  'сказать',
  'только',
  'самое',
  'хорошее',
  '.'],
 ['Потрясающая',
  'паста',
  'с',
  'лососем',
  ',',
  'очень',
  'вкусные',
  'супчики',
  ',',
  'отличные',
  'десерты',
  '.'],
 ['Последний',
  'раз',
  'была',
  'с',
  'друзьями',
  ',',
  'все',
  'остались',
  'довольны',
  'и',
  'обслуживанием',
  'и',
  'едой',
  '.'],
 ['Интерьер',
  'не',
  'плохой',
  ',',
  'несколько',
  'залов',
  'на',
  'разный',
  'вкус',
  'и',
  'компанию',
  '.'],
 ['Вообщем',
  'стоящее',
  'место',
  'для',
  'обеда',
  'и',
  'посиделок',
  'с',
  'друзьями',
  '.']]

In [27]:
def get_dataset(path: str) -> tuple:
    '''
    Get dataset from files.
    '''
    dataset = pd.read_csv(path, delimiter='\t', converters=converters)
    data = dataset['sentence_tokens'].values.tolist()
    labels = dataset['aspect_labels'].values.tolist()

    return data, labels

In [28]:
class ReviewsDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __getitem__(self, idx):
        item = {
            'input_ids': self.data[idx],
            'labels': self.labels[idx]
        }
        return item

    def __len__(self):
        return len(self.labels)

In [29]:
train_data, train_labels = get_dataset('/content/aspects/train_max_ner.tsv')
eval_data, eval_labels = get_dataset('/content/aspects/eval_max_ner.tsv')
test_data, test_labels = get_dataset('/content/aspects/test_max_ner.tsv')

In [30]:
custom_train_dataset = ReviewsDataset(train_data, train_labels)
custom_eval_dataset = ReviewsDataset(eval_data, eval_labels)
custom_test_dataset = ReviewsDataset(test_data, test_labels)

In [31]:
custom_train_dataset[0]

{'input_ids': ['Я',
  'несколько',
  'раз',
  'была',
  'в',
  'этом',
  'заведении',
  ',',
  'о',
  'кухне',
  'могу',
  'сказать',
  'только',
  'самое',
  'хорошее',
  '.'],
 'labels': [2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2]}

Restaurants:

In [32]:
train_restaurants_data, train_restaurants_labels = get_dataset('/content/aspects/train_restaurants_max_ner.tsv')
eval_restaurants_data, eval_restaurants_labels = get_dataset('/content/aspects/test_restaurants_max_ner.tsv')
test_restaurants_data, test_restaurants_labels = get_dataset('/content/aspects/eval_restaurants_max_ner.tsv')

In [33]:
custom_train_restaurants_dataset = ReviewsDataset(train_restaurants_data, train_restaurants_labels)
custom_eval_restaurants_dataset = ReviewsDataset(eval_restaurants_data, eval_restaurants_labels)
custom_test_restaurants_dataset = ReviewsDataset(test_restaurants_data, test_restaurants_labels)

Automobiles:

In [34]:
train_automobiles_data, train_automobiles_labels = get_dataset('/content/aspects/train_automobiles_max_ner.tsv')
eval_automobiles_data, eval_automobiles_labels = get_dataset('/content/aspects/test_automobiles_max_ner.tsv')
test_automobiles_data, test_automobiles_labels = get_dataset('/content/aspects/eval_automobiles_max_ner.tsv')

In [35]:
custom_train_automobiles_dataset = ReviewsDataset(train_automobiles_data, train_automobiles_labels)
custom_eval_automobiles_dataset = ReviewsDataset(eval_automobiles_data, eval_automobiles_labels)
custom_test_automobiles_dataset = ReviewsDataset(test_automobiles_data, test_automobiles_labels)

## Fine-tuning

In [36]:
class NERTrainPipeline:

    def __init__(self, train_ds, val_ds, test_ds, model_checkpoint, new_model_checkpoint, labels):
        self.train_ds = train_ds
        self.val_ds = val_ds
        self.test_ds = test_ds

        self.model_checkpoint = model_checkpoint
        self.new_model_checkpoint = new_model_checkpoint

        self.num_labels = len(labels)
        self.label2id = {label: i for i, label in enumerate(labels)}
        self.id2label = {i: label for i, label in enumerate(labels)}

        self.model = AutoModelForTokenClassification.from_pretrained(
            self.model_checkpoint,
            num_labels=self.num_labels,
            id2label=self.id2label,
            label2id=self.label2id
            )
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_checkpoint)

        self.training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=3,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=100,
        )

    def _align_labels(self, tokenized_inputs, batch_labels):
        '''
        Align labels with tokens.
        '''
        labels = []
        for i, label in enumerate(batch_labels):
            word_ids = tokenized_inputs.word_ids(i)
            previous_word_idx = None
            label_ids = []
            for word_idx in word_ids:
                if word_idx is None:
                    label_ids.append(-100)
                elif word_idx != previous_word_idx:
                    previous_word_idx = word_idx
                    label_ids.append(label[word_idx])
                elif word_idx == previous_word_idx:
                    label_ids.append(label[word_idx])
                else:
                    label_ids.append(-100)
                
            labels.append(label_ids)

        tokenized_inputs['labels'] = labels
        return tokenized_inputs

    def _collate_fn(self, batch):
        '''
        Data collator function for aligning labels in the batch.
        '''
        inputs = self.tokenizer([x['input_ids'] for x in batch], truncation=True, padding=True, max_length=100, is_split_into_words=True)
        inputs = self._align_labels(inputs, [x['labels'] for x in batch])
        return {
        'input_ids': torch.tensor(inputs['input_ids']),
        'labels': torch.tensor(inputs['labels'])
        }

    def train(self):
        '''
        Fine-tune model on the downstream task.
        '''
        trainer = Trainer(
            model=self.model,
            args=self.training_args,
            data_collator=self._collate_fn,
            train_dataset=self.train_ds,
            eval_dataset=self.val_ds
        )

        trainer.train()
        trainer.save_model(self.new_model_checkpoint)

        return

    def inference(self):
        '''
        Evaluate models on the test dataset.
        '''
        new_ner_model = AutoModelForTokenClassification.from_pretrained(
            self.new_model_checkpoint,
            num_labels=self.num_labels,
            id2label=self.id2label,
            label2id=self.label2id
            )

        references = []
        predictions = []

        for sent in self.test_ds:
            encodings = self.tokenizer(sent['input_ids'], truncation=True, padding=True, is_split_into_words=True)
            inputs = self.tokenizer.encode(sent['input_ids'], truncation=True, padding=True, is_split_into_words=True, return_tensors="pt")

            outputs = new_ner_model(inputs)[0].to(device)
            preds = torch.argmax(outputs.to('cpu'), dim=2)[0].tolist()

            aligned_preds = []
            word_ids = encodings.word_ids()
            previous_word_idx = None
            for idx, word_idx in enumerate(word_ids):
                if word_idx != previous_word_idx and word_idx is not None:
                    previous_word_idx = word_idx
                    aligned_preds.append(preds[idx])

            references.append([self.id2label.get(i, None) for i in sent['labels']])
            predictions.append([self.id2label.get(i, None) for i in aligned_preds])

        print(ner.compute(predictions=predictions, references=references))

    @classmethod
    def display_aspects(text, model, tokenizer) -> None:
        '''
        Display aspects in the sentence using spacy.displacy.
        '''
        classifier = pipeline('ner', model=model, tokenizer=tokenizer)
        result = classifier(text)

        ents = []
        for elem in result:
            if elem['entity'] == 'B':
                if not elem['word'].startswith('##'):
                    e = {}
                    e['start'] = elem['start']
                    e['end'] = elem['end']
                    e['label'] = 'Aspect'
                    ents.append(e)
                else:
                    last_entity = ents[-1]
                    last_entity['end'] = elem['end']
            elif elem['entity'] == 'I':
                last_entity = ents[-1]
                last_entity['end'] = elem['end'] 

        render_data = {'text': text, 'ents': ents, 'title': 'Sentence', 'settings': {'lang': 'ru', 'direction': 'ltr'}}

        spacy.displacy.render(render_data, style='ent', manual=True, jupyter=True)

## ruBERT

### Both

In [37]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'ai-forever/ruBert-base', 'rubert-is-aspect-seqlabeling_both', BIO)

Downloading (…)lve/main/config.json:   0%|          | 0.00/590 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/716M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

In [38]:
ner_trainer_rubert.model

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(120138, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [39]:
ner_trainer_rubert.train()



Step,Training Loss
100,0.7458
200,0.4042
300,0.3425
400,0.3138
500,0.2436
600,0.2418
700,0.223
800,0.2294
900,0.1754
1000,0.1101


In [40]:
ner_trainer_rubert.inference()

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'ASPECT': {'precision': 0.710960960960961, 'recall': 0.7752762996316005, 'f1': 0.7417270413158411, 'number': 2443}, 'overall_precision': 0.710960960960961, 'overall_recall': 0.7752762996316005, 'overall_f1': 0.7417270413158411, 'overall_accuracy': 0.9109152070525461}


In [41]:
del ner_trainer_rubert

### Restaurants

In [42]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'ai-forever/ruBert-base', 'rubert-is-aspect-seqlabeling_restaurants', BIO)
ner_trainer_rubert.train()

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Step,Training Loss
100,0.7219
200,0.375
300,0.2963
400,0.2802
500,0.2132
600,0.17


In [43]:
ner_trainer_rubert.inference()
del ner_trainer_rubert

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'ASPECT': {'precision': 0.6777408637873754, 'recall': 0.7285714285714285, 'f1': 0.7022375215146299, 'number': 1120}, 'overall_precision': 0.6777408637873754, 'overall_recall': 0.7285714285714285, 'overall_f1': 0.7022375215146299, 'overall_accuracy': 0.9049500348594004}


### Automobiles

In [44]:
ner_trainer_rubert = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'ai-forever/ruBert-base', 'rubert-is-aspect-seqlabeling_automobiles', BIO)
ner_trainer_rubert.train()

Some weights of the model checkpoint at ai-forever/ruBert-base were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized fro

Step,Training Loss
100,0.7469
200,0.3963
300,0.2914
400,0.2647
500,0.1859
600,0.1709


In [45]:
ner_trainer_rubert.inference()
del ner_trainer_rubert

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'ASPECT': {'precision': 0.6967930029154519, 'recall': 0.7127236580516899, 'f1': 0.7046683046683047, 'number': 1006}, 'overall_precision': 0.6967930029154519, 'overall_recall': 0.7127236580516899, 'overall_f1': 0.7046683046683047, 'overall_accuracy': 0.9099911321312445}


## mBERT

### Both

In [46]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'bert-base-multilingual-cased', 'mbert-is-aspect-seqlabeling_both', BIO)
ner_trainer_mbert.train()

Downloading (…)lve/main/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]



Step,Training Loss
100,0.7611
200,0.4908
300,0.4504
400,0.4205
500,0.3549
600,0.3557
700,0.3322
800,0.3215
900,0.2607
1000,0.196


In [47]:
ner_trainer_mbert.inference()

{'ASPECT': {'precision': 0.6824067022086824, 'recall': 0.7335243553008596, 'f1': 0.7070428092325902, 'number': 2443}, 'overall_precision': 0.6824067022086824, 'overall_recall': 0.7335243553008596, 'overall_f1': 0.7070428092325902, 'overall_accuracy': 0.9019835285929707}


In [48]:
del ner_trainer_mbert

### Restaurants

In [49]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'bert-base-multilingual-cased', 'mbert-is-aspect-seqlabeling_restaurants', BIO)
ner_trainer_mbert.train()

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Step,Training Loss
100,0.7198
200,0.4594
300,0.3825
400,0.3688
500,0.3073
600,0.2658


In [50]:
ner_trainer_mbert.inference()
del ner_trainer_mbert

{'ASPECT': {'precision': 0.6131782945736434, 'recall': 0.70625, 'f1': 0.6564315352697094, 'number': 1120}, 'overall_precision': 0.6131782945736434, 'overall_recall': 0.70625, 'overall_f1': 0.6564315352697094, 'overall_accuracy': 0.8861259586335115}


### Automobiles

In [51]:
ner_trainer_mbert = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'bert-base-multilingual-cased', 'mbert-is-aspect-seqlabeling_automobiles', BIO)
ner_trainer_mbert.train()

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

Step,Training Loss
100,0.7418
200,0.4771
300,0.3804
400,0.3477
500,0.2676
600,0.246


In [52]:
ner_trainer_mbert.inference()
del ner_trainer_mbert

{'ASPECT': {'precision': 0.6208487084870848, 'recall': 0.668986083499006, 'f1': 0.6440191387559808, 'number': 1006}, 'overall_precision': 0.6208487084870848, 'overall_recall': 0.668986083499006, 'overall_f1': 0.6440191387559808, 'overall_accuracy': 0.89476795743423}


## XLM-RoBERTa

### Both

In [53]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_dataset, custom_eval_dataset, custom_test_dataset,
    'xlm-roberta-base', 'xlmroberta-is-aspect-seqlabeling_both', BIO)
ner_trainer_xlmroberta.train()

Downloading (…)lve/main/config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]



Step,Training Loss
100,0.9534
200,0.4899
300,0.3917
400,0.3502
500,0.2992
600,0.3084
700,0.2803
800,0.2699
900,0.2248
1000,0.1757


In [54]:
ner_trainer_xlmroberta.inference()
del ner_trainer_xlmroberta

{'ASPECT': {'precision': 0.7032046613255645, 'recall': 0.790421612771183, 'f1': 0.7442667180574292, 'number': 2443}, 'overall_precision': 0.7032046613255645, 'overall_recall': 0.790421612771183, 'overall_f1': 0.7442667180574292, 'overall_accuracy': 0.9131191277114024}


### Restaurants

In [55]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_restaurants_dataset, custom_eval_restaurants_dataset, custom_test_restaurants_dataset,
    'xlm-roberta-base', 'xlmroberta-is-aspect-seqlabeling_restaurants', BIO)
ner_trainer_xlmroberta.train()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

Step,Training Loss
100,0.7629
200,0.4454
300,0.3491
400,0.3248
500,0.2659
600,0.2353


In [56]:
ner_trainer_xlmroberta.inference()
del ner_trainer_xlmroberta

{'ASPECT': {'precision': 0.6889991728701406, 'recall': 0.74375, 'f1': 0.7153284671532846, 'number': 1120}, 'overall_precision': 0.6889991728701406, 'overall_recall': 0.74375, 'overall_f1': 0.7153284671532846, 'overall_accuracy': 0.9075063908900767}


### Automobiles

In [57]:
ner_trainer_xlmroberta = NERTrainPipeline(
    custom_train_automobiles_dataset, custom_eval_automobiles_dataset, custom_test_automobiles_dataset,
    'xlm-roberta-base', 'xlmroberta-is-aspect-seqlabeling_automobiles', BIO)
ner_trainer_xlmroberta.train()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForTokenClassification: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-st

Step,Training Loss
100,0.8028
200,0.4911
300,0.358
400,0.3134
500,0.2652
600,0.2334


In [58]:
ner_trainer_xlmroberta.inference()
del ner_trainer_xlmroberta

{'ASPECT': {'precision': 0.6740947075208914, 'recall': 0.7216699801192843, 'f1': 0.6970715314450313, 'number': 1006}, 'overall_precision': 0.6740947075208914, 'overall_recall': 0.7216699801192843, 'overall_f1': 0.6970715314450313, 'overall_accuracy': 0.9132426840082767}


In [None]:
!mv -f '/content/rubert-is-aspect-seqlabeling_both' '/content/drive/MyDrive/models/'
!mv -f '/content/mbert-is-aspect-seqlabeling_both' '/content/drive/MyDrive/models/'
!mv -f '/content/xlmroberta-is-aspect-seqlabeling_both' '/content/drive/MyDrive/models/'