## Вход на Hugging Face

In [None]:
from huggingface_hub import HfApi, interpreter_login

# Log in to the Hugging Face Hub from the interpreter.
interpreter_login()
# Look up the account name of the logged-in user.
username = HfApi().whoami()["name"]
# Base repository name; later cells append a suffix such as "-base" or "-cointegrated".
REPO_NAME = f"{username}/RuSENTNE_project"

print(f"Homework repository: '{REPO_NAME}'")

## Загрузка датасета

In [None]:
# Download the three RuSentNE-evaluation CSV files (train, labeled validation, final)
# from GitHub into the working directory.
!wget -q https://raw.githubusercontent.com/dialogue-evaluation/RuSentNE-evaluation/main/train_data.csv
!wget -q https://raw.githubusercontent.com/dialogue-evaluation/RuSentNE-evaluation/main/validation_data_labeled.csv
!wget -q https://raw.githubusercontent.com/dialogue-evaluation/RuSentNE-evaluation/main/final_data.csv

In [None]:
import pandas as pd
# The downloaded files are tab-separated; final_data.csv is used as the test split.
train = pd.read_csv('train_data.csv', sep='\t')
validation = pd.read_csv('validation_data_labeled.csv', sep='\t')
test = pd.read_csv('final_data.csv', sep='\t')

## Перефразирование обучающей выборки

Загружаем модель для перефразирования.

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Hub checkpoint used to paraphrase training sentences.
MODEL_NAME = 'cointegrated/rut5-base-paraphraser'
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
# Move the model to the GPU; this call requires a CUDA device.
model.cuda();
# Put the model into evaluation (inference) mode.
model.eval();

def paraphrase(text, beams=5, grams=4, do_sample=False):
    """Generate a paraphrase of ``text`` with the module-level ``model``/``tokenizer``.

    Args:
        text: Input text passed to the tokenizer.
        beams: Number of beams used for generation.
        grams: Value passed as ``encoder_no_repeat_ngram_size``.
        do_sample: Whether generation samples instead of decoding greedily/with beams.

    Returns:
        The first generated sequence decoded to a string without special tokens.
    """
    encoded = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
    # Cap the output length at 1.5x the number of input tokens plus 10.
    length_limit = int(encoded.input_ids.shape[1] * 1.5 + 10)
    generation_options = dict(
        encoder_no_repeat_ngram_size=grams,
        num_beams=beams,
        max_length=length_limit,
        do_sample=do_sample,
    )
    generated = model.generate(**encoded, **generation_options)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Smoke-test the paraphraser on a single sentence.
print(paraphrase('Каждый охотник желает знать, где сидит фазан.'))
# Presumably the output observed by the author: Все охотники хотят знать где фазан сидит.



Проверим, как работает модель, на примере из датасета

In [None]:
# Paraphrase one more example sentence to inspect the model's output.
print(paraphrase('Владислав первым заметил возгорание и начал тушить его.'))

Перефразируем предложения положительного и отрицательного класса из выборки:

In [None]:
from tqdm.auto import tqdm

# Only rows whose label is non-zero (the positive and negative classes) are paraphrased.
paraphrase_mask = train["label"] != 0

new_sentences = []
# The progress-bar total is computed from the data instead of being hard-coded,
# and the bar is closed automatically when the loop finishes.
with tqdm(total=int(paraphrase_mask.sum())) as progress_bar:
  # Iterating over row records avoids label-based `[i]` lookups, which are only
  # correct when the frame has a default 0..n-1 index.
  for record in train.loc[paraphrase_mask].to_dict("records"):
    new_sent = paraphrase(record["sentence"])
    progress_bar.update(1)
    # Keep the paraphrase only when it differs from the source sentence.
    if new_sent != record["sentence"]:
      # NOTE: entity fields and offsets are copied unchanged from the source row,
      # although the paraphrased sentence has different text.
      new_sentences.append({"sentence": new_sent,
                            "entity": record["entity"],
                            "entity_tag": record["entity_tag"],
                            "entity_pos_start_rel": record["entity_pos_start_rel"],
                            "entity_pos_end_rel": record["entity_pos_end_rel"],
                            "label": record["label"]})

Создадим датафрейм с новыми предложениями.

In [None]:
# Collect the paraphrased rows into a frame with a fixed column order.
extension = pd.DataFrame(new_sentences, columns=['sentence', 'entity', 'entity_tag', 'entity_pos_start_rel', 'entity_pos_end_rel', 'label'])

In [None]:
# Preview the first paraphrased rows.
extension.head()

Сохраним полученные предложения в csv-файл

In [None]:
# Persist the paraphrases as a tab-separated file without the index column.
extension.to_csv('extension.csv', sep="\t", index=False)

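Иногда после перефразирования исходная сущность отсутствует в предложении, а скопированные из исходной строки позиции сущности (`entity_pos_start_rel`, `entity_pos_end_rel`) могут не соответствовать новому тексту.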

In [None]:
# Show the first paraphrased sentence next to its entity for a manual check.
print(extension["sentence"][0])
print(extension["entity"][0])

## Предобработка данных

### Расширение обучающей выборки

Объединим изначальную обучающую выборку с перефразированными предложениями.

In [None]:
# Reload the saved paraphrases and append them to the original training data.
extension = pd.read_csv('extension.csv', sep='\t')
train_extended = pd.concat([train, extension], ignore_index=True)
train_extended.tail()

Перемешаем полученную выборку

In [None]:
# Shuffle all rows with a fixed seed and rebuild a 0..n-1 index.
train_extended = train_extended.sample(frac=1, random_state=22).reset_index(drop=True)
train_extended.tail()

### Создание вопросов

In [None]:
# Install pymorphy3, used below to inflect entity names.
!pip install -q pymorphy3

In [None]:
import pymorphy3
morph = pymorphy3.MorphAnalyzer()

def question(df, sent='Как относятся к {}?', c='datv'):
  """Build one question string per row of ``df['entity']``.

  Every whitespace-separated word of the entity is inflected to the grammatical
  case ``c`` using the first parse returned by ``morph`` and its original
  capitalization is restored. If the inflection raises ``AttributeError``
  the entity is inserted unchanged.

  Args:
    df: Frame with an ``entity`` column.
    sent: Template with one ``{}`` placeholder for the inflected entity.
    c: Grammeme passed to ``inflect``.

  Returns:
    List of formatted question strings, in row order.
  """
  questions = []
  for entity in df['entity'].values:
    try:
      inflected_words = []
      for word in entity.split():
        inflected = morph.parse(word)[0].inflect({c})
        inflected_words.append(pymorphy3.shapes.restore_capitalization(inflected.word, word))
      final_form = ' '.join(inflected_words)
    except AttributeError:
      # Fall back to the entity exactly as it appears in the data.
      final_form = entity
    questions.append(sent.format(final_form))
  return questions

# Add the generated question as a new column to every split (the frames are modified in place).
train_extended['question'] = question(train_extended, 'Как относятся к {}?')
validation['question'] = question(validation, 'Как относятся к {}?')
test['question'] = question(test, 'Как относятся к {}?')
validation.head()

### Создание датасета

In [None]:
# Map the dataset labels (-1, 0, 1) to the class ids (0, 1, 2) used by the model.
label_dict = {-1: 0, 0: 1, 1: 2}
for frame in (train_extended, validation):
  # Back up the original labels only once. Without this guard, running the mapping
  # a second time would copy already-mapped ids into `raw_label` and then map
  # them again (id 2 has no entry in label_dict and would become NaN).
  if 'raw_label' not in frame.columns:
    frame['raw_label'] = frame["label"]
  frame['label'] = frame["raw_label"].map(label_dict)

In [None]:
# Install the Hugging Face libraries used for training and evaluation.
!pip install -q datasets transformers evaluate

In [None]:
from datasets import Dataset, DatasetDict
# Wrap the three pandas frames into a DatasetDict; the train split is the extended one.
dataset_dict = DatasetDict({"train": Dataset.from_pandas(train_extended),
                            "validation": Dataset.from_pandas(validation),
                            "test": Dataset.from_pandas(test)})

## Обучение обычной модели

In [None]:
import torch

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

Загрузка модели

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# NOTE: this rebinds the module-level `tokenizer` and `model`, which `paraphrase`
# also reads, so `paraphrase` no longer uses the T5 paraphraser after this cell.
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
# Three output classes, matching the three ids in label_dict.
model = AutoModelForSequenceClassification.from_pretrained("DeepPavlov/rubert-base-cased", num_labels=3).to(device)

Токенизация датасета

In [None]:
def tokenize_function(example):
    """Encode the question/sentence pairs of a batch with the module-level ``tokenizer``."""
    return tokenizer(example["question"], example["sentence"])

# Tokenize every split in batches.
tokenized_dataset = dataset_dict.map(tokenize_function, batched=True)
tokenized_dataset

Параметры обучения

In [None]:
import os
from transformers import DataCollatorWithPadding, TrainingArguments
from transformers import Trainer

# Disable Weights & Biases logging.
os.environ["WANDB_DISABLED"] = "true"
# Collator built from the current tokenizer; it pads the examples of each batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# `eval_strategy` replaces the deprecated `evaluation_strategy` argument and matches
# the TrainingArguments call used later in this notebook.
training_args = TrainingArguments(output_dir=f'{REPO_NAME}-base', push_to_hub=True, eval_strategy="epoch")

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    processing_class=tokenizer
)

In [None]:
# Fine-tune the model with the Trainer configured above.
trainer.train()

In [None]:
# Upload the trained model to the Hub.
# NOTE(review): the first positional argument of Trainer.push_to_hub appears to be the
# commit message rather than a repository name - confirm against the transformers docs.
trainer.push_to_hub(f"{REPO_NAME}-base")

Запишем предсказания модели на валидационной выборке

In [None]:
import numpy as np

def predict_labels(dataset):
    """Predict original dataset labels for ``dataset`` with the module-level ``trainer``.

    The class with the highest logit is chosen for every example and converted
    back to the original label through the inverse of ``label_dict``.

    Returns:
        List of original labels, one per example.
    """
    prediction_output = trainer.predict(dataset)
    class_ids = np.argmax(prediction_output[0], axis=-1)
    # Invert label_dict: class id -> original label.
    id_to_raw_label = {class_id: raw for raw, class_id in label_dict.items()}
    return [id_to_raw_label[class_id] for class_id in class_ids]

# Predict on the validation split and show the count and the first 25 labels.
validation_predictions = predict_labels(tokenized_dataset["validation"])
print(len(validation_predictions))
validation_predictions[:25]

Оценим качество модели на валидационной выборке

In [None]:
import evaluate

def compute_metrics(preds, labels):
    """Return the macro-averaged F1 result of ``preds`` against ``labels``.

    The "f1" metric is loaded through ``evaluate`` on every call; the value
    returned is whatever ``metric.compute`` produces (read below via the 'f1' key).
    """
    f1_metric = evaluate.load("f1")
    scores = f1_metric.compute(predictions=preds, references=labels, average="macro")
    return scores

# Attach the predictions as a column. A stale "predictions" column is dropped first
# so that this cell can be re-run: adding a column that already exists fails.
validation_with_predictions = tokenized_dataset["validation"]
if "predictions" in validation_with_predictions.column_names:
    validation_with_predictions = validation_with_predictions.remove_columns(["predictions"])
tokenized_dataset["validation"] = validation_with_predictions.add_column("predictions", validation_predictions)
# Macro F1 over all three classes, compared against the original labels.
f1_score_all = compute_metrics(tokenized_dataset["validation"]["predictions"], tokenized_dataset["validation"]["raw_label"])
# Macro F1 restricted to examples whose original label is not 0.
filtered_validation = tokenized_dataset["validation"].filter(lambda example: example["raw_label"]!=0)
f1_score_filtered = compute_metrics(filtered_validation["predictions"], filtered_validation["raw_label"])
print('Макро F1-мера{}{}.\nМакро F1-мера для положительного и отрицательного классов{}{}.'.format(':'.ljust(54), round(f1_score_all['f1'], 2), ':'.ljust(10), round(f1_score_filtered['f1'], 2)))

Теперь сохраним предсказания модели на тестовой выборке

In [None]:
# Predict on the test split; show the count and the predictions from position 1925 onwards.
test_predictions = predict_labels(tokenized_dataset["test"])
print(len(test_predictions))
test_predictions[1925:]

In [None]:
# Write the predictions, one per line without index or header, into a zip archive.
pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_base.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer_base.csv'}, index=False, header=False)

На тестовой выборке на платформе CodaLab для базовой модели было получено качество 54.21 по макро F1-мере для положительного и отрицательного классов, 64.77 - для трех классов.

## Обучение предобученной модели

In [None]:
import torch

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

Попробуем дообучить для анализа тональности именованных сущностей разные модели, уже предобученные на задаче анализа тональности предложений.

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Exactly one tokenizer/model pair is meant to be active at a time; the alternatives
# are kept commented out so the checkpoint can be switched by hand.
# tokenizer = AutoTokenizer.from_pretrained("blanchefort/rubert-base-cased-sentiment")
# model = AutoModelForSequenceClassification.from_pretrained("blanchefort/rubert-base-cased-sentiment", num_labels=3).to(device)

# tokenizer = AutoTokenizer.from_pretrained("seara/rubert-base-cased-russian-sentiment")
# model = AutoModelForSequenceClassification.from_pretrained("seara/rubert-base-cased-russian-sentiment", num_labels=3).to(device)

# tokenizer = AutoTokenizer.from_pretrained("r1char9/rubert-base-cased-russian-sentiment")
# model = AutoModelForSequenceClassification.from_pretrained("r1char9/rubert-base-cased-russian-sentiment", num_labels=3).to(device)

# Currently active checkpoint.
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny-sentiment-balanced")
model = AutoModelForSequenceClassification.from_pretrained("cointegrated/rubert-tiny-sentiment-balanced", num_labels=3).to(device)

Токенизация

In [None]:
def tokenize_function(example):
    """Encode the question/sentence pairs of a batch with the module-level ``tokenizer``."""
    return tokenizer(example["question"], example["sentence"])

# Re-tokenize every split with the tokenizer of the currently selected checkpoint.
tokenized_dataset = dataset_dict.map(tokenize_function, batched=True)
tokenized_dataset

Параметры обучения

In [None]:
import os
from transformers import DataCollatorWithPadding, TrainingArguments
from transformers import Trainer

# Disable Weights & Biases logging.
os.environ["WANDB_DISABLED"] = "true"
# Collator built from the current tokenizer; it pads the examples of each batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# `eval_strategy` replaces the deprecated `evaluation_strategy` argument and matches
# the TrainingArguments call used later in this notebook.
training_args = TrainingArguments(output_dir=f"{REPO_NAME}-cointegrated", push_to_hub=True, eval_strategy="epoch", num_train_epochs=3)

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    processing_class=tokenizer
)

In [None]:
# Fine-tune the currently selected checkpoint.
trainer.train()

In [None]:
# The first argument of Trainer.push_to_hub is the commit message; the target repository
# comes from the TrainingArguments. The message is derived from the trainer's own
# output_dir so it always names the run that is actually being uploaded (the previous
# hard-coded "-seara" suffix did not match the "-cointegrated" output_dir).
trainer.push_to_hub(commit_message=f"Upload {trainer.args.output_dir}")

Предсказания на валидационной выборке

In [None]:
import numpy as np

def predict_labels(dataset):
    """Predict original dataset labels for ``dataset`` with the module-level ``trainer``.

    The class with the highest logit is chosen for every example and converted
    back to the original label through the inverse of ``label_dict``.

    Returns:
        List of original labels, one per example.
    """
    prediction_output = trainer.predict(dataset)
    class_ids = np.argmax(prediction_output[0], axis=-1)
    # Invert label_dict: class id -> original label.
    id_to_raw_label = {class_id: raw for raw, class_id in label_dict.items()}
    return [id_to_raw_label[class_id] for class_id in class_ids]

# Predict on the validation split and print the count and the first 25 labels.
validation_predictions = predict_labels(tokenized_dataset["validation"])
print(len(validation_predictions))
print('{0}'.format(validation_predictions[:25]))

Подсчёт качества

In [None]:
import evaluate

def compute_metrics(preds, labels):
    """Return the macro-averaged F1 result of ``preds`` against ``labels``.

    The "f1" metric is loaded through ``evaluate`` on every call; the value
    returned is whatever ``metric.compute`` produces (read below via the 'f1' key).
    """
    f1_metric = evaluate.load("f1")
    scores = f1_metric.compute(predictions=preds, references=labels, average="macro")
    return scores

# Attach the predictions as a column. A stale "predictions" column is dropped first
# so that this cell can be re-run: adding a column that already exists fails.
validation_with_predictions = tokenized_dataset["validation"]
if "predictions" in validation_with_predictions.column_names:
    validation_with_predictions = validation_with_predictions.remove_columns(["predictions"])
tokenized_dataset["validation"] = validation_with_predictions.add_column("predictions", validation_predictions)
# Macro F1 over all three classes, compared against the original labels.
f1_score_all = compute_metrics(tokenized_dataset["validation"]["predictions"], tokenized_dataset["validation"]["raw_label"])
# Macro F1 restricted to examples whose original label is not 0.
filtered_validation = tokenized_dataset["validation"].filter(lambda example: example["raw_label"]!=0)
f1_score_filtered = compute_metrics(filtered_validation["predictions"], filtered_validation["raw_label"])
print('Макро F1-мера{}{}.\nМакро F1-мера для положительного и отрицательного классов{}{}.'.format(':'.ljust(54), round(f1_score_all['f1'], 2), ':'.ljust(10), round(f1_score_filtered['f1'], 2)))

Результаты для **r1char9/rubert-base-cased-russian-sentiment** на валидационной выборке:



```
Макро F1-мера:                                                     0.66.
Макро F1-мера для положительного и отрицательного классов:         0.42.
```



Результаты для **seara/rubert-base-cased-russian-sentiment** на валидационной выборке:



```
Макро F1-мера:                                                     0.67.
Макро F1-мера для положительного и отрицательного классов:         0.43.
```



Результаты для **blanchefort/rubert-base-cased-sentiment** на валидационной выборке:



```
Макро F1-мера:                                                     0.28.
Макро F1-мера для положительного и отрицательного классов:         0.0.
```



Результаты для **cointegrated/rubert-tiny-sentiment-balanced** на валидационной выборке:

```
Макро F1-мера:                                                     0.55.
Макро F1-мера для положительного и отрицательного классов:         0.39.
```


Запишем предсказания моделей на тестовой выборке

In [None]:
# Predict on the test split; show the count and the predictions from position 1925 onwards.
test_predictions = predict_labels(tokenized_dataset["test"])
print(len(test_predictions))
test_predictions[1925:]

In [None]:
# One archive per trained checkpoint; the output file name is switched by hand.
# NOTE(review): the active line writes a "_seara" archive while the checkpoint loaded
# above is cointegrated/rubert-tiny-sentiment-balanced - confirm the file name before running.
# pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_2.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer.csv'}, index=False, header=False)
# pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_3.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer.csv'}, index=False, header=False)
#pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_4.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer.csv'}, index=False, header=False)
pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_seara.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer.csv'}, index=False, header=False)

## Промежуточные результаты моделей



| Модель | F1(P,N)-macro| F1(P,N,0)-macro|
|----------|----------|----------|
| Модель без предобучения на анализ тональности (DeepPavlov/rubert-base-cased)  | 54.21   | 64.77   |
| seara/rubert-base-cased-russian-sentiment    |  52.37  |  63.13  |
| r1char9/rubert-base-cased-russian-sentiment    | 8.97   | 32.82   |
| blanchefort/rubert-base-cased-sentiment    | 14.05   | 36.41   |
| cointegrated/rubert-tiny-sentiment-balanced    | 30.91   | 46.81   |

## Предобученные модели на выборке без расширения

### Предобработка данных

In [None]:
# Install the dependencies of this section (same packages as installed earlier in the notebook).
!pip install -q pymorphy3
!pip install -q datasets transformers evaluate

In [None]:
import pymorphy3
from datasets import Dataset, DatasetDict

morph = pymorphy3.MorphAnalyzer()

def question(df, sent='Как относятся к {}?', c='datv'):
  """Build one question string per row of ``df['entity']``.

  This definition replaces the earlier ``question`` in this notebook, so it keeps
  that definition's optional ``sent`` and ``c`` parameters; calls that pass a
  template (as the earlier cells do) therefore keep working after this cell runs.
  With the defaults the output is the same as the fixed dative-case template.

  Every whitespace-separated word of the entity is inflected to the grammatical
  case ``c`` using the first parse returned by ``morph`` and its original
  capitalization is restored. If the inflection raises ``AttributeError``
  the entity is inserted unchanged.

  Args:
    df: Frame with an ``entity`` column.
    sent: Template with one ``{}`` placeholder for the inflected entity.
    c: Grammeme passed to ``inflect``.

  Returns:
    List of formatted question strings, in row order.
  """
  sentences = []
  for entity in df['entity'].values:
    try:
      inflected_list = [pymorphy3.shapes.restore_capitalization(morph.parse(x)[0].inflect({c}).word, x) for x in entity.split()]
      final_form = ' '.join(inflected_list)
    except AttributeError:
      # Fall back to the entity exactly as it appears in the data.
      final_form = entity
    sentences.append(sent.format(final_form))
  return sentences

# Build the question column for every split.
train['question'] = question(train)
validation['question'] = question(validation)
test['question'] = question(test)

# Map the dataset labels (-1, 0, 1) to the class ids (0, 1, 2) used by the model.
label_dict = {-1: 0, 0: 1, 1: 2}
for frame in (train, validation):
  # Back up the original labels only once. `validation` already has mapped labels
  # and a `raw_label` column when the earlier label-mapping cell of this notebook
  # has run; overwriting `raw_label` with the mapped ids and mapping again would
  # corrupt both columns (id 2 has no entry in label_dict and would become NaN).
  if 'raw_label' not in frame.columns:
    frame['raw_label'] = frame["label"]
  frame['label'] = frame["raw_label"].map(label_dict)

# The train split here is the original one, without the paraphrased rows.
dataset_dict = DatasetDict({"train": Dataset.from_pandas(train),
                            "validation": Dataset.from_pandas(validation),
                            "test": Dataset.from_pandas(test)})

### Обучение моделей

In [None]:
import torch

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

In [None]:
import evaluate
import os
import numpy as np
from transformers import DataCollatorWithPadding, TrainingArguments
from transformers import Trainer

os.environ["WANDB_DISABLED"] = "true"

class MyTrainer():
  """Small wrapper that owns a ``Trainer`` for one model/tokenizer pair."""

  def __init__(self, model, tokenizer, tokenized_dataset, training_arguments):
    """Create the wrapped ``Trainer``.

    Args:
      model: Model to train.
      tokenizer: Tokenizer used for the padding collator and as ``processing_class``.
      tokenized_dataset: Mapping with "train" and "validation" splits.
      training_arguments: Arguments forwarded to ``Trainer``.
    """
    padding_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    self.trainer = Trainer(
        model,
        training_arguments,
        train_dataset=tokenized_dataset["train"],
        eval_dataset=tokenized_dataset["validation"],
        data_collator=padding_collator,
        processing_class=tokenizer,
    )

  def training(self):
    """Run training on the wrapped ``Trainer``."""
    self.trainer.train()

  def predict_labels(self, dataset):
    """Return original dataset labels predicted for ``dataset``.

    The class with the highest logit is chosen for every example and converted
    back through the inverse of the module-level ``label_dict``.
    """
    prediction_output = self.trainer.predict(dataset)
    class_ids = np.argmax(prediction_output[0], axis=-1)
    id_to_raw_label = {class_id: raw for raw, class_id in label_dict.items()}
    return [id_to_raw_label[class_id] for class_id in class_ids]

In [None]:
from collections import defaultdict
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Nested mapping: model name -> metric name -> score (missing scores default to 0.0).
results = defaultdict(lambda: defaultdict(float))
# Sentiment checkpoints that are fine-tuned in turn by the loop below.
model_names = ["blanchefort/rubert-base-cased-sentiment", "seara/rubert-base-cased-russian-sentiment", "r1char9/rubert-base-cased-russian-sentiment", "cointegrated/rubert-tiny-sentiment-balanced"]

def tokenize_function(example):
  # Reads the module-level `tokenizer`; a variable named `tokenizer` that is local
  # to another function is not visible here.
  return tokenizer(example["question"], example["sentence"])

def compute_metrics(preds, labels):
  """Return the macro-averaged F1 result of ``preds`` against ``labels``.

  The "f1" metric is loaded through ``evaluate`` on every call; the value
  returned is whatever ``metric.compute`` produces (read elsewhere via the 'f1' key).
  """
  f1_metric = evaluate.load("f1")
  scores = f1_metric.compute(predictions=preds, references=labels, average="macro")
  return scores

def apply_to_model(model_name, training_arguments):
  """Fine-tune one checkpoint, record its validation F1 scores and save test predictions.

  Args:
    model_name: Hub id of the checkpoint to load; also the key used in ``results``.
    training_arguments: ``TrainingArguments`` passed to the trainer.

  Side effects:
    Stores ``f1_PN0`` (all classes) and ``f1_PN`` (examples whose original label is
    not 0) in the module-level ``results`` and writes a zip archive with the test
    predictions, named after the first four characters of ``model_name``.
  """
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3).to(device)

  # Tokenize with the tokenizer loaded for THIS checkpoint. The module-level
  # tokenize_function reads the global `tokenizer`, which is not the local
  # variable created above, so it would encode the data for a different model.
  def encode_pairs(batch):
    return tokenizer(batch["question"], batch["sentence"])

  tokenized_dataset = dataset_dict.map(encode_pairs, batched=True)
  trainer = MyTrainer(model, tokenizer, tokenized_dataset, training_arguments)
  trainer.training()

  # Score the validation split against the original labels.
  validation_predictions = trainer.predict_labels(tokenized_dataset["validation"])
  tokenized_dataset["validation"] = tokenized_dataset["validation"].add_column("predictions", validation_predictions)
  f1_score_all = compute_metrics(tokenized_dataset["validation"]["predictions"], tokenized_dataset["validation"]["raw_label"])
  filtered_validation = tokenized_dataset["validation"].filter(lambda example: example["raw_label"]!=0)
  f1_score_filtered = compute_metrics(filtered_validation["predictions"], filtered_validation["raw_label"])
  results[model_name]["f1_PN0"] = round(f1_score_all['f1'], 2)
  results[model_name]["f1_PN"] = round(f1_score_filtered['f1'], 2)

  # Save the test predictions, one per line without index or header.
  test_predictions = trainer.predict_labels(tokenized_dataset["test"])
  pd.Series(test_predictions).to_csv('RuSentNE_predictions_{}.zip'.format(model_name[:4]), compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer.csv'}, index=False, header=False)

# Fine-tune every checkpoint in turn; all runs share the same output directory.
for model_name in model_names:
  # Alternative run with explicit weight_decay and learning_rate (currently disabled):
  # apply_to_model(model_name, TrainingArguments(output_dir='./results', eval_strategy="epoch", num_train_epochs=3, weight_decay=0.01, learning_rate=1e-6))
  apply_to_model(model_name, TrainingArguments(output_dir='./results', eval_strategy="epoch", num_train_epochs=3))

### Результаты

**Качество моделей при дефолтных значениях параметров weight_decay (0) и learning_rate (5e-5)**

In [None]:
import pandas as pd

# Validation scores recorded for the run with default training arguments.
# (The "r1char9/..." key previously ended with a stray tab character, which became
# part of the row label in the resulting table.)
results_for_default_training_args = {"blanchefort/rubert-base-cased-sentiment": {"f1_PN0": 0.36, "f1_PN": 0.11},
                                     "seara/rubert-base-cased-russian-sentiment": {"f1_PN0": 0.69, "f1_PN": 0.46},
                                     "r1char9/rubert-base-cased-russian-sentiment": {"f1_PN0": 0.67, "f1_PN": 0.45},
                                     "cointegrated/rubert-tiny-sentiment-balanced": {"f1_PN0": 0.54, "f1_PN": 0.33},
}
# One row per model, one column per metric.
df_base_default = pd.DataFrame(results_for_default_training_args)
df_base_default = df_base_default.transpose()
# Reference scores listed in the same model order as the rows above.
f1_PN0_base = [0.28, 0.67, 0.66, 0.55]
df_base_default['compare to base model\'s f1_PN0'] = f1_PN0_base
f1_PN_base = [0.0, 0.43, 0.42, 0.39]
df_base_default['compare to base model\'s  f1_PN'] = f1_PN_base
# Reorder the columns so that each metric sits next to its reference value.
df_base_default = df_base_default.iloc[:, [0, 2, 1, 3]]

df_base_default

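**Качество моделей при заданных значениях параметров weight_decay (0.01) и learning_rate (1e-6)** — значительное ухудшение по сравнению с результатами при значениях по умолчанию. Чтобы воспроизвести эти результаты, в цикле обучения выше нужно использовать закомментированный вызов `apply_to_model` с `weight_decay=0.01` и `learning_rate=1e-6`: таблица ниже строится по словарю `results` последнего запуска.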

In [None]:
import pandas as pd

# Scores of the default-argument run, listed in the order of `model_names`.
f1_PN0_default = [0.36, 0.69, 0.67, 0.54]
f1_PN_default = [0.11, 0.46, 0.45, 0.33]

# One row per model, taken from the `results` of the latest training loop.
df_default_set = pd.DataFrame(results).transpose()
df_default_set['compare to default f1_PN0'] = f1_PN0_default
df_default_set['compare to default f1_PN'] = f1_PN_default
# Reorder the columns so that each metric sits next to its reference value.
column_order = [0, 2, 1, 3]
df_default_set = df_default_set.iloc[:, column_order]

df_default_set

Результаты на CodaLab

| Модель | F1(P,N)-macro| F1(P,N,0)-macro|
|----------|----------|----------|
| seara/rubert-base-cased-russian-sentiment    |  52.37  |  63.13  |
| cointegrated/rubert-tiny-sentiment-balanced    | 30.91   | 46.81   |

## Ансамбль?

In [None]:
def create_Trainer(model):
  """Wrap ``model`` in a ``Trainer`` built from module-level settings.

  The training arguments, tokenized splits, data collator and tokenizer are all
  read from the module-level variables ``training_args``, ``tokenized_dataset``,
  ``data_collator`` and ``tokenizer``.
  """
  return Trainer(
      model,
      training_args,
      train_dataset=tokenized_dataset["train"],
      eval_dataset=tokenized_dataset["validation"],
      data_collator=data_collator,
      processing_class=tokenizer,
  )

In [None]:
# Load the three fine-tuned checkpoints from the Hub. The repository ids are hard-coded
# to the "kravmar" namespace rather than built from REPO_NAME.
# NOTE: create_Trainer passes the module-level `tokenizer` and `data_collator` to every
# Trainer, not the tokenizer loaded next to each model here.
tokenizer1 = AutoTokenizer.from_pretrained("kravmar/RuSENTNE_project-base")
model1 = AutoModelForSequenceClassification.from_pretrained("kravmar/RuSENTNE_project-base")
trainer1 = create_Trainer(model1)
tokenizer2 = AutoTokenizer.from_pretrained("kravmar/RuSENTNE_project-seara")
model2 = AutoModelForSequenceClassification.from_pretrained("kravmar/RuSENTNE_project-seara")
trainer2 = create_Trainer(model2)
tokenizer3 = AutoTokenizer.from_pretrained("kravmar/RuSENTNE_project-cointegrated")
model3 = AutoModelForSequenceClassification.from_pretrained("kravmar/RuSENTNE_project-cointegrated")
trainer3 = create_Trainer(model3)
# Both lists use the same order (2, 1, 3) so that index i pairs a trainer with its tokenizer.
models = [trainer2, trainer1, trainer3]
tokenizers = [tokenizer2, tokenizer1, tokenizer3]

def tokenize_function(example, tokenizer):
    """Encode the question/sentence pair(s) in ``example`` with the given ``tokenizer``.

    This two-argument definition replaces the one-argument ``tokenize_function``
    defined earlier in this notebook.
    """
    return tokenizer(example["question"], example["sentence"])

def majority_voting(dataset, models, tokenizers):
    """Combine the predictions of several trainers by majority vote.

    Args:
        dataset: Iterable of examples with "question" and "sentence" fields.
        models: Objects exposing ``predict`` (the trainers), indexed by position.
        tokenizers: Tokenizers; ``tokenizers[i]`` is used for ``models[i]``.

    Returns:
        List of original dataset labels, one per example. The winning class id is
        the ``argmax`` of ``np.bincount`` over the per-model class ids and is
        mapped back through the inverse of the module-level ``label_dict``.
    """
    id_to_raw_label = {class_id: raw for raw, class_id in label_dict.items()}
    per_model_ids = []
    for position in range(len(models)):
        # Encode example by example with the tokenizer paired with this model.
        encoded_examples = []
        for example in dataset:
            encoded_examples.append(tokenize_function(example, tokenizers[position]))
        prediction_output = models[position].predict(encoded_examples)
        per_model_ids.append(np.argmax(prediction_output[0], axis=-1))
    # Rows are models, columns are examples; transpose to vote per example.
    votes = np.array(per_model_ids)
    winners = [np.bincount(example_votes).argmax() for example_votes in votes.T]
    return [id_to_raw_label[class_id] for class_id in winners]

# Majority-vote predictions on the validation split; show the count and the first 25 labels.
validation_predictions = majority_voting(dataset_dict['validation'], models, tokenizers)
print(len(validation_predictions))
validation_predictions[:25]

In [None]:
# Replace the existing "predictions" column with the ensemble predictions.
# This requires that the column already exists on the validation split.
tokenized_dataset["validation"] = tokenized_dataset["validation"].remove_columns(["predictions"])
tokenized_dataset["validation"] = tokenized_dataset["validation"].add_column("predictions", validation_predictions)
# Macro F1 over all three classes, compared against the original labels.
f1_score_all = compute_metrics(tokenized_dataset["validation"]["predictions"], tokenized_dataset["validation"]["raw_label"])
# Macro F1 restricted to examples whose original label is not 0.
filtered_validation = tokenized_dataset["validation"].filter(lambda example: example["raw_label"]!=0)
f1_score_filtered = compute_metrics(filtered_validation["predictions"], filtered_validation["raw_label"])
print('Макро F1-мера{}{}.\nМакро F1-мера для положительного и отрицательного классов{}{}.'.format(':'.ljust(54), round(f1_score_all['f1'], 2), ':'.ljust(10), round(f1_score_filtered['f1'], 2)))

In [None]:
# Majority-vote predictions on the test split; show the count and the tail from position 1925.
test_predictions = majority_voting(dataset_dict['test'], models, tokenizers)
print(len(test_predictions))
test_predictions[1925:]

In [None]:
# Write the ensemble predictions, one per line without index or header, into a zip archive.
pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_ensemble.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer_ensemble.csv'}, index=False, header=False)

Результаты на CodaLab

| Модель | F1(P,N)-macro| F1(P,N,0)-macro|
|----------|----------|----------|
| Ансамбль из трёх моделей (голосование большинством)  | 50.73     | 62.34   |

#### Более сложный ансамбль с помощью Stacking

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def get_model_predictions(models, tokenizer, dataset):
    """Collect the predicted class ids of every model for ``dataset``.

    Args:
        models: Objects exposing ``predict`` (the trainers).
        tokenizer: Sequence of tokenizers; ``tokenizer[i]`` is used for ``models[i]``.
        dataset: Dataset with "question" and "sentence" columns, supporting ``map``.

    Returns:
        Array with one row per example and one column per model, holding the
        ``argmax`` class id (not the logits) of each model.
    """
    per_model_ids = []
    for position, model in enumerate(models):
        # Re-tokenize the dataset with the tokenizer paired with this model.
        encoded = dataset.map(lambda example: tokenize_function(example, tokenizer[position]), batched=True)
        prediction_output = model.predict(encoded)
        per_model_ids.append(np.argmax(prediction_output[0], axis=-1))
    return np.stack(per_model_ids, axis=1)

def stacking_ensemble(models, tokenizers, dataset, labels):
    """Fit a logistic-regression meta-model on the base models' predicted class ids.

    The predictions are split 80/20 with a fixed seed; the meta-model is fitted on
    the larger part and its accuracy on the held-out part is printed.

    Args:
        models: Base models passed on to ``get_model_predictions``.
        tokenizers: Tokenizers paired with ``models`` by position.
        dataset: Dataset the base models are run on.
        labels: Target labels aligned with ``dataset``.

    Returns:
        The fitted ``LogisticRegression`` meta-model.
    """
    meta_features = get_model_predictions(models, tokenizers, dataset)
    X_train, X_val, y_train, y_val = train_test_split(
        meta_features, labels, test_size=0.2, random_state=42
    )
    meta_model = LogisticRegression(class_weight='balanced')
    meta_model.fit(X_train, y_train)

    held_out_predictions = meta_model.predict(X_val)
    accuracy = accuracy_score(y_val, held_out_predictions)
    print(f"Meta-model accuracy: {accuracy * 100:.2f}%")

    return meta_model

def predict_stacking_ensemble(meta_model, models, tokenizers, dataset):
    """Predict original dataset labels for ``dataset`` with the stacked ensemble.

    The base models' predicted class ids are fed to ``meta_model`` and its output
    is mapped back through the inverse of the module-level ``label_dict``.
    """
    # These are the base models' class ids, one column per model.
    meta_features = get_model_predictions(models, tokenizers, dataset)
    stacked_ids = meta_model.predict(meta_features)
    id_to_raw_label = {class_id: raw for raw, class_id in label_dict.items()}
    return [id_to_raw_label[class_id] for class_id in stacked_ids]


In [None]:
base_models = [model1, model2, model3]
# Keep trainers and tokenizers in the same order: get_model_predictions pairs them by
# position. Previously `tokenizers` was rebuilt as [1, 2, 3] while `models` was still
# [trainer2, trainer1, trainer3], so two trainers received each other's tokenizer.
models = [trainer1, trainer2, trainer3]
tokenizers = [tokenizer1, tokenizer2, tokenizer3]
# The meta-model is fitted on the base models' predictions for the training split.
ensemble = stacking_ensemble(models, tokenizers, dataset_dict['train'], dataset_dict['train']['label'])

In [None]:
# Stacked-ensemble predictions on the validation split; show the count and the first 25 labels.
validation_predictions = predict_stacking_ensemble(ensemble, models, tokenizers, dataset_dict['validation'])
print(len(validation_predictions))
validation_predictions[:25]

In [None]:
# Replace the existing "predictions" column with the stacked-ensemble predictions.
# This requires that the column already exists on the validation split.
tokenized_dataset["validation"] = tokenized_dataset["validation"].remove_columns(["predictions"])
tokenized_dataset["validation"] = tokenized_dataset["validation"].add_column("predictions", validation_predictions)
# Macro F1 over all three classes, compared against the original labels.
f1_score_all = compute_metrics(tokenized_dataset["validation"]["predictions"], tokenized_dataset["validation"]["raw_label"])
# Macro F1 restricted to examples whose original label is not 0.
filtered_validation = tokenized_dataset["validation"].filter(lambda example: example["raw_label"]!=0)
f1_score_filtered = compute_metrics(filtered_validation["predictions"], filtered_validation["raw_label"])
print('Макро F1-мера{}{}.\nМакро F1-мера для положительного и отрицательного классов{}{}.'.format(':'.ljust(54), round(f1_score_all['f1'], 2), ':'.ljust(10), round(f1_score_filtered['f1'], 2)))

In [None]:
# Stacked-ensemble predictions on the test split; show the count and the tail from position 1925.
test_predictions = predict_stacking_ensemble(ensemble, models, tokenizers, dataset_dict['test'])
print(len(test_predictions))
test_predictions[1925:]

In [None]:
# Write the stacked-ensemble predictions, one per line without index or header, into a zip archive.
pd.Series(test_predictions).to_csv('RuSentNE_predictions_Trainer_ensemble_stack.zip', compression={'method': 'zip', 'archive_name': 'RuSentNE_predictions_Trainer_ensemble.csv'}, index=False, header=False)

Результаты на CodaLab

| Модель | F1(P,N)-macro| F1(P,N,0)-macro|
|----------|----------|----------|
| Ансамбль из трёх моделей (stacking)  | 50.62     | 62.19   |