В данном ноутбуке производится замер качества на тестовой выборке комбинаций с весами, полученными в ноутбуках 2-3.

В ячейке ниже нужно указать:

- название задачи mteb
- название BERT-like модели в библиотеке transformers
- веса, подобранные с помощью optuna, в формате словаря с ключами `w_1`, …, `w_12` (по одному весу на слой). Если веса с optuna не подбирались, словарь следует оставить пустым.
- путь к csv файлу с таблицей весов, полученных с помощью нашего метода
- список слоев, которые предполагается использовать
- номер слоя, показавшего лучшее качество на обучающей выборке (нумерация с 0, начиная с первого трансформерного слоя; выход слоя эмбеддингов не учитывается)

In [None]:
# Experiment configuration: edit these values before running the notebook.
model_name = "bert-base-uncased"  # your model name
task_name = "STSBenchmark"  # your mteb task name
best_layer = 11  # best layer on train part
layers_to_use = list(range(12))  # layers to use in the combination
weights_path = ...  # placeholder: replace with the path to the weights CSV
optuna_weights = {}  # leave empty if no weights were tuned with optuna

### Preparation

In [1]:
from IPython.display import clear_output

# Install the mteb package through a notebook shell escape, then clear the
# installer's output from the cell.
!pip install mteb
clear_output()

In [13]:
import mteb

# Fetch the configured task once per split (English only): first the test
# split, then the train split.
test_task, train_task = (
    mteb.get_task(task_name, eval_splits=[split], languages=['eng'])
    for split in ("test", "train")
)

# One MTEB runner per split: `evaluation` scores on train,
# `evaluation_final` scores on test.
evaluation = mteb.MTEB(tasks=[train_task])
evaluation_final = mteb.MTEB(tasks=[test_task])

In [14]:
from mteb.encoder_interface import PromptType
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
import torch
import pandas as pd

class CustomModel:
    """Encoder that embeds text as a weighted sum of mean-pooled hidden layers.

    Each batch is tokenized and passed through a ``transformers`` model loaded
    with ``output_hidden_states=True``. Every selected layer is mean-pooled
    over the non-padding tokens, multiplied by its weight, and the weighted
    layer vectors are summed into one embedding per text.

    Layer indices address ``hidden_states[1:]``: the first entry of the
    model's ``hidden_states`` tuple is skipped, so index 0 is the entry right
    after it and -1 is the last one.
    """

    def __init__(self, model_name="bert-base-uncased", device="cuda" if torch.cuda.is_available() else "cpu", layers_to_use=None, layers_weights=None, batch_size=64):
        """Load the tokenizer and model and store the combination settings.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            device: Device the model and the tokenized inputs are moved to.
            layers_to_use: Layer indices to combine; ``None`` means ``[-1]``.
            layers_weights: One weight per entry of ``layers_to_use``;
                ``None`` means ``[1.0]``.
            batch_size: Number of texts encoded per forward pass.
        """
        self.model_name = model_name
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModel.from_pretrained(self.model_name, output_hidden_states=True).to(self.device)
        # None sentinels replace the former mutable list defaults, which were
        # shared between every instance created without explicit arguments.
        self.layers_to_use = [-1] if layers_to_use is None else layers_to_use
        self.layers_weights = [1.0] if layers_weights is None else layers_weights
        self.batch_size = batch_size

    def get_layer_embedding(self, batch: list[str], layers: list[int], weights: list[float]) -> torch.Tensor:
        """Return the weighted sum of mean-pooled layer outputs for one batch.

        Args:
            batch: Texts to tokenize and run through the model.
            layers: Indices into ``hidden_states[1:]``.
            weights: Weights matched to ``layers`` by position; entries beyond
                ``len(layers)`` are ignored.

        Returns:
            A tensor on ``self.device`` with one row per text in ``batch``.

        Raises:
            IndexError: If a layer index is out of range or ``weights`` has
                fewer entries than ``layers``.
        """
        encoded_inputs = self.tokenizer(
            batch, padding=True, truncation=True, return_tensors="pt"
        ).to(self.device)
        model_output = self.model(**encoded_inputs)
        hidden_states = model_output.hidden_states[1:]

        # The mask is the same for every layer, so build it once. The trailing
        # singleton axis broadcasts against each layer's hidden dimension.
        mask = encoded_inputs['attention_mask'].unsqueeze(-1).float()
        # Clamp so that a row with no unmasked tokens yields zeros, not NaN.
        token_counts = torch.clamp(mask.sum(1), min=1e-9)

        pooled_layers = []
        for i, layer_idx in enumerate(layers):
            layer_output = hidden_states[layer_idx]
            mean_pooled = torch.sum(layer_output * mask, 1) / token_counts
            # Cast to a Python float so NumPy scalar weights (e.g. elements of
            # an np.ndarray) are applied as plain scalars.
            pooled_layers.append(float(weights[i]) * mean_pooled)

        return torch.sum(torch.stack(pooled_layers), dim=0)

    def encode(
        self,
        sentences: list[str],
        task_name: str,
        prompt_type: PromptType | None = None,
        **kwargs,
    ) -> np.ndarray:
        """Encodes the given sentences using the encoder.

        Args:
            sentences: The sentences to encode.
            task_name: The name of the task.  (Not directly used in this example, but kept for MTEB compatibility)
            prompt_type: The prompt type to use. (Not directly used in this example, but kept for MTEB compatibility)
            **kwargs: Additional arguments; accepted and ignored.

        Returns:
            The encoded sentences as a numpy array with one row per sentence.
            For empty input an array with zero rows is returned.
        """
        if len(sentences) == 0:
            # np.concatenate raises on an empty list, so return an explicit
            # empty matrix. NOTE(review): assumes the model config exposes
            # `hidden_size` and that embeddings are float32 - confirm.
            hidden_size = getattr(self.model.config, "hidden_size", 0)
            return np.empty((0, hidden_size), dtype=np.float32)

        self.model.eval()
        all_embeddings = []
        with torch.no_grad():
            for start in range(0, len(sentences), self.batch_size):
                batch = sentences[start:start + self.batch_size]
                combination = self.get_layer_embedding(batch, self.layers_to_use, self.layers_weights)
                # Move each batch to host memory before collecting it.
                all_embeddings.append(combination.cpu().numpy())
        return np.concatenate(all_embeddings, axis=0)

In [4]:
# Load the table of weights from the CSV file configured in `weights_path`.
df_optimal_weights = pd.read_csv(weights_path)

In [15]:
import ast

def convert_string_to_list(string):
    """Parse a Python literal from its text form.

    Returns whatever ``ast.literal_eval`` produces for ``string``, or ``None``
    when the input is rejected with ``ValueError`` or ``SyntaxError``.
    """
    try:
        parsed = ast.literal_eval(string)
    except (ValueError, SyntaxError):
        return None
    else:
        return parsed

def string_to_float_list(string):
    """Parse a bracketed list of numbers from its text form.

    Accepts whitespace-separated values (``"[0.1 0.2\\n 0.3]"``) as well as
    comma-separated values (``"[0.1, 0.2, 0.3]"``). Whitespace around the
    brackets is ignored.

    Args:
        string: Text to parse.

    Returns:
        A list of floats, or ``None`` when ``string`` is not a string or any
        token is not a valid float.
    """
    try:
        # Trim outer whitespace first so the brackets really sit at the ends.
        inner = string.strip().strip('[]')
        # Treat commas as separators too, so both list styles parse.
        tokens = inner.replace(',', ' ').split()
        return [float(token) for token in tokens]
    except (ValueError, AttributeError):
        return None

In [7]:
def test_weights(model_name, df_optimal_weights, layers, best_layer, optuna_weights):
    """Evaluate several layer-weighting schemes on the test split and print their main scores.

    Runs, in order: the last layer alone, ``best_layer`` alone, equal weights
    over ``layers``, the normalized optuna weights (only when provided), and
    every row of ``df_optimal_weights`` whose ``layers`` column equals
    ``str(layers)``. Each run writes its results under
    ``results/{model_name}/{layers}/...``.

    Args:
        model_name: Model identifier forwarded to ``CustomModel``.
        df_optimal_weights: Table with ``layers``, ``weights`` and
            ``correlation`` columns. It is not modified.
        layers: Layer indices used for the combinations.
        best_layer: Index of the single layer evaluated as "best layer".
        optuna_weights: Sequence of optuna weights; empty to skip that run.

    Raises:
        ValueError: If a selected row's ``weights`` value cannot be parsed.
    """

    def report(label, layers_to_use, layers_weights, subfolder):
        # Build the encoder, evaluate it on the test split, print the main score.
        model = CustomModel(model_name=model_name, layers_to_use=layers_to_use, layers_weights=layers_weights, batch_size=256)
        quality = evaluation_final.run(model, output_folder=f"results/{model_name}/{layers}/{subfolder}")
        print(f"{label.upper()} accuracy = {quality[0].scores['test'][0]['main_score']}")

    # Index -1 selects the final layer for any model depth; the previous
    # hard-coded 11 was only correct for 12-layer models.
    report('last layer', [-1], [1], "last_layer")
    print()
    report('best layer', [best_layer], [1], "best_layer")
    print()
    equal_w = np.array([1] * len(layers)) / len(layers)
    report('equal weights', layers, equal_w, "equal")
    print()
    # The original `len(optuna_weights != 0)` raised TypeError for any list.
    if len(optuna_weights) != 0:
        optuna_w = np.array(optuna_weights) / np.sum(np.array(optuna_weights))
        report('optuna weights', layers, optuna_w, "optuna")
    layers_df = df_optimal_weights[df_optimal_weights.layers == str(layers)]
    for _, row in layers_df.iterrows():
        print()
        weights = row.weights
        # Parse per row instead of overwriting the caller's column, so calling
        # this function twice on the same table still works.
        if not isinstance(weights, (list, tuple, np.ndarray)):
            weights = string_to_float_list(weights)
        if weights is None:
            raise ValueError(f"could not parse weights for correlation {row.correlation!r}: {row.weights!r}")
        report(row.correlation, layers, weights, row.correlation)

### Test weights

In [None]:
# Re-read the weights table from the configured CSV path.
df_optimal_weights = pd.read_csv(weights_path)

# Flatten the optuna dictionary into an ordered list by reading the keys
# w_1 ... w_12; an empty dictionary becomes an empty list.
optuna_w = (
    [optuna_weights[f"w_{idx}"] for idx in range(1, 13)]
    if len(optuna_weights) != 0
    else []
)

test_weights(model_name, df_optimal_weights, layers_to_use, best_layer, optuna_w)