This notebook computes representational similarity measures between every pair of model layers, as well as the quality of each individual layer on the training split; the results are saved to two CSV files. In the cell below, specify the name of the task in the MTEB benchmark and the name of the BERT-like model from the Transformers library. It is important that the corresponding MTEB dataset has more than just a test split (a train split is required) — this can be checked in the benchmark space on Hugging Face.

### Preparation

In [None]:
task_name = "STSBenchmark"  # your MTEB task name
model_name = "bert-base-uncased"  # your model name


def filename_safe(name: str) -> str:
    """Return ``name`` with path separators replaced so it is usable inside a file name."""
    return name.replace("/", "__").replace("\\", "__")


# Model ids of the form "org/model" contain "/", which would otherwise be
# interpreted as a sub-directory when the CSV files are written.
path_train_quality = f"{filename_safe(task_name)}_{filename_safe(model_name)}_layer_quality_train.csv"
path_correlation = f"{filename_safe(task_name)}_{filename_safe(model_name)}_correlation.csv"

In [None]:
from IPython.display import clear_output
!pip install mteb
clear_output()

In [None]:
import mteb

In [None]:
# Load the configured task, restricted to its "train" split.
task = mteb.get_task(
    task_name,
    eval_splits=["train"],
)
# Runner object used by the cells below to evaluate each model on that task.
evaluation = mteb.MTEB(
    tasks=[task],
)

In [None]:
# Clone the `resi` repository into the current working directory.
# NOTE(review): presumably this is what provides the `repsim` package imported
# further down; confirm against the repository.
!git clone https://github.com/mklabunde/resi

In [None]:
%cd /kaggle/working/resi
!pip install -r requirements.txt
!pip install -e .
clear_output()

In [None]:
from repsim.measures import HardCorrelationMatch, \
    DistanceCorrelation, JaccardSimilarity, SecondOrderCosineSimilarity, \
    AlignedCosineSimilarity, RSA, ConcentricityDifference, EigenspaceOverlapScore, \
    SVCCA, RankSimilarity

class Measurer:
    """Applies a fixed set of ten `repsim.measures` objects to a pair of representations.

    One instance of every measure is created up front and reused for each call.
    """

    def __init__(self):
        # One reusable instance per measure.
        self.hard_corr = HardCorrelationMatch()
        self.dist_corr = DistanceCorrelation()
        self.jaccard = JaccardSimilarity()
        self.second_order = SecondOrderCosineSimilarity()
        self.aligned_cosine = AlignedCosineSimilarity()
        self.rsa = RSA()
        self.conc_dist = ConcentricityDifference()
        self.eos = EigenspaceOverlapScore()
        self.svcca = SVCCA()
        self.rank_sim = RankSimilarity()

    def __call__(self, R, Rp):
        """Evaluate every measure on the pair ``(R, Rp)``.

        Args:
            R: First representation; must support ``.cpu().numpy()``.
            Rp: Second representation; must support ``.cpu().numpy()``.

        Returns:
            A dict mapping each measure name to whatever that measure returned.
        """
        # Every measure is called with shape='nd'.
        # NOTE(review): presumably 'nd' means (samples, features) -- confirm in repsim.
        scores = {}

        before_concentricity = (
            ('hard_correlation', self.hard_corr),
            ('distance_correlation', self.dist_corr),
            ('jaccard', self.jaccard),
            ('second_order', self.second_order),
            ('aligned_cosine', self.aligned_cosine),
            ('rsa', self.rsa),
        )
        for key, measure in before_concentricity:
            scores[key] = measure(R, Rp, shape='nd')

        # The concentricity measure is the only one fed NumPy copies of the inputs.
        R_np = R.cpu().numpy()
        Rp_np = Rp.cpu().numpy()
        scores['concentricity_distance'] = self.conc_dist(R_np, Rp_np, shape='nd')

        after_concentricity = (
            ('eos', self.eos),
            ('svcca', self.svcca),
            ('rank_similarity', self.rank_sim),
        )
        for key, measure in after_concentricity:
            scores[key] = measure(R, Rp, shape='nd')

        return scores

## Measure layer quality on the train split

In [None]:
from mteb.encoder_interface import PromptType
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
import torch
import pandas as pd

class CustomModel:
    """MTEB-compatible encoder that embeds sentences from selected hidden layers.

    Every selected layer is mean-pooled over the non-padding tokens and the
    pooled layers are then averaged. Selecting a single layer, or the same
    layer twice, therefore yields exactly that layer's mean-pooled embedding.
    """

    def __init__(self, comb_dict, model_name="bert-base-uncased", device="cuda" if torch.cuda.is_available() else "cpu", layers_to_use=[-1], batch_size=256):
        """Load the tokenizer and model and store the encoding settings.

        Args:
            comb_dict: Stored on the instance; not read by this class.
            model_name: Name passed to ``AutoTokenizer`` / ``AutoModel``.
            device: Device the model and the tokenized inputs are moved to.
            layers_to_use: Indices into the hidden states after the first
                entry has been dropped (see ``get_layer_embedding``).
            batch_size: Number of sentences encoded per forward pass.
        """
        self.model_name = model_name
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModel.from_pretrained(self.model_name, output_hidden_states=True).to(self.device)
        # Copy so the shared default list is never aliased by an instance.
        self.layers_to_use = list(layers_to_use)
        self.batch_size = batch_size
        self.comb_dict = comb_dict

    def get_layer_embedding(self, batch: list[str], layers: list[int]) -> torch.Tensor:
        """Return the average of the mean-pooled outputs of ``layers`` for ``batch``.

        Args:
            batch: Sentences to tokenize and run through the model.
            layers: Indices into ``hidden_states[1:]``.

        Returns:
            A tensor with one embedding per sentence (still on ``self.device``).
        """
        encoded_inputs = self.tokenizer(
            batch, padding=True, truncation=True, return_tensors="pt"
        ).to(self.device)
        model_output = self.model(**encoded_inputs)
        # Per the Transformers docs the first hidden state is the embedding output;
        # dropping it makes index 0 refer to the first transformer layer.
        hidden_states = model_output.hidden_states[1:]
        token_mask = encoded_inputs['attention_mask'].unsqueeze(-1)

        pooled_layers = []
        for layer_idx in layers:
            layer_output = hidden_states[layer_idx]
            mask = token_mask.expand(layer_output.size()).float()
            token_sum = torch.sum(layer_output * mask, 1)
            # Guard against division by zero for a fully masked row.
            token_count = torch.clamp(mask.sum(1), min=1e-9)
            pooled_layers.append(token_sum / token_count)

        # A true mean replaces the former fixed 0.5 weights, which were only a
        # correct average when exactly two layers were selected.
        return torch.mean(torch.stack(pooled_layers), dim=0)

    def encode(
        self,
        sentences: list[str],
        task_name: str,
        prompt_type: "PromptType | None" = None,
        **kwargs,
    ) -> np.ndarray:
        """Encodes the given sentences using the encoder.

        Args:
            sentences: The sentences to encode.
            task_name: The name of the task.  (Not directly used in this example, but kept for MTEB compatibility)
            prompt_type: The prompt type to use. (Not directly used in this example, but kept for MTEB compatibility)
            **kwargs: Additional arguments to pass to the encoder.

        Returns:
            The encoded sentences as a numpy array.
        """
        self.model.eval()
        all_embeddings = []
        with torch.no_grad():
            for start in range(0, len(sentences), self.batch_size):
                batch = sentences[start:start + self.batch_size]
                combination = self.get_layer_embedding(batch, self.layers_to_use)
                all_embeddings.append(combination.cpu().numpy())
        return np.concatenate(all_embeddings, axis=0)

In [None]:
# Accumulates one row per evaluated layer; filled by `run_experiment`.
df_quality = pd.DataFrame(columns=['layer', 'accuracy'])


def run_experiment(layer_1: int, layer_2: int, comb_df: pd.DataFrame):
    """Evaluate the embedding built from two layers and record its main score.

    A fresh `CustomModel` is created for the layer pair, evaluated with the
    module-level `evaluation` runner, and the train-split main score is
    printed and appended to the module-level `df_quality`.

    Args:
        layer_1: First layer index; also the value stored in the 'layer' column.
        layer_2: Second layer index.
        comb_df: Unused; kept so existing callers keep working.

    Returns:
        The main score of the run on the 'train' split.
    """
    # No similarity measures are collected here, so the model gets an empty placeholder.
    model = CustomModel({}, model_name=model_name, layers_to_use=[layer_1, layer_2])
    results = evaluation.run(model, output_folder=f"results/{model_name}/layers_{layer_1}_{layer_2}")
    main_score = results[0].scores['train'][0]['main_score']
    print(f'LAYERS {layer_1} and {layer_2}')
    print(f"accuracy = {main_score}")
    df_quality.loc[len(df_quality.index)] = [layer_1, main_score]
    return main_score

In [None]:
from transformers import AutoConfig

# Take the layer count from the model config instead of assuming a 12-layer encoder.
num_layers = AutoConfig.from_pretrained(model_name).num_hidden_layers
# Pairing a layer with itself makes the combined embedding equal to that single layer.
pairs = [(i, i) for i in range(num_layers)]
# Passed to `run_experiment`, which accepts it but does not read it.
df_comb = pd.DataFrame(columns=['layer_1', 'layer_2', 'accuracy', 'hard_correlation', 'distance_correlation', 'jaccard', 'second_order', 'aligned_cosine', 'rsa', 'concentricity_distance', 'eos', 'svcca', 'rank_similarity', 'rtd'])

for layer_1, layer_2 in tqdm(pairs):
    run_experiment(layer_1, layer_2, df_comb)

df_quality.to_csv(path_train_quality, index=False)

## Compute correlation metrics on the train split

In [None]:
from mteb.encoder_interface import PromptType
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
import torch
import pandas as pd

class CustomModel:
    """MTEB-compatible encoder that also records similarity measures between layers.

    Every selected layer is mean-pooled over the non-padding tokens. The
    embedding returned to MTEB is the sum of the pooled layers, and for every
    batch the `Measurer` scores of each pair of pooled layers are appended to
    ``comb_dict``.
    """

    def __init__(self, comb_dict, model_name="bert-base-uncased", device="cuda" if torch.cuda.is_available() else "cpu", layers_to_use=[-1], batch_size=256):
        """Load the tokenizer and model and store the encoding settings.

        Args:
            comb_dict: Nested dict ``comb_dict[i][j][measure] -> list`` that
                ``encode`` appends per-batch values to; it must already
                contain every key that will be written.
            model_name: Name passed to ``AutoTokenizer`` / ``AutoModel``.
            device: Device the model and the tokenized inputs are moved to.
            layers_to_use: Indices into the hidden states after the first
                entry has been dropped (see ``get_layer_embedding``).
            batch_size: Number of sentences encoded per forward pass.
        """
        self.model_name = model_name
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModel.from_pretrained(self.model_name, output_hidden_states=True).to(self.device)
        # Copy so the shared default list is never aliased by an instance.
        self.layers_to_use = list(layers_to_use)
        self.batch_size = batch_size
        self.measurer = Measurer()
        self.comb_dict = comb_dict

    def get_layer_embedding(self, batch: list[str], layers: list[int]) -> tuple[torch.Tensor, dict]:
        """Pool the requested layers for ``batch`` and compare them pairwise.

        Args:
            batch: Sentences to tokenize and run through the model.
            layers: Indices into ``hidden_states[1:]``.

        Returns:
            A pair ``(embedding, metrics)``. ``embedding`` is the sum of the
            mean-pooled layers. ``metrics[i][j]`` holds the measurer output for
            the i-th and j-th entries of ``layers`` (``i < j``); the keys are
            positions within ``layers``, not layer numbers.
        """
        encoded_inputs = self.tokenizer(
            batch, padding=True, truncation=True, return_tensors="pt"
        ).to(self.device)
        model_output = self.model(**encoded_inputs)
        # Per the Transformers docs the first hidden state is the embedding output;
        # dropping it makes index 0 refer to the first transformer layer.
        hidden_states = model_output.hidden_states[1:]
        token_mask = encoded_inputs['attention_mask'].unsqueeze(-1)

        pooled_layers = []
        for layer_idx in layers:
            layer_output = hidden_states[layer_idx]
            mask = token_mask.expand(layer_output.size()).float()
            token_sum = torch.sum(layer_output * mask, 1)
            # Guard against division by zero for a fully masked row.
            token_count = torch.clamp(mask.sum(1), min=1e-9)
            pooled_layers.append(token_sum / token_count)

        metrics = {}
        for i in range(0, len(layers) - 1):
            metrics[i] = {}
            for j in range(i + 1, len(layers)):
                metrics[i][j] = self.measurer(pooled_layers[i], pooled_layers[j])
        return torch.sum(torch.stack(pooled_layers), dim=0), metrics

    def encode(
        self,
        sentences: list[str],
        task_name: str,
        prompt_type: "PromptType | None" = None,
        **kwargs,
    ) -> np.ndarray:
        """Encodes the given sentences using the encoder.

        As a side effect, the similarity values of every batch are appended to
        ``self.comb_dict``.

        Args:
            sentences: The sentences to encode.
            task_name: The name of the task.  (Not directly used in this example, but kept for MTEB compatibility)
            prompt_type: The prompt type to use. (Not directly used in this example, but kept for MTEB compatibility)
            **kwargs: Additional arguments to pass to the encoder.

        Returns:
            The encoded sentences as a numpy array.
        """
        self.model.eval()
        all_embeddings = []
        with torch.no_grad():
            for start in tqdm(range(0, len(sentences), self.batch_size)):
                batch = sentences[start:start + self.batch_size]
                combination, comb_metrics = self.get_layer_embedding(batch, self.layers_to_use)
                # Accumulate per-batch values; they are averaged after the run.
                for key1, partners in comb_metrics.items():
                    for key2, pair_scores in partners.items():
                        for m, value in pair_scores.items():
                            self.comb_dict[key1][key2][m].append(value)
                all_embeddings.append(combination.cpu().numpy())
        return np.concatenate(all_embeddings, axis=0)

In [None]:
from transformers import AutoConfig

# Take the layer count from the model config instead of assuming a 12-layer encoder.
num_layers = AutoConfig.from_pretrained(model_name).num_hidden_layers
metric_names = ['hard_correlation', 'distance_correlation', 'jaccard', 'second_order',
                'aligned_cosine', 'rsa', 'concentricity_distance', 'eos', 'svcca',
                'rank_similarity']

# comb_dict[i][j][measure] collects one value per encoded batch for layers i < j.
comb_dict = {
    i: {j: {name: [] for name in metric_names} for j in range(i + 1, num_layers)}
    for i in range(num_layers - 1)
}
model = CustomModel(comb_dict, model_name=model_name, layers_to_use=list(range(num_layers)))
quality = evaluation.run(model, output_folder=f"results/{model_name}/all_layers")
print('ALL LAYERS EQUAL')
print(f"accuracy = {quality[0].scores['train'][0]['main_score']}")

In [None]:
# Print, for every layer pair, each measure averaged over the collected batches.
for layer_1, partners in comb_dict.items():
    for layer_2, per_metric in partners.items():
        print(f'LAYERS {layer_1} and {layer_2}')
        for m, batch_values in per_metric.items():
            print(f'METRIC {m} value = {np.mean(np.array(batch_values))}')

In [None]:
# Measures written to the CSV, in column order ('rtd' is intentionally left out).
metric_columns = ['hard_correlation', 'distance_correlation', 'jaccard', 'second_order',
                  'aligned_cosine', 'rsa', 'concentricity_distance', 'eos', 'svcca',
                  'rank_similarity']

# Collect one record per layer pair and build the frame once, instead of
# growing it row by row.
records = []
for layer_1, partners in comb_dict.items():
    for layer_2, per_metric in partners.items():
        record = {'layer_1': layer_1, 'layer_2': layer_2}
        for m in metric_columns:
            # Average the per-batch values collected during encoding.
            record[m] = np.mean(np.array(per_metric[m]))
        records.append(record)

df_comb = pd.DataFrame(records, columns=['layer_1', 'layer_2'] + metric_columns)

df_comb.to_csv(path_correlation, index=False)