In [21]:
import transformers
from transformers import pipeline, AutoModel, AutoTokenizer

from peft import PeftConfig, PeftModel, LoraConfig, TaskType

from datasets import load_dataset

import numpy as np

import torch

from sklearn.model_selection import KFold, cross_val_predict, GridSearchCV
import time

from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr
from sklearn.metrics.pairwise import cosine_similarity

#### First of all load the PEFT models and get the sentence embeddings

In [22]:
from transformers import AutoModelForCausalLM, DebertaV2ForMaskedLM
from transformers import AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoModelForTokenClassification
from transformers import DebertaV2ForQuestionAnswering, DebertaV2ForSequenceClassification, DebertaV2ForTokenClassification

# BASELINE MODELS

# repo_name = "bert-base-uncased"
# repo_name = "roberta-base"
# repo_name = "microsoft/deberta-base"
# repo_name = "microsoft/deberta-v3-base"
# repo_name = "google/electra-base-generator"
# repo_name = "facebook/bart-base"
# repo_name = "gpt2"


# TESTING THE MODELS. MODELS ARE OPTIMIZED WITH PEFT LIBRARY, USING LORA WITH CAUSAL LM AND RANK=8

# repo_name = "alitolga/bert-base-uncased-large-peft"
# repo_name = "alitolga/gpt2-large-peft"
# repo_name = "alitolga/bart-base-large-peft"
# repo_name = "alitolga/roberta-base-large-peft"
# repo_name = "alitolga/deberta-base-large-peft"
# repo_name = "alitolga/deberta-v3-base-large-peft"
repo_name = "alitolga/electra-base-generator-large-peft"


# MODELS FOR TESTING THE RANK

# repo_name = "alitolga/deberta-v3-base-RealRank1"
# repo_name = "alitolga/deberta-v3-base-rank2"
# repo_name = "alitolga/deberta-v3-base-rank4"
# repo_name = "alitolga/deberta-v3-base-rank8"
# repo_name = "alitolga/deberta-v3-base-rank16"
# repo_name = "alitolga/deberta-v3-base-rank32"
# repo_name = "alitolga/deberta-v3-base-rank64"
# repo_name = "alitolga/deberta-v3-base-Rank1"

# repo_name = "alitolga/electra-base-generator-Rank1"
# repo_name = "alitolga/electra-base-generator-rank2"
# repo_name = "alitolga/electra-base-generator-rank4"
# repo_name = "alitolga/electra-base-generator-rank8"
# repo_name = "alitolga/electra-base-generator-rank16"
# repo_name = "alitolga/electra-base-generator-rank32"
# repo_name = "alitolga/electra-base-generator-rank64"
# repo_name = "alitolga/electra-base-generator-Rank128"

# MODELS FOR TESTING THE TASK TYPE

# repo_name = "alitolga/deberta-v3-base-FeatureExtraction"
# repo_name = "alitolga/deberta-v3-base-QuestionAns"
# repo_name = "alitolga/deberta-v3-base-SeqClassification"
# repo_name = "alitolga/deberta-v3-base-Wnut-TokenClassification"

# repo_name = "alitolga/electra-base-generator-FeatureExtraction"
# repo_name = "alitolga/electra-base-generator-QuestionAns"
# repo_name = "alitolga/electra-base-generator-SeqClassification"
# repo_name = "alitolga/electra-base-generator-Wnut-TokenClassification"

In [23]:
config = PeftConfig.from_pretrained(repo_name)

base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
# base_model = AutoModelForQuestionAnswering.from_pretrained(config.base_model_name_or_path)
# base_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)
# base_model = AutoModelForTokenClassification.from_pretrained(config.base_model_name_or_path, num_labels=13)


# base_model = DebertaV2ForMaskedLM.from_pretrained(config.base_model_name_or_path)
# base_model = DebertaV2ForQuestionAnswering.from_pretrained(config.base_model_name_or_path)
# base_model = DebertaV2ForSequenceClassification.from_pretrained(config.base_model_name_or_path)
# base_model = DebertaV2ForTokenClassification.from_pretrained(config.base_model_name_or_path)


tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# model = PeftModel.from_pretrained(base_model, repo_name, config=config)
model = PeftModel.from_pretrained(base_model, repo_name)

model.print_trainable_parameters()

If you want to use `ElectraForCausalLM` as a standalone, add `is_decoder=True.`


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

trainable params: 0 || all params: 33,838,906 || trainable%: 0.0


#### Load the Dataset

In [24]:
# Specify the dataset name
dataset_name = "helena-balabin/pereira_fMRI_sentences"

# Specify the path to save or load the dataset
save_path = "./data"

# Load the dataset, use the cache if available
dataset = load_dataset(dataset_name, cache_dir=save_path)

In [25]:
sentences = dataset["train"]["sentences"]
sentences_zero = sentences[0] # 0th subject
print(len(sentences_zero))

384


### Extra cell to use SentEval library for evaluating Embeddings

In [26]:
# from __future__ import absolute_import, division

# import os
# import sys
# import logging

# # Set PATHs
# PATH_TO_SENTEVAL = 'SentEval/'
# PATH_TO_DATA = 'SentEval/data'

# # import SentEval
# sys.path.insert(0, PATH_TO_SENTEVAL)
# import senteval


# # SentEval prepare and batcher
# def prepare(params, samples):
#     return

# def batcher(params, batch):
#     se_sentences = [sent if sent != [] else ['.'] for sent in batch]
#     sentences_str = [' '.join(sentence) for sentence in se_sentences]
#     if tokenizer.pad_token is None:
#         tokenizer.pad_token = tokenizer.eos_token
#     inputs = tokenizer(sentences_str, return_tensors='pt', truncation=True, padding=True)
#     outputs = model(**inputs, output_hidden_states=True)
#     hidden_states = outputs.hidden_states
#     embeddings = torch.mean(hidden_states[0], dim=1)
#     return embeddings



# # Set params for SentEval
# params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
# params_senteval['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
#                                  'tenacity': 3, 'epoch_size': 2}

# # Set up logger
# logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)

# if __name__ == "__main__":
#     se = senteval.engine.SE(params_senteval, batcher, prepare)
#     transfer_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16',
#                       'MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
#                       'SICKEntailment', 'SICKRelatedness', 'STSBenchmark',
#                       'Length', 'WordContent', 'Depth', 'TopConstituents',
#                       'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber',
#                       'OddManOut', 'CoordinationInversion']
#     results = se.eval(transfer_tasks)
#     print(results)

#### Get the sentence embeddings from the Peft model

In [27]:
def get_embeddings(sentence):
    inputs = tokenizer(sentence, return_tensors='pt', truncation=True, padding=True)
    outputs = model(**inputs, output_hidden_states=True)
    
    hidden_states = outputs.hidden_states

    embeddings = torch.mean(hidden_states[0], dim=1)

    return embeddings

In [28]:
embeddings = get_embeddings(sentences_zero)

In [29]:
embeddings = embeddings.detach().numpy()
print(embeddings.shape)

(384, 256)


In [30]:
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

# Assuming Y is your (384, embedding_size) array of sentence embeddings
# embedding_size = 100  # Example embedding size

# Calculate the mean embedding across all sentences
mean_embedding = np.mean(embeddings, axis=0)

# Calculate Euclidean distances of each embedding to the mean embedding
distances = euclidean_distances(embeddings, mean_embedding.reshape(1, -1)).flatten()

# Sort embeddings by their distance to the mean embedding
sorted_indices = np.argsort(distances)

# Calculate the number of sentences per batch for 5 equal-sized folds
batch_size = len(embeddings) // 5

# Distribute sorted indices into 5 batches
batches = [sorted_indices[i::5] for i in range(5)]

# Each batch now acts as a fold, and every fold contains sentences from all ranges of distances
fold_indices = batches  # For clarity, as each batch is essentially a fold

# Verify one of the folds
print(f"Example fold indices: {fold_indices[0]}")
print(f"Number of sentences in the example fold: {len(fold_indices[0])}")


Example fold indices: [240 369 260 169 245 217 378 129 308 374 287  38  67   8 116 247 140   9
 218 112  10  66 265 242  82  93  96 107  37 251  26  94 161 191 234 194
  43 317 178 244 357 149 371 281  48 286 285 207 214 259  33 348  52 147
 352  75 209 329 202 155 223 341  32 159 319  65 268 200 365 238 382 360
 243  72 133 300 162]
Number of sentences in the example fold: 77


In [31]:
# tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token

'[PAD]'

#### Do the Brain Decoding Part

In [32]:
# Get the voxels. For simplicity we start with all the brain regions
fmri_data = dataset["train"]["all"]

# fMRI data of the first subject out of 8
subject_zero = np.array(fmri_data[0])
print(subject_zero.shape)

(384, 195127)


In [33]:
print((fmri_data[0][0][0:5]))
print((fmri_data[1][0][0:5]))
print((fmri_data[2][0][0:5]))
print(len(fmri_data[0][0]))
print(len(fmri_data[1][0]))
print(len(fmri_data[2][0]))


[5.112634075021202, -2.4816354533853904, 6.301162322570788, -0.4318679637650589, -1.5139628065971713]
[-0.9263837702103469, -0.9160614702604, 0.5151801601149782, -0.8384103923678308, -0.9219541666858164]
[0.5874925274376781, 1.1961929669346083, 0.09873265732566508, -1.0566230101831044, 1.607860846171499]
195127
177341
185703


In [34]:
from sklearn.decomposition import PCA

# Choose the number of components to keep
n_components = 10  # Example: Keeping the top 10 components

subjects_pca_data = []

for subject in fmri_data:
    pca = PCA(n_components=n_components)
    # Flatten the data for PCA if it's 3D (time x width x height) or 4D (time x width x height x depth)
    # For simplicity, we assume 2D data (time x features). Adjust the reshape accordingly for 3D/4D data.
    # subject_flat = subject.reshape(subject.shape[0], -1)  # Reshape to (time, features)
    subject_pca = pca.fit_transform(subject)

    subjects_pca_data.append(np.array(subject_pca))

subjects_pca_data = np.array(subjects_pca_data)
print(subjects_pca_data.shape)

(8, 384, 10)


In [37]:
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.linear_model import Ridge
import numpy as np
from sklearn.model_selection import cross_val_predict
import time


# Function to create a custom cross-validator from pre-defined folds
def custom_cv(folds):
    for i in range(len(folds)):
        train_indices = np.concatenate([folds[j] for j in range(len(folds)) if j != i])
        test_indices = folds[i]
        yield train_indices, test_indices

# Use the custom cross-validator for your inner CV
inner_cv = custom_cv(fold_indices)

# # For inner CV
# n_folds = 5  
# state = int(time.time())
# inner_cv = KFold(n_splits=n_folds, shuffle=True, random_state=state)

n_jobs = 4

# Candidate ridge regression regularization parameters.
ALPHAS = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e1]

# Assuming `subjects` is a list or array that indicates the subject number for each sample in X
# For example, subjects = [0, 0, ..., 1, 1, ..., 7, 7] if the first set of rows in X belong to subject 0, and so on.
# Adjust this to match how your data is structured.

# LeaveOneSubjectOut is not directly available, so we simulate it using LeaveOneOut,
# assuming each "fold" is a subject.
loo = LeaveOneOut()

# Placeholder for decoder predictions
decoder_predictions = []

# Placeholder for test indices
test_indices = []

Y = embeddings - embeddings.mean(axis=0)
Y = Y / np.linalg.norm(Y, axis=1, keepdims=True)

# Loop over each fold (subject) for leave-one-subject-out CV
for train_index, test_index in loo.split(np.arange(8)):

    # # Extract training and test data for this fold
    # # This concatenates along the sentence dimension for training data
    # X_train = np.concatenate(subjects_pca_data[train_index], axis=0)

    # Extract training data for components and take the mean across subjects
    # This averages the component data across the 7 training subjects for each sentence
    X_train = np.mean(subjects_pca_data[train_index], axis=0)  # Taking mean across the subject dimension


    X_test = subjects_pca_data[test_index].squeeze(axis=0)

    print(X_test.shape)
    print(X_train.shape)

    X_train = X_train - X_train.mean(axis=0)
    X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
    X_test = X_test - X_test.mean(axis=0)
    X_test = X_test / np.linalg.norm(X_test, axis=1, keepdims=True)
    
    # Run inner CV for hyperparameter tuning
    gs = GridSearchCV(Ridge(fit_intercept=False),
                      {"alpha": ALPHAS}, cv=inner_cv, n_jobs=n_jobs, verbose=10)
    

    # Replicate Y to match the expanded X_train
    # Since we concatenated sentence data for 7 subjects, replicate Y 7 times
    # Y_train = np.tile(Y, (len(train_index), 1))  # Replicating Y for each subject in train_index

    # Fit on the training set
    gs.fit(X_train, Y)

    # Predict on the test set
    Y_pred = gs.predict(X_test)

    # Store predictions
    decoder_predictions.append(Y_pred)
    

# Convert predictions list to an array for further analysis
decoder_predictions = np.array(decoder_predictions)
decoder_predictions_concat = np.concatenate(decoder_predictions)

print(decoder_predictions.shape)
print(decoder_predictions_concat.shape)


(384, 10)
(384, 10)
Fitting 5 folds for each of 8 candidates, totalling 40 fits
(384, 10)
(384, 10)
Fitting 0 folds for each of 8 candidates, totalling 0 fits


ValueError: No fits were performed. Was the CV iterator empty? Were there no candidates?

In [None]:
from scipy.spatial.distance import cosine

### Evaluate for each subject.

for subject in np.arange(8):
    predictions = decoder_predictions[subject]

    # Evaluate the performance (e.g., using mean squared error)
    mse = mean_squared_error(Y, predictions)
    # print(f"Mean Squared Error {subject}: {mse}")

    r2 = r2_score(Y, predictions)
    # print(f"R-squared (R2) Score {subject}: {r2}")

    # Pearson Correlation Coefficient
    res = [pearsonr(t, p).statistic for t, p in zip(Y, predictions)]
    pearson_corr = np.mean(res)
    print(f"Pearson Correlation Coefficient {subject}: {pearson_corr}")

    # Cosine Similarity
    cosine_sim = np.mean(cosine_similarity(predictions, Y))
    # print(f"Cosine Similarity {subject}: {cosine_sim}")

    # Pairwise Accuracy
    res = [ cosine(predictions[i], Y[i]) + cosine(predictions[j], Y[j]) < cosine(predictions[i], Y[j]) + cosine(predictions[j], Y[i])
            for i in range(len(predictions))
            for j in range(i + 1, len(predictions))
        ]
    pairwise_acc = sum(res) / len(res)
    print(f"Pairwise Accuracy {subject}: {pairwise_acc}")

Pearson Correlation Coefficient 0: 0.0067918952232561445
Pairwise Accuracy 0: 0.5043380113141862
Pearson Correlation Coefficient 1: 0.017882779115256358
Pairwise Accuracy 1: 0.522927545691906
Pearson Correlation Coefficient 2: 0.019916078379573886
Pairwise Accuracy 2: 0.5302164926022629
Pearson Correlation Coefficient 3: 0.028737449491400235
Pairwise Accuracy 3: 0.5516345735422106
Pearson Correlation Coefficient 4: 0.006331650129180452
Pairwise Accuracy 4: 0.5232131201044387
Pearson Correlation Coefficient 5: -0.036789915591514115
Pairwise Accuracy 5: 0.4618146214099217
Pearson Correlation Coefficient 6: 0.000585922346742577
Pairwise Accuracy 6: 0.5036988685813751
Pearson Correlation Coefficient 7: 4.379542212248321e-05
Pairwise Accuracy 7: 0.50707136640557


In [None]:
# Normalize the embeddings
# embeddings_normed = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

In [None]:
# Prepare nested CV.
# Inner CV is responsible for hyperparameter optimization;
# outer CV is responsible for prediction.

n_folds = 5

state = int(time.time())
inner_cv = KFold(n_splits=n_folds, shuffle=True, random_state=state)
outer_cv = KFold(n_splits=n_folds, shuffle=True, random_state=state)

# Final data prep: normalize.
X = voxels - voxels.mean(axis=0)
X = X / np.linalg.norm(X, axis=1, keepdims=True)
Y = embeddings - embeddings.mean(axis=0)
Y = Y / np.linalg.norm(Y, axis=1, keepdims=True)

In [None]:
######## Run learning.

n_jobs = 4

# Candidate ridge regression regularization parameters.
ALPHAS = [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e1]

# Run inner CV.
gs = GridSearchCV(Ridge(fit_intercept=False),
                {"alpha": ALPHAS}, cv=inner_cv, n_jobs=n_jobs, verbose=10)

"""
Purpose of This Line

Nested Cross-Validation:

The use of cross_val_predict with GridSearchCV (gs in this context) as the estimator 
is a part of a nested cross-validation strategy. 
The key purpose here is to evaluate the model's performance in a way that is as unbiased as possible.

Independent Data Splits:

The outer cross-validation (cv=outer_cv) splits the dataset into training and test sets multiple times 
(based on the number of folds in outer_cv). For each of these splits, 
the inner cross-validation (within GridSearchCV) finds the best alpha value. 

This process ensures that the choice of hyperparameters (alpha in this case) is not biased by the 
particular split of data used for model training and evaluation.

Generating Unbiased Predictions:

cross_val_predict does not simply fit the model but generates predictions for each point 
when it is in the test set of the outer cross-validation. 
These predictions are made by a model that has never seen the data point during training, 
thereby providing an unbiased estimate of the model's performance on unseen data.
"""

In [None]:
# Run outer CV.
decoder_predictions = cross_val_predict(gs, X, Y, cv=outer_cv)

In [None]:
# print(decoder_predictions.shape)
# print(Y.shape)

In [None]:
# def pairwise_accuracy(y_true, y_pred):
#     n = len(y_true)
#     correct_pairs = 0
#     total_pairs = 0

#     for i in range(n):
#         for j in range(i + 1, n):
#             if (y_true[i] > y_true[j]) == (y_pred[i] > y_pred[j]):
#                 correct_pairs += 1
#             total_pairs += 1

#     return correct_pairs / total_pairs if total_pairs > 0 else 0

# # Example usage:
# # y_true = [actual values]
# # y_pred = [predicted values]
# # accuracy = pairwise_accuracy(y_true, y_pred)
# # print("Pairwise Accuracy:", accuracy)

# def pairwise_accuracy_efficient(y_true, y_pred):
#     y_true = np.array(y_true)
#     y_pred = np.array(y_pred)

#     # Create a matrix of differences for true labels
#     diff_true = np.subtract.outer(y_true, y_true) > 0

#     # Create a matrix of differences for predictions
#     diff_pred = np.subtract.outer(y_pred, y_pred) > 0

#     # Count the number of pairs that agree in order
#     correct_pairs = np.sum(diff_true == diff_pred)

#     # Total number of pairs
#     total_pairs = len(y_true) * (len(y_true) - 1) / 2

#     print(correct_pairs)
#     print(total_pairs)

#     return correct_pairs / total_pairs if total_pairs > 0 else 0

In [None]:
from sklearn.base import BaseEstimator, clone
from scipy.spatial.distance import cosine

In [None]:
# def pairwise_accuracy(
#     estimator: BaseEstimator = None,
#     X: torch.Tensor = None,  # noqa
#     y: torch.Tensor = None,
#     topic_ids: torch.Tensor = None,
#     scoring_variation: str = None,  # type: ignore
# ) -> float:
#     """Calculate the average pairwise accuracy of all pairs of true and predicted vectors.

#     Based on the pairwise accuracy as defined in Oota et al. 2022, Sun et al. 2021, Pereira et al. 2018.

#     :param estimator: Estimator object (e.g., a Ridge regression)
#     :type estimator: BaseEstimator
#     :param X: Sentence embeddings used as a basis to predict MRI vectors with the estimator
#     :type X: torch.Tensor
#     :param y: True MRI vectors
#     :type y: torch.Tensor
#     :param topic_ids: Topic IDs for each paragraph
#     :type topic_ids: torch.Tensor
#     :param scoring_variation: Variation of the scoring function, defaults to None
#     :type scoring_variation: str
#     :return: Average pairwise accuracy from all possible sentence pairs
#     :rtype: float
#     """
#     pred = estimator.predict(X)  # noqa

#     if scoring_variation == "same-topic":
#         # Calculate pairwise accuracy for same-topic sentences
#         res = [
#             cosine(pred[i], y[i]) + cosine(pred[j], y[j]) < cosine(pred[i], y[j]) + cosine(pred[j], y[i])
#             for i in range(len(X))
#             for j in range(i + 1, len(X)) if topic_ids[i] == topic_ids[j]
#         ]
#     elif scoring_variation == "different-topic":
#         # Calculate pairwise accuracy for different-topic sentences
#         res = [
#             cosine(pred[i], y[i]) + cosine(pred[j], y[j]) < cosine(pred[i], y[j]) + cosine(pred[j], y[i])
#             for i in range(len(X))
#             for j in range(i + 1, len(X)) if topic_ids[i] != topic_ids[j]
#         ]
#     else:
#         # See for all possible sentence pairings: Is the distance between the correct matches of predicted and X
#         # sentences smaller than the distance between pairings of X and predicted vectors from different sentences?
#         res = [
#             cosine(pred[i], y[i]) + cosine(pred[j], y[j]) < cosine(pred[i], y[j]) + cosine(pred[j], y[i])
#             for i in range(len(X))
#             for j in range(i + 1, len(X))
#         ]

#     # Return the fraction of instances for which the condition holds versus all possible pairs
#     return sum(res) / len(res)


# def pearson_scoring(
#     estimator: BaseEstimator = None,
#     X: torch.Tensor = None,  # noqa
#     y: torch.Tensor = None,
# ) -> float:
#     """Calculate the average pearson correlation for the given set of true and predicted MRI vectors.

#     :param estimator: Estimator object (e.g., a Ridge regression)
#     :type estimator: BaseEstimator
#     :param X: Sentence embeddings used as a basis to predict MRI vectors with the estimator
#     :type X: torch.Tensor
#     :param y: True MRI vectors
#     :type y: torch.Tensor
#     :return: Average pearson correlation from all pairs of predicted and true MRI vectors
#     :rtype: float
#     """
#     pred = estimator.predict(X)  # noqa

#     # See for all possible sentence pairings: Is the distance between the correct matches of predicted and X
#     # sentences smaller than the distance between pairings of X and predicted vectors from different sentences?
#     res = [pearsonr(t, p).statistic for t, p in zip(y, pred)]

#     # Return the fraction of instances for which the condition holds versus all possible pairs
#     return np.mean(res)  # noqa

In [None]:
######### Evaluate.

Y_flatten = Y.flatten()
pred_flatten = decoder_predictions.flatten()

# Evaluate the performance (e.g., using mean squared error)
mse = mean_squared_error(Y, decoder_predictions)
print(f"Mean Squared Error: {mse}")

r2 = r2_score(Y, decoder_predictions)
print(f"R-squared (R2) Score: {r2}")

# Pearson Correlation Coefficient
res = [pearsonr(t, p).statistic for t, p in zip(Y, decoder_predictions)]
pearson_corr = np.mean(res)
print(f"Pearson Correlation Coefficient: {pearson_corr}")

# Cosine Similarity
cosine_sim = np.mean(cosine_similarity(decoder_predictions, Y))
print(f"Cosine Similarity: {cosine_sim}")

# Pairwise Accuracy
res = [ cosine(decoder_predictions[i], Y[i]) + cosine(decoder_predictions[j], Y[j]) < cosine(decoder_predictions[i], Y[j]) + cosine(decoder_predictions[j], Y[i])
        for i in range(len(X))
        for j in range(i + 1, len(X))
    ]
pairwise_acc = sum(res) / len(res)
print(f"Pairwise Accuracy: {pairwise_acc}")