In [None]:
import torch
from transformers import AutoTokenizer
from models.diffpool import DiffPoolModel
from models.diffpool.old import DiffPoolModel as OldDiffpoolModel
from metrics import Metrics
import pandas as pd
import numpy as np
from ensembling.embeddings import compute_embeddings
from ensembling.similarities import compute_similarities, load_similarities
from ensembling.fuse import mean_fuse, condorcet_fuse, reciprocal_rank_fuse
from sklearn.metrics import label_ranking_average_precision_score
from scipy.optimize import minimize

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# IPython autoreload in mode 2: imported modules are reloaded before each cell
# executes, so edits to the project's own packages take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

## Define the models in the ensemble

In [None]:
# define the different models in the ensemble

def _build_minilm(model_cls=DiffPoolModel, **extra):
    """Instantiate `model_cls` on top of the all-MiniLM-L6-v2 text encoder.

    Every MiniLM member of the ensemble shares the same three base arguments;
    architecture-specific settings are forwarded unchanged through `extra`.
    """
    return model_cls(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        num_node_features=300,
        nout=384,
        **extra,
    )


# Ensemble members. The position in this list is passed alongside
# `tokenizers`, `saved_paths` and `skip` to `compute_embeddings`, so the
# order here presumably has to match those lists entry by entry.
models = [
    # 0: MiniLM, default DiffPool settings
    _build_minilm(),
    # 1: MPNet text encoder (768-dimensional output)
    DiffPoolModel(
        model_name="sentence-transformers/all-mpnet-base-v2",
        num_node_features=300,
        nout=768,
    ),
    # 2: MiniLM, four pooling levels
    _build_minilm(
        d_pooling_layers=[30, 10, 3, 1],
        d_encoder_hidden_dims=[300, 600, 1200, 1200],
        d_encoder_linear_layers=[[300], [600], [1200, 600], [1200, 600]],
        d_encoder_num_heads=[3, 6, 12, 12],
        d_encoder_num_layers=[10, 5, 3, 1],
        d_linear=1200,
        dropout=0,
    ),
    # 3, 4: MiniLM, default DiffPool settings
    _build_minilm(),
    _build_minilm(),
    # 5: MiniLM on the previous DiffPool implementation
    _build_minilm(OldDiffpoolModel),
    # 6, 7: MiniLM, three pooling levels (two instances of the same
    # configuration; each iteration builds its own argument lists)
    *(
        _build_minilm(
            d_pooling_layers=[15, 5, 1],
            d_encoder_hidden_dims=[600, 600, 600],
            d_encoder_linear_layers=[[300, 300], [300, 300], [300, 300]],
            d_encoder_num_heads=[3, 3, 3],
            d_encoder_num_layers=[4, 3, 2],
            d_linear=1200,
            dropout=0,
        )
        for _ in range(2)
    ),
]

# One tokenizer per ensemble member: only the second entry uses MPNet,
# every other entry uses MiniLM.
_MINILM_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_MPNET_NAME = "sentence-transformers/all-mpnet-base-v2"
_tokenizer_names = [_MINILM_NAME, _MPNET_NAME] + [_MINILM_NAME] * 6

tokenizers = [AutoTokenizer.from_pretrained(name) for name in _tokenizer_names]

# Checkpoint file of each ensemble member, expressed relative to a common
# root directory (presumably listed in the same order as `models`).
_SAVED_ROOT = "./outputs/saved"
saved_paths = [
    f"{_SAVED_ROOT}/{relative_path}"
    for relative_path in (
        "circle_loss/circle70.pt",
        "mpnet/model60.pt",
        "diffpool30M/model63.pt",
        "best-diffpool-kaggle/model204.pt",
        "sofiane/NADAM 0.82/model24.pt",
        "sofiane/First diffpool 0.845/model35.pt",
        "diffpool87/model54.pt",
        "diffpool87other/model70.pt",
    )
]

# One independent Metrics object per ensemble member, all configured with
# the "circle" loss.
metrics = [Metrics(loss="circle") for _ in range(8)]

# One flag per ensemble member, handed to compute_embeddings and
# compute_similarities. Presumably True means "skip this model" there;
# confirm in the ensembling package before flipping an entry.
skip = [True] * 8

In [None]:
# Positions (within the lists defined above) of the models that take part
# in the ensemble. All eight are selected; replace with an explicit list
# such as [0, 2, 5] to use a subset.
models_indices = list(range(8))

## Precompute all the similarities

In [None]:
# Run the embedding step once per data split, validation first and then test.
for split in ("val", "test"):
    print(split)
    compute_embeddings(models, tokenizers, saved_paths, skip, split)

In [None]:
# Run the similarity step once per data split, validation first and then test.
for split in ("val", "test"):
    print(split)
    compute_similarities(metrics, skip, split)

## Find the best weights on validation data using a simple method

In [None]:
val_similarities = load_similarities("val", models_indices)

# Min-max rescale along axis 1, separately for every model (axis 0) and
# every position on the last axis, so that each such slice spans [0, 1].
# NOTE(review): the weights fitted below are tuned on these rescaled values,
# while the later evaluation/test cells pass the raw `load_similarities`
# output to the fuse functions -- confirm the fuse functions rescale internally.
axis1_min = val_similarities.min(axis=1, keepdims=True)
axis1_max = val_similarities.max(axis=1, keepdims=True)
val_similarities = (val_similarities - axis1_min) / (axis1_max - axis1_min)

# Identity ground truth: the only relevant label for row i is column i.
labels = np.eye(val_similarities.shape[1])

# Keep the weight vectors printed during optimisation short.
np.set_printoptions(precision=3)


def objective(weights):
    """Return the negated validation LRAP of the weighted-average ensemble.

    The per-model similarities in the global `val_similarities` are averaged
    over axis 0 using `weights`, and the result is scored against the global
    `labels`. The score is negated because the caller minimises. As a side
    effect, the current score and weights are printed on a single,
    carriage-return-overwritten line.
    """
    fused = np.average(val_similarities, axis=0, weights=weights)
    lrap = label_ranking_average_precision_score(labels, fused)
    print(f"{lrap:.4f} | ", weights, end="\r")
    return -lrap


# Draw the starting point from a seeded generator: Nelder-Mead is a local
# search, so an unseeded start gives different weights (and a different
# submission) on every run of the notebook.
WEIGHTS_SEED = 0
weights_rng = np.random.default_rng(WEIGHTS_SEED)
initial_weights = weights_rng.normal(10, 3, len(models_indices))

# Derivative-free search: the objective depends on the weights only through
# the induced ranking, so it offers no useful gradient.
# NOTE(review): the search is unconstrained, so individual weights may end up
# negative -- confirm the fuse functions accept that.
res = minimize(objective, initial_weights, method="Nelder-Mead")
weights = res.x

# Store the weights for later reference (appended, one line per run)
logged_score = label_ranking_average_precision_score(
    labels, mean_fuse(val_similarities, weights)
)
with open("./outputs/weights", "a") as f:
    f.write(f"{str(weights)} | {logged_score}\n")

## Results with different fusing methods

### Mean fuse

In [None]:
# Reload the validation similarities (an earlier cell rebinds the name to a
# rescaled copy) and score the weighted mean fusion against identity labels.
val_similarities = load_similarities("val", models_indices)
val_aggregation = mean_fuse(val_similarities, weights)

val_labels = np.eye(len(val_aggregation))
val_score = label_ranking_average_precision_score(val_labels, val_aggregation)
print("Validation score :", val_score)

### Reciprocal rank fuse

In [None]:
# Reload the validation similarities and score the reciprocal rank fusion
# against identity labels.
val_similarities = load_similarities("val", models_indices)
val_aggregation = reciprocal_rank_fuse(val_similarities, weights)

val_labels = np.eye(len(val_aggregation))
val_score = label_ranking_average_precision_score(val_labels, val_aggregation)
print("Validation score :", val_score)

### Condorcet fuse

In [None]:
# Reload the validation similarities and score the Condorcet fusion against
# identity labels.
val_similarities = load_similarities("val", models_indices)
val_aggregation = condorcet_fuse(val_similarities, weights)

val_labels = np.eye(len(val_aggregation))
val_score = label_ranking_average_precision_score(val_labels, val_aggregation)
print("Validation score :", val_score)

## Final test results

In [None]:
# Fuse the test-split similarities with the weights fitted on validation.
test_similarities = load_similarities("test", models_indices)
test_aggregation = mean_fuse(test_similarities, weights)

# Build the submission table: the fused scores, preceded by an "ID" column
# holding the row number, written without the pandas index.
solution = pd.DataFrame(test_aggregation)
solution.insert(0, "ID", solution.index)
solution.to_csv("outputs/ensemble_solution.csv", index=False)