In [None]:
import numpy as np
import pandas as pd
import torch
from scipy.optimize import minimize
from sklearn.metrics import label_ranking_average_precision_score
from transformers import AutoTokenizer

from ensembling.embeddings import compute_embeddings
from ensembling.fuse import (
    condorcet_fuse,
    identity_norm,
    max_norm,
    mean_fuse,
    min_max_norm,
    prod_fuse,
    rank_norm,
    reciprocal_rank_fuse,
    strange_norm,
    sum_norm,
    zmuv_norm,
)
from ensembling.similarities import compute_similarities, load_similarities
from metrics import Metrics
from models import DiffPoolModel, GATModel

# Use the GPU whenever torch reports one as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Compact array printing: four decimals, lines up to 200 characters wide.
np.set_printoptions(linewidth=200, precision=4)

## Define the models in the ensemble

In [None]:
# define the different models in the ensemble

# Keyword arguments that are identical for every ensemble member; the remaining
# keyword arguments differ from one entry to the next.
_shared_kwargs = dict(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    num_node_features=300,
    nout=384,
)

# The order of the entries matches the checkpoint order in `saved_paths`
# (the label above each entry is the corresponding run directory name).
models = [
    # diffpool-base
    DiffPoolModel(**_shared_kwargs),
    # diffpool-big
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[30, 10, 3, 1],
        d_encoder_hidden_dims=[300, 600, 1200, 1200],
        d_encoder_linear_layers=[[300], [600], [1200, 600], [1200, 600]],
        d_encoder_num_heads=[3, 6, 12, 12],
        d_encoder_num_layers=[10, 5, 3, 1],
        d_linear=1200,
        dropout=0,
    ),
    # diffpool-linear
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[15, 5, 1],
        d_encoder_hidden_dims=[600, 600, 600],
        d_encoder_linear_layers=[[300, 300], [300, 300], [300, 300]],
        d_encoder_num_heads=[3, 3, 3],
        d_encoder_num_layers=[4, 3, 2],
        d_linear=1200,
        dropout=0,
    ),
    # diffpool-shallow
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[20, 3],
        d_encoder_hidden_dims=[1200, 1200],
        d_encoder_linear_layers=[[150, 150, 150, 150], [150, 150, 150, 150]],
        d_encoder_num_heads=[5, 5],
        d_encoder_num_layers=[4, 3],
        d_linear=2000,
        dropout=0,
    ),
    # diffpool-deep
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[15, 10, 5, 1],
        d_encoder_hidden_dims=[600, 600, 600, 600],
        d_encoder_linear_layers=[
            [150, 150, 150],
            [150, 150, 150],
            [150, 150, 150],
            [150, 150, 150],
        ],
        d_encoder_num_heads=[3, 3, 3, 3],
        d_encoder_num_layers=[4, 3, 2, 2],
        d_linear=700,
        dropout=0,
    ),
    # gat
    GATModel(
        **_shared_kwargs,
        d_hidden_dim=1200,
        num_layers=3,
        num_heads=6,
        d_linear_layers=[1200, 600],
        dropout=0,
        activation="LeakyReLU",
    ),
    # diffpool-large
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[15, 5, 1],
        d_encoder_hidden_dims=[1200, 1200, 1200],
        d_encoder_linear_layers=[[1200, 600], [1200, 600], [1200, 600]],
        d_encoder_num_heads=[6, 6, 6],
        d_encoder_num_layers=[5, 3, 2],
        d_linear=1200,
        dropout=0.01,
    ),
    # diffpool-medium
    DiffPoolModel(
        **_shared_kwargs,
        d_pooling_layers=[15, 5, 1],
        d_encoder_hidden_dims=[600, 600, 600],
        d_encoder_linear_layers=[[600, 300], [600, 300], [600, 300]],
        d_encoder_num_heads=[6, 6, 6],
        d_encoder_num_layers=[6, 4, 3],
        d_linear=1200,
        dropout=0.01,
    ),
]

# One tokenizer instance per ensemble member (8 entries, same order as `models`);
# every member is loaded from the same pretrained checkpoint name.
tokenizers = [
    AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    for _ in range(8)
]

# Checkpoint file of each ensemble member, listed in the same order as the
# labelled entries of `models`.
saved_paths = [
    f"./outputs/{run_name}/model99.pt"
    for run_name in (
        "diffpool-base",
        "diffpool-big",
        "diffpool-linear",
        "diffpool-shallow",
        "diffpool-deep",
        "gat",
        "diffpool-large",
        "diffpool-medium",
    )
]

# One independent Metrics object per ensemble member (8 entries), all configured
# with the "circle" loss.
metrics = [Metrics(loss="circle") for _ in range(8)]

# Per-member skip flags passed to compute_embeddings / compute_similarities.
# All 8 members are enabled; set skip[i] = True to exclude entry i.
skip = [False] * 8

In [None]:
# Positions (into the lists defined above) of the models that take part in the
# ensemble. All eight are used here; replace with an explicit subset such as
# [0, 2, 5] to ensemble fewer models.
models_indices = list(range(8))

## Precompute all the similarities

In [None]:
# Run compute_embeddings once for the validation split and once for the test split,
# handing it the full model / tokenizer / checkpoint / skip-flag lists.
for split in ("val", "test"):
    compute_embeddings(models, tokenizers, saved_paths, skip, split)

In [None]:
# Run compute_similarities for both evaluation splits, validation first.
for split in ("val", "test"):
    compute_similarities(
        metrics,
        skip,
        split,
    )

In [None]:
# Print the label-ranking average precision of every selected model on its own,
# using the similarity matrix stored on disk for the validation split.
for k in models_indices:
    similarity_file = f"./outputs/similarities/val/similarity{k}.pt"
    val_similarities = torch.load(similarity_file)
    # Identity targets: row i has exactly one relevant label, column i.
    n_labels = val_similarities.shape[1]
    labels = np.eye(n_labels)
    lrap = label_ranking_average_precision_score(labels, val_similarities)
    print(k, lrap)

## Grid search for a good initialization of the weights

In [None]:
# Candidate weight vectors: every combination of {0, 1, 2} over the selected models.
grid = np.meshgrid(*[[0, 1, 2] for _ in range(len(models_indices))])
grid = np.array([g.flatten() for g in grid]).T
# Drop the first row (the all-zero combination): np.average cannot normalise
# weights that sum to zero.
grid = grid[1:]
# Shuffle the rows with a fixed seed so that tie-breaking between equally good
# candidates, and the result of an interrupted search, are reproducible.
np.random.default_rng(0).shuffle(grid)

# Precompute to speed up: load and normalise the similarities once, outside the loop.
val_similarities = load_similarities("val", models_indices)
val_similarities = strange_norm(val_similarities)
# Identity targets: row i has exactly one relevant label, column i.
labels = np.eye(val_similarities.shape[1])

grid_weights = None
best_score = 0
n_evaluated = 0  # named to avoid shadowing the builtin `iter` in later cells
try:
    for n_evaluated, weights in enumerate(grid, start=1):
        val_aggregation = np.average(val_similarities, axis=0, weights=weights)
        score = label_ranking_average_precision_score(labels, val_aggregation)
        if score > best_score:
            print(
                f"Iteration {n_evaluated} | New best score {score} with weights {weights}",
                end="\r",
            )
            best_score = score
            grid_weights = weights
except KeyboardInterrupt:
    # Deliberate early stop: keep the best candidate found so far.
    pass

if grid_weights is None:
    # Nothing was accepted (e.g. interrupted before the first evaluation). Fail here
    # with a clear message instead of logging "None" and breaking in the next cell.
    raise RuntimeError(
        "Grid search ended before any candidate weights were accepted; "
        "re-run this cell and let at least one iteration finish."
    )

# Append the selected weights and their validation score to the log file.
with open("./outputs/weights", "a") as f:
    f.write(
        f"{str(grid_weights)} | {label_ranking_average_precision_score(labels, mean_fuse(val_similarities, weights=grid_weights, norm=identity_norm))}\n"
    )

## Simple method to optimize the weights

In [None]:
# Reload and normalise the validation similarities so this cell does not depend
# on arrays left over from the grid-search cell (only on `grid_weights`).
val_similarities = load_similarities("val", models_indices)
val_similarities = strange_norm(val_similarities)
# Identity targets: row i has exactly one relevant label, column i.
labels = np.eye(val_similarities.shape[1])


def objective(weights):
    """Return the negated validation score of a weighted average of the models.

    Args:
        weights: One weight per selected model, used to average
            `val_similarities` along its first axis.

    Returns:
        The negative label-ranking average precision, so that minimising this
        function maximises the score.
    """
    val_aggregation = np.average(val_similarities, axis=0, weights=weights)
    score = label_ranking_average_precision_score(labels, val_aggregation)
    # Overwrite the same console line on every evaluation to show progress.
    print(f"{score:.5f} | ", weights, end="\r")
    return -score


# scipy.optimize.minimize requires a one-dimensional x0: wrapping the weights in
# an extra list produced a (1, n) array, which is deprecated and rejected by
# recent SciPy releases. Pass the grid-search result as a flat float vector.
initial_weights = np.asarray(grid_weights, dtype=float).ravel()

res = minimize(
    objective,
    initial_weights,
    method="Powell",
    tol=1e-4,
)
weights = res.x

# Store the weights for later reference
with open("./outputs/weights", "a") as f:
    f.write(
        f"{str(weights)} | {label_ranking_average_precision_score(labels, mean_fuse(val_similarities, weights=weights, norm=identity_norm))}\n"
    )

## Results with different fusing methods

In [None]:
# Start again from the raw validation similarities: each norm is applied by the
# fuse function itself.
val_similarities = load_similarities("val", models_indices)
labels = np.eye(val_similarities.shape[1])

fusers = (mean_fuse, prod_fuse, reciprocal_rank_fuse, condorcet_fuse)
normalizers = (rank_norm, strange_norm, min_max_norm, sum_norm, max_norm, zmuv_norm)

# Score every (fuse, norm) pair with the optimised weights and print one line each.
for fuse in fusers:
    for norm in normalizers:
        fused = fuse(val_similarities, weights=weights, norm=norm)
        score = label_ranking_average_precision_score(labels, fused)
        print(f"{fuse.__name__} | {norm.__name__} || {score:.6f}")

## Final test results

In [None]:
# Validation score of the final ensemble (weighted mean fusion).
# NOTE(review): the weights were tuned on strange_norm-normalised similarities,
# while this call relies on mean_fuse's default `norm`, which is not visible
# here — confirm the two agree.
val_similarities = load_similarities("val", models_indices)
labels = np.eye(val_similarities.shape[1])
val_fused = mean_fuse(val_similarities, weights=weights)
print(label_ranking_average_precision_score(labels, val_fused))

In [None]:
# Fuse the test-split similarities with the optimised weights.
# NOTE(review): uses mean_fuse's default `norm`, which is not visible here —
# confirm it matches the normalisation the weights were tuned with.
test_similarities = load_similarities("test", models_indices)
test_aggregation = mean_fuse(test_similarities, weights=weights)

# Build the submission table: an "ID" column holding the row index, placed
# before the fused score columns, written without the pandas index.
solution = pd.DataFrame(test_aggregation)
solution.insert(0, "ID", solution.index)
solution.to_csv("outputs/ensemble_solution.csv", index=False)