# Complementarity

This notebook evaluates the complementarity of models: for every ensemble of models, it computes the Pearson correlation between the mean pairwise distance among the ensemble's members and the ensemble's performance.

Results are explained in Section 4.5.

In [1]:
import warnings

# Install a blanket "ignore" filter: from this point on, warnings raised by
# any library are hidden from the cell outputs.
warnings.filterwarnings("ignore")

In [2]:
from utils import (
    compute_shap_similarity_pearson,
    compute_ndcg_similarity,
    compute_pred_jaccard,
    compute_aucpr,
    compute_score_correlations,
    load_nested_results,
)
import numpy as np

In [3]:
# Load the stored experiment results from "results".
# Later cells index the returned object as
# all_results[dataset][model_name][fold]["scores"], and read the per-fold
# labels from all_results[dataset]["ground_truth"].
all_results = load_nested_results("results")

In [4]:
# Datasets are the top-level keys of the loaded results.
dataset_names = list(all_results)

# Model names are taken from the first dataset only: every key except the
# "ground_truth" entry, in sorted order so that positions are stable.
first_dataset_entries = all_results[dataset_names[0]]
models_names = sorted(first_dataset_entries.keys() - {"ground_truth"})

In [None]:
import numpy as np
from itertools import combinations
from collections import defaultdict
from sklearn.metrics import average_precision_score
from scipy.stats import rankdata


def aggreg_rank(y_true, y_preds):
    """Score a rank-averaging ensemble with average precision.

    Each element of ``y_preds`` (the scores of one model) is converted to
    ranks, with tied values sharing their average rank. The ranks are then
    averaged across models, position by position, and the averaged ranks are
    evaluated against ``y_true`` with ``average_precision_score``.

    Parameters
    ----------
    y_true : labels passed unchanged to ``average_precision_score``.
    y_preds : iterable of per-model score vectors of equal length.

    Returns
    -------
    The value returned by ``average_precision_score``.
    """
    per_model_ranks = []
    for model_scores in y_preds:
        per_model_ranks.append(rankdata(model_scores, method="average"))

    ensemble_ranks = np.mean(per_model_ranks, axis=0)
    return average_precision_score(y_true, ensemble_ranks)


# scores[dataset][field] is a list with one value per model combination.
# Fields: "name" (joined model names), "mcc" (fold-averaged value returned by
# aggreg_rank), and "ndcg" / "shap" / "scores" / "jaccard" (mean pairwise
# distance between the members of the combination).
scores = defaultdict(lambda: defaultdict(list))
n_models_ensemblist = 3  # number of models per ensemble

for dataset in dataset_names:
    print(dataset)

    dataset_results = all_results[dataset]
    y_true_folds = dataset_results["ground_truth"]

    # Compute similarities on the full set of models first, then convert them
    # to distances (1 - similarity).
    # NOTE(review): the matrices are indexed below with positions in the sorted
    # `models_names`; confirm the helpers order their rows/columns the same way.
    dist_shap_sim = 1 - compute_shap_similarity_pearson(dataset_results)[0]
    dist_ndcg_sim = 1 - compute_ndcg_similarity(dataset_results)[0]
    dist_scores_sim = 1 - compute_score_correlations(dataset_results)[0]
    dist_jaccard_sim = 1 - compute_pred_jaccard(dataset_results)[0]

    for comb in combinations(range(len(models_names)), n_models_ensemblist):
        members = [models_names[idx] for idx in comb]

        # Ensemble performance, one value per fold.
        metric = [
            aggreg_rank(
                y_true_folds[fold],
                np.array(
                    [dataset_results[member][fold]["scores"] for member in members]
                ),
            )
            for fold in range(len(y_true_folds))
        ]

        # Metric aggregation: collect the distance of every unordered pair of
        # ensemble members under each of the four measures.
        model_pairs = list(combinations(comb, 2))
        dists_shap = [dist_shap_sim[a, b] for a, b in model_pairs]
        dists_ndcg = [dist_ndcg_sim[a, b] for a, b in model_pairs]
        dists_scores = [dist_scores_sim[a, b] for a, b in model_pairs]
        dists_jaccard = [dist_jaccard_sim[a, b] for a, b in model_pairs]

        record = scores[dataset]
        record["name"].append("-".join(members))
        # nanmean: folds whose score is NaN are left out of the average.
        record["mcc"].append(np.nanmean(metric))
        record["ndcg"].append(np.mean(dists_ndcg))
        record["shap"].append(np.mean(dists_shap))
        record["scores"].append(np.mean(dists_scores))
        record["jaccard"].append(np.mean(dists_jaccard))

2_annthyroid
4_breastw
14_glass
15_Hepatitis
21_Lymphography
23_mammography
27_PageBlocks
29_Pima
37_Stamps
38_thyroid
39_vertebral
40_vowels
42_WBC
44_Wilt
45_wine
47_yeast


In [12]:
import pandas as pd
from scipy.stats import pearsonr

# Table column -> key under which the matching mean pairwise distance is
# stored in `scores[dataset]`.
distance_columns = {
    "shap": "shap",
    "NDCG": "ndcg",
    "Scores": "scores",
    "Jaccard": "jaccard",
}

data = []

for dataset in dataset_names:
    mcc = scores[dataset]["mcc"]

    # Pearson correlation between ensemble performance and each distance.
    row = {"Dataset": dataset}
    for column, key in distance_columns.items():
        r, _ = pearsonr(mcc, scores[dataset][key])
        row[column] = r

    data.append(row)

# Express the correlations in percent.
df = pd.DataFrame(data).set_index("Dataset") * 100

# Highlight, for each dataset (row), the distance with the highest correlation.
styled_df = df.style.highlight_max(axis=1, color="red").format(precision=0)

styled_df

Unnamed: 0_level_0,shap,NDCG,Scores,Jaccard
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2_annthyroid,-20,39,-1,-32
4_breastw,0,-17,-90,-59
14_glass,5,-2,-33,-37
15_Hepatitis,-28,-32,-48,-31
21_Lymphography,15,0,-29,16
23_mammography,-9,-68,-18,3
27_PageBlocks,-18,21,-17,-4
29_Pima,37,14,30,24
37_Stamps,40,15,15,36
38_thyroid,-38,19,-12,-48


In [14]:
# Mean correlation (in percent) of each distance measure over the datasets in
# `df`, rounded to the nearest integer.
df.mean().round()

shap       3.0
NDCG       5.0
Scores    -9.0
Jaccard   -4.0
dtype: float64