In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

from pathlib import Path
import re

from IPython.core.display import display_latex

from visu_utils import TASK_LIST_CLASSIFICATION, make_results_tables_per_group_size, load_classification_merged_mteb, \
    load_sts_merged_mteb

from autorank import autorank, plot_stats, create_report, latex_table

# Destination folders for the LaTeX tables and the figures exported by this notebook.
EXPORT_PATH_TABLE = Path("../../papers/Distillation-MI-ICLR/tables/nlp/")
EXPORT_PATH_FIG = Path("../../papers/Distillation-MI-ICLR/figures/nlp/")

# Create both export folders (and any missing parents) up front; no-op if they already exist.
for export_dir in (EXPORT_PATH_TABLE, EXPORT_PATH_FIG):
    export_dir.mkdir(parents=True, exist_ok=True)

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2


In [27]:

# CSV with the published MTEB STS baseline scores.
MTEB_BASELINES_PATH = Path("../non_sync/baselines_mteb/mteb_detailed/en_STS.csv")

# Result folders of our own runs (one per training loss).
RESULTS_PATHS = [
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_nll"),
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_mse"),
]

# Merge baselines and our results, then move the index levels back into regular columns.
df_merged = load_sts_merged_mteb(MTEB_BASELINES_PATH, RESULTS_PATHS).reset_index()

# For every model keep only the row holding its largest 'Training step',
# then discard the step column, which is no longer needed.
indices = df_merged.sort_values('Training step').groupby('Model')['Training step'].idxmax()
df_merged = df_merged.loc[indices].drop(columns='Training step')

# Wide -> long: one (Task, Score) row per model.
# NOTE(review): relies on the four identifier columns being the first four columns of df_merged.
df_melted = df_merged.melt(
    id_vars=['Model', 'Model Size (Million Parameters)', 'loss', 'Dataset'],
    value_vars=df_merged.columns[4:],
    var_name='Task',
    value_name='Score',
)

# Drop every task whose name mentions SummEval.
df_melted = df_melted.loc[lambda frame: ~frame['Task'].str.contains('SummEval')]


In [28]:

# Display the long-format frame (identifier columns plus one Task/Score pair per row) as a sanity check.
df_melted

Unnamed: 0,Model,Model Size (Million Parameters),loss,Dataset,Task,Score
0,Alibaba-NLP/gte-Qwen1.5-7B-instruct,7099.0,MTEB,MTEB,BIOSSES,81.12
1,Alibaba-NLP/gte-Qwen2-1.5B-instruct,1776.0,MTEB,MTEB,BIOSSES,82.11
2,Alibaba-NLP/gte-Qwen2-7B-instruct,7613.0,MTEB,MTEB,BIOSSES,81.37
3,Alibaba-NLP/gte-base-en-v1.5,137.0,MTEB,MTEB,BIOSSES,83.65
4,Alibaba-NLP/gte-large-en-v1.5,434.0,MTEB,MTEB,BIOSSES,85.39
...,...,...,...,...,...,...
1375,thenlper/gte-small,33.0,MTEB,MTEB,STSBenchmark,85.57
1376,thtang/ALL_862873,118.0,MTEB,MTEB,STSBenchmark,44.39
1377,w601sxs/b1ade-embed,335.0,MTEB,MTEB,STSBenchmark,88.77
1378,zeta-alpha-ai/Zeta-Alpha-E5-Mistral,7111.0,MTEB,MTEB,STSBenchmark,88.28


In [29]:
# Substrings of model names; any model whose name contains one of them is
# left out of the "downsampled" result tables.
models = [
    'paraphrase-multilingual',
    'msmarco',
    'ALL',
    'm-v1.5',
    'unsup-sim',
]


In [30]:

# Inclusive (low, high) parameter-count ranges, in millions, for which tables are exported.
size_ranges = [(16, 30), (30, 50), (100, 120)]

idx = pd.IndexSlice


def _latex_task_header(task):
    r"""Turn a raw task name into a rotated, stacked LaTeX column header.

    A space is inserted at every lowercase->uppercase boundary, the " (en)"
    suffix is removed, each space becomes a LaTeX line break (" \\ "), and the
    result is wrapped in \rotatebox{90}{\shortstack{...}}.
    """
    spaced = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', task)
    stacked = spaced.replace(" (en)", "").replace(" ", " \\\\ ")
    return f"\\rotatebox{{90}}{{\\shortstack{{{stacked}}}}}"


for low, high in size_ranges:
    # Keep the models whose size falls inside the current range (both bounds inclusive).
    size_column = df_melted['Model Size (Million Parameters)']
    latex_results = df_melted[(size_column >= low) & (size_column <= high)].copy()

    latex_results = latex_results.dropna()
    latex_results['Task'] = latex_results['Task'].apply(_latex_task_header)

    # Both tables of a size range share the same caption.
    caption = f"Performance of our distilled models compared of models of similar sizes {low}M to {high}M parameters from the MTEB Benchmark on STS tasks."

    # Full table for this size range.
    make_results_tables_per_group_size(latex_results, "Score", low, high, EXPORT_PATH_TABLE,
                                       f"mteb_sts_per_size_{low}_{high}",
                                       caption=caption)

    # Downsampled table: drop every model whose name contains one of the
    # substrings in `models`. regex=False makes this a literal substring test,
    # so the "." in e.g. "m-v1.5" is not treated as a regex wildcard.
    for model in models:
        latex_results = latex_results[~latex_results['Model'].str.contains(model, regex=False)]

    # The name must be an f-string; otherwise every size range is exported
    # under the same literal "{low}_{high}" name.
    make_results_tables_per_group_size(latex_results, "Score", low, high, EXPORT_PATH_TABLE,
                                       name=f"mteb_sts_per_size_{low}_{high}_downsampled",
                                       caption=caption)








Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-xs,23.0,76.828859,78.89816,71.375237,78.873817,75.627732,84.593771,82.297623,86.822035,66.693038,82.616256
1,MTEB,Mihaiii/Bulbasaur,17.0,85.05,76.04,69.5,80.96,77.08,85.42,82.3,88.03,64.12,83.34
2,MTEB,Mihaiii/Ivysaur,23.0,87.32,75.63,68.58,80.54,77.63,86.16,82.82,88.57,67.39,84.25
3,MTEB,Mihaiii/Squirtle,16.0,71.78,77.34,70.15,78.42,74.76,82.0,78.27,85.85,61.2,79.21
4,MTEB,Mihaiii/Venusaur,16.0,77.59,74.68,54.35,74.24,69.99,75.74,73.65,84.81,62.56,76.72
5,MTEB,Mihaiii/Wartortle,17.0,80.78,78.24,75.19,79.33,76.56,84.7,81.44,86.61,63.43,81.79
6,MTEB,Snowflake/snowflake-arctic-embed-xs,23.0,84.05,69.26,65.9,77.87,72.82,83.49,80.58,84.49,66.28,79.18
7,MTEB,TaylorAI/bge-micro,17.0,83.42,72.39,71.9,80.93,76.6,84.92,80.72,85.61,65.9,81.32
8,MTEB,TaylorAI/bge-micro-v2,17.0,82.92,73.62,71.89,79.85,76.86,84.77,81.91,86.82,65.38,82.5
9,MTEB,TaylorAI/gte-tiny,23.0,86.63,75.85,72.58,82.39,77.98,86.54,83.31,88.28,66.68,84.38


Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-xs,23.0,76.828859,78.89816,71.375237,78.873817,75.627732,84.593771,82.297623,86.822035,66.693038,82.616256
1,MTEB,Mihaiii/Bulbasaur,17.0,85.05,76.04,69.5,80.96,77.08,85.42,82.3,88.03,64.12,83.34
2,MTEB,Mihaiii/Ivysaur,23.0,87.32,75.63,68.58,80.54,77.63,86.16,82.82,88.57,67.39,84.25
3,MTEB,Mihaiii/Squirtle,16.0,71.78,77.34,70.15,78.42,74.76,82.0,78.27,85.85,61.2,79.21
4,MTEB,Mihaiii/Venusaur,16.0,77.59,74.68,54.35,74.24,69.99,75.74,73.65,84.81,62.56,76.72
5,MTEB,Mihaiii/Wartortle,17.0,80.78,78.24,75.19,79.33,76.56,84.7,81.44,86.61,63.43,81.79
6,MTEB,Snowflake/snowflake-arctic-embed-xs,23.0,84.05,69.26,65.9,77.87,72.82,83.49,80.58,84.49,66.28,79.18
7,MTEB,TaylorAI/bge-micro,17.0,83.42,72.39,71.9,80.93,76.6,84.92,80.72,85.61,65.9,81.32
8,MTEB,TaylorAI/bge-micro-v2,17.0,82.92,73.62,71.89,79.85,76.86,84.77,81.91,86.82,65.38,82.5
9,MTEB,TaylorAI/gte-tiny,23.0,86.63,75.85,72.58,82.39,77.98,86.54,83.31,88.28,66.68,84.38


Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-s,33.0,74.840862,78.931793,70.055913,78.196301,74.090338,83.242073,81.498496,85.633134,65.860047,82.295341
1,MTEB,BAAI/bge-small-en-v1.5,33.0,83.75,79.41,77.44,82.98,81.84,87.26,84.93,87.15,65.3,85.86
2,MTEB,Snowflake/snowflake-arctic-embed-s,33.0,86.27,69.66,68.79,79.62,75.58,84.64,82.4,86.73,69.49,81.19
3,MTEB,andersonbcdefg/bge-small-4096,35.0,81.6,74.18,72.2,80.54,76.2,85.2,81.93,86.61,65.46,81.94
4,MTEB,avsolatorio/GIST-small-Embedding-v0,33.0,86.99,80.53,75.57,86.26,82.3,88.74,85.27,89.02,68.51,87.08
5,MTEB,avsolatorio/NoInstruct-small-Embedding-v0,33.0,87.2,80.31,75.76,86.08,82.28,88.9,85.18,88.73,68.54,86.98
6,MTEB,intfloat/e5-small,33.0,84.22,78.9,75.19,81.8,78.48,87.49,84.58,87.94,63.76,86.36
7,MTEB,intfloat/e5-small-v2,33.0,79.43,78.51,76.21,82.4,79.0,87.76,83.8,87.72,63.15,85.95
8,MTEB,jinaai/jina-embedding-s-en-v1,35.0,82.96,76.33,74.28,78.55,73.84,83.71,80.03,87.49,64.25,79.2
9,MTEB,jinaai/jina-embeddings-v2-small-en,33.0,80.52,76.72,73.66,83.3,79.17,87.3,83.6,88.23,63.46,84.04


Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-s,33.0,74.840862,78.931793,70.055913,78.196301,74.090338,83.242073,81.498496,85.633134,65.860047,82.295341
1,MTEB,BAAI/bge-small-en-v1.5,33.0,83.75,79.41,77.44,82.98,81.84,87.26,84.93,87.15,65.3,85.86
2,MTEB,Snowflake/snowflake-arctic-embed-s,33.0,86.27,69.66,68.79,79.62,75.58,84.64,82.4,86.73,69.49,81.19
3,MTEB,andersonbcdefg/bge-small-4096,35.0,81.6,74.18,72.2,80.54,76.2,85.2,81.93,86.61,65.46,81.94
4,MTEB,avsolatorio/GIST-small-Embedding-v0,33.0,86.99,80.53,75.57,86.26,82.3,88.74,85.27,89.02,68.51,87.08
5,MTEB,avsolatorio/NoInstruct-small-Embedding-v0,33.0,87.2,80.31,75.76,86.08,82.28,88.9,85.18,88.73,68.54,86.98
6,MTEB,intfloat/e5-small,33.0,84.22,78.9,75.19,81.8,78.48,87.49,84.58,87.94,63.76,86.36
7,MTEB,intfloat/e5-small-v2,33.0,79.43,78.51,76.21,82.4,79.0,87.76,83.8,87.72,63.15,85.95
8,MTEB,jinaai/jina-embedding-s-en-v1,35.0,82.96,76.33,74.28,78.55,73.84,83.71,80.03,87.49,64.25,79.2
9,MTEB,jinaai/jina-embeddings-v2-small-en,33.0,80.52,76.72,73.66,83.3,79.17,87.3,83.6,88.23,63.46,84.04


Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-m,109.0,82.676583,81.068982,73.496009,81.949051,78.168532,86.333299,84.653138,88.051412,66.003694,85.063858
1,MTEB,BAAI/bge-base-en-v1.5,109.0,86.94,80.3,78.03,84.19,82.27,87.96,85.48,86.42,65.95,86.42
2,MTEB,Lajavaness/bilingual-embedding-small,118.0,83.99,74.65,79.39,85.32,83.9,88.47,84.44,85.77,67.24,86.08
3,MTEB,Marqo/multilingual-e5-small,118.0,82.26,77.51,76.56,76.97,75.52,87.12,83.63,86.44,60.94,84.01
4,MTEB,Snowflake/snowflake-arctic-embed-m,109.0,86.62,69.12,66.97,79.12,68.51,79.92,78.66,81.46,65.84,74.1
5,MTEB,Snowflake/snowflake-arctic-embed-m-v1.5,109.0,86.4,69.86,61.8,82.67,68.95,75.55,77.27,74.97,69.08,69.72
6,MTEB,avsolatorio/GIST-Embedding-v0,109.0,87.95,81.29,76.16,87.85,83.39,89.43,85.35,88.59,67.81,87.32
7,MTEB,current/ml-nlp-elser.html,110.0,83.79,68.78,64.81,80.1,74.96,83.7,80.55,85.74,67.5,79.54
8,MTEB,dwzhu/e5-base-4k,112.0,81.4,78.3,75.79,83.58,79.95,88.82,84.46,87.58,64.07,86.52
9,MTEB,hkunlp/instructor-base,110.0,82.31,80.26,77.02,86.58,81.32,88.19,84.88,89.46,66.45,86.43


Task,Unnamed: 1,Model,Params. (M),\rotatebox{90}{\shortstack{BIOSSES}},\rotatebox{90}{\shortstack{SICK-R}},\rotatebox{90}{\shortstack{STS12}},\rotatebox{90}{\shortstack{STS13}},\rotatebox{90}{\shortstack{STS14}},\rotatebox{90}{\shortstack{STS15}},\rotatebox{90}{\shortstack{STS16}},\rotatebox{90}{\shortstack{STS17}},\rotatebox{90}{\shortstack{STS22}},\rotatebox{90}{\shortstack{STSBenchmark}}
0,MSE,MSE/GIST/Student-m,109.0,82.676583,81.068982,73.496009,81.949051,78.168532,86.333299,84.653138,88.051412,66.003694,85.063858
1,MTEB,BAAI/bge-base-en-v1.5,109.0,86.94,80.3,78.03,84.19,82.27,87.96,85.48,86.42,65.95,86.42
2,MTEB,Lajavaness/bilingual-embedding-small,118.0,83.99,74.65,79.39,85.32,83.9,88.47,84.44,85.77,67.24,86.08
3,MTEB,Marqo/multilingual-e5-small,118.0,82.26,77.51,76.56,76.97,75.52,87.12,83.63,86.44,60.94,84.01
4,MTEB,Snowflake/snowflake-arctic-embed-m,109.0,86.62,69.12,66.97,79.12,68.51,79.92,78.66,81.46,65.84,74.1
5,MTEB,avsolatorio/GIST-Embedding-v0,109.0,87.95,81.29,76.16,87.85,83.39,89.43,85.35,88.59,67.81,87.32
6,MTEB,current/ml-nlp-elser.html,110.0,83.79,68.78,64.81,80.1,74.96,83.7,80.55,85.74,67.5,79.54
7,MTEB,dwzhu/e5-base-4k,112.0,81.4,78.3,75.79,83.58,79.95,88.82,84.46,87.58,64.07,86.52
8,MTEB,hkunlp/instructor-base,110.0,82.31,80.26,77.02,86.58,81.32,88.19,84.88,89.46,66.45,86.43
9,MTEB,huggingface.co/bert-base-uncased,110.0,54.7,58.65,30.87,59.89,47.73,60.29,63.73,64.1,56.37,47.29
