In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

from pathlib import Path
import re

from IPython.core.display import display_latex

from visu_utils import TASK_LIST_CLASSIFICATION, make_results_tables_per_group_size, load_classification_merged_mteb, \
    load_clustering_merged_mteb

from autorank import autorank, plot_stats, create_report, latex_table

# Export destinations (relative paths) for the generated tables and figures.
EXPORT_PATH_TABLE = Path("../../papers/Distillation-MI-ICLR/tables/nlp/")
EXPORT_PATH_FIG = Path("../../papers/Distillation-MI-ICLR/figures/nlp/")

# Create both export directories, including any missing parents, before anything is written.
for export_dir in (EXPORT_PATH_TABLE, EXPORT_PATH_FIG):
    export_dir.mkdir(parents=True, exist_ok=True)

# (Re)load IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to local helper modules are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2


In [6]:

# Inputs: the MTEB baseline scores (CSV) and the result folders of our own runs.
MTEB_BASELINES_PATH = Path("../non_sync/baselines_mteb/mteb_detailed/en_Clustering.csv")

RESULTS_PATHS = [
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_nll"),
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_mse"),
]

df_loaded = load_clustering_merged_mteb(MTEB_BASELINES_PATH, RESULTS_PATHS)

# select only last training step for each model
# reset_index() gives every row a unique label so the idxmax() labels below identify single rows;
# it also materialises the previous index as one or more leading columns.
df_merged = df_loaded.reset_index()
index_columns = [col for col in df_merged.columns if col not in df_loaded.columns]

indices = df_merged.sort_values('Training step').groupby('Model')['Training step'].idxmax()
df_merged = df_merged.loc[indices]
df_merged = df_merged.drop('Training step', axis=1)

# Sanity check: show the rows of our own student models.
display(df_merged[df_merged['Model'].str.contains('Stu')])

# Long format: one row per (model, task) with the score in 'Score'.
# The task columns are selected by name rather than by position: the previous positional slice
# `columns[4:]` started at 'Dataset' (because reset_index() prepends a column), which passed
# 'Dataset' both as an identifier and as a value column.
id_columns = ['Model', 'Model Size (Million Parameters)', 'loss', 'Dataset']
task_columns = [col for col in df_merged.columns
                if col not in id_columns and col not in index_columns]

df_melted = df_merged.melt(id_vars=id_columns, value_vars=task_columns,
                           var_name='Task', value_name='Score')



Unnamed: 0,index,Model,Model Size (Million Parameters),loss,Dataset,ArxivClusteringP2P,ArxivClusteringS2S,RedditClustering,RedditClusteringP2P,StackExchangeClustering,StackExchangeClusteringP2P,TwentyNewsgroupsClustering
12,12,MSE/GIST/Student-m,109.0,MSE,GIST,46.498656,37.081099,54.471155,60.397679,62.016317,33.357345,46.0768
14,14,MSE/GIST/Student-s,33.0,MSE,GIST,43.137838,33.265613,50.768963,57.123363,55.653675,32.257206,42.788537
16,16,MSE/GIST/Student-xs,23.0,MSE,GIST,42.350938,30.884787,49.194528,55.219228,53.526287,32.65463,41.896597
2,2,NLL/GIST/Student-m,109.0,NLL,GIST,47.728607,38.721918,56.278965,61.523439,64.655865,33.843269,46.596227
6,6,NLL/GIST/Student-s,33.0,NLL,GIST,45.854248,35.214716,51.911016,60.291415,61.45794,32.254829,45.084456
10,10,NLL/GIST/Student-xs,23.0,NLL,GIST,45.215166,33.898452,52.138519,58.100985,59.904538,33.067741,44.338147


In [7]:
# Model-name fragments: any model whose name contains one of these is left out
# of the "downsampled" result tables.
models = [
    "paraphrase-multilingual",
    "msmarco",
    "ALL",
    "m-v1.5",
    "unsup-sim",
]


In [8]:

# Inclusive (low, high) model-size buckets in millions of parameters; a full and a downsampled
# table are exported for each bucket. Both bounds are inclusive, so a model of exactly 30M
# parameters appears in the first two buckets.
size_ranges = [(16, 30), (30, 50), (100, 120)]

idx = pd.IndexSlice  # not used in this cell; kept in case another cell relies on it


def _format_task_label(task):
    """Turn a raw task name into a rotated, stacked LaTeX column header.

    CamelCase words are split on lowercase/uppercase boundaries, a " (en)" suffix is removed,
    and the words are stacked with LaTeX line breaks inside \\rotatebox{90}{\\shortstack{...}}.

    Args:
        task: Raw task name, e.g. "ArxivClusteringP2P".

    Returns:
        The LaTeX snippet used as the column header for that task.
    """
    # NOTE(review): the tasks in this notebook are clustering tasks, so stripping
    # "Classification" looks like a leftover no-op; confirm whether "Clustering" was intended.
    label = task.replace("Classification", "")
    label = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', label)
    label = label.replace(" (en)", "")
    label = label.replace(" ", " \\\\ ")
    return f"\\rotatebox{{90}}{{\\shortstack{{{label}}}}}"


for low, high in size_ranges:
    # Keep only the models whose size falls inside the current bucket.
    latex_results = df_melted[
        (df_melted['Model Size (Million Parameters)'] >= low) & (
                df_melted['Model Size (Million Parameters)'] <= high)].copy()

    latex_results['Task'] = latex_results['Task'].apply(_format_task_label)

    # The same caption is used for the full and the downsampled table.
    caption = f"Performance of our distilled models compared of models of similar sizes {low}M to {high}M parameters from the MTEB Benchmark on clustering tasks."

    make_results_tables_per_group_size(latex_results, "Score", low, high, EXPORT_PATH_TABLE,
                                       f"mteb_clustering_per_size_{low}_{high}",
                                       caption=caption)

    # Downsampled variant: drop every model whose name contains one of the fragments in `models`.
    # regex=False matches the fragments literally (e.g. the '.' in "m-v1.5" is not a wildcard).
    for model in models:
        latex_results = latex_results[~latex_results['Model'].str.contains(model, regex=False)]

    # The name must be an f-string: without the prefix every bucket was exported under the same
    # literal name "mteb_clustering_per_size_{low}_{high}_downsampled".
    make_results_tables_per_group_size(latex_results, "Score", low, high, EXPORT_PATH_TABLE,
                                       name=f"mteb_clustering_per_size_{low}_{high}_downsampled",
                                       caption=caption)






