In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import seaborn as sns

from visu_utils import *

import re

%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
# --- Export locations (LaTeX tables and figures of the paper) ---------------
EXPORT_PATH_TABLE = Path("../../papers/Distillation-MI-ICLR/tables/nlp/")
EXPORT_PATH_FIG = Path("../../papers/Distillation-MI-ICLR/figures/nlp/")

# Create both export directories up front so later writes cannot fail on a
# missing folder; `exist_ok=True` keeps re-running this cell harmless.
for _export_dir in (EXPORT_PATH_TABLE, EXPORT_PATH_FIG):
    _export_dir.mkdir(parents=True, exist_ok=True)

# --- Input locations --------------------------------------------------------
MTEB_BASELINES_PATH = Path("../non_sync/baselines_mteb/mteb_detailed/en_Classification.csv")

RESULTS_PATHS = [
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_nll"),
    Path("../non_sync/mteb_benchmarking/results/experiments_gist_mse"),
]

# `load_classification_merged_mteb` comes from `visu_utils`; presumably it
# combines the baseline CSV with the experiment result folders -- TODO confirm.
df_merged = load_classification_merged_mteb(MTEB_BASELINES_PATH, RESULTS_PATHS)

# Keep every row whose model name does not contain "Student".
_is_student = df_merged["Model"].str.contains("Student")
df_merged = df_merged[~_is_student]


In [3]:
# Base students: rows whose model name contains "snowflake", minus every name
# containing "-l" and every name containing "1.5".
# `regex=False` makes these literal substring tests: as a regular expression
# the "." in "1.5" would match any character (e.g. "105"), silently dropping
# unrelated models.
_model_names = df_merged["Model"]
_is_snowflake = _model_names.str.contains("snowflake", regex=False)
_has_dash_l = _model_names.str.contains("-l", regex=False)
_has_1_5 = _model_names.str.contains("1.5", regex=False)

# One combined mask followed by a single `.copy()`: the frame is an independent
# copy, so adding the label column below does not write into a filtered slice
# (which can trigger pandas' SettingWithCopyWarning).
base_students_models = df_merged[_is_snowflake & ~_has_dash_l & ~_has_1_5].copy()

# The teacher identifiers below match the trailing sub-paths of these
# embedding directories:
#   /lustre/fsn1/projects/rech/ehz/uwf24rf/EMIR/nlp_embeddings/embeddings_gist/jamesgpt1/sf_model_e5
#   /lustre/fsn1/projects/rech/ehz/uwf24rf/EMIR/nlp_embeddings/embeddings_gist/WhereIsAI/UAE-Large-V1
#   /lustre/fsn1/projects/rech/ehz/uwf24rf/EMIR/nlp_embeddings/embeddings_gist/Salesforce/SFR-Embedding-2_R
#   /lustre/fsn1/projects/rech/ehz/uwf24rf/EMIR/nlp_embeddings/embeddings_gist/dunzhang/stella_en_400M_v5
teachers = ["jamesgpt1/sf_model_e5", "WhereIsAI/UAE-Large-V1", "Salesforce/SFR-Embedding-2_R", "dunzhang/stella_en_400M_v5"]

# Exact-name match against the teacher list.
teacher_models = df_merged[df_merged["Model"].isin(teachers)].copy()

# The blank-named column " " carries the group label; the next cell uses it as
# the outer index level of the exported table.
teacher_models[" "] = "Teacher"
base_students_models[" "] = "Student (Base)"


In [4]:


def _latex_column_header(column: str) -> str:
    """Turn a result column name into a rotated, stacked LaTeX header cell.

    Steps, in order: drop the "Classification" suffix, insert a space at every
    lower-case/upper-case boundary, drop the " (en)" marker, put each word on
    its own line, and wrap the result in ``\\rotatebox{90}{\\shortstack{...}}``.

    Plain ``str.replace`` is used on purpose: it is always literal, whereas
    ``Index.str.replace`` treated the pattern as a regular expression by
    default before pandas 2.0, where " (en)" is a capture group that does not
    match the literal parentheses.
    """
    label = column.replace("Classification", "")
    label = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', label)
    label = label.replace(" (en)", "")
    label = label.replace(" ", " \\\\ ")
    return f"\\rotatebox{{90}}{{\\shortstack{{{label}}}}}"


# Stack teachers on top of base students and index by (group label, model name).
merged = pd.concat([teacher_models, base_students_models])
merged = merged.set_index([' ', 'Model']).drop(['loss', 'Training step', 'Dataset'], axis=1)
display(merged)

df_pivot = merged.rename(
    {
        "Model Size (Million Parameters)": "Size",
    },
    axis=1,
)

# Row-wise mean over every column except the model size.
score_columns = [c for c in df_pivot.columns if c != "Size"]
df_pivot["Avg."] = df_pivot[score_columns].mean(axis=1)

# Keep only the part of the model identifier after the last "/".
df_pivot.index = pd.MultiIndex.from_tuples(
    [(group, model.split("/")[-1]) for group, model in df_pivot.index]
)

# Rewrite the column headers as LaTeX cells.
df_pivot.columns = [_latex_column_header(c) for c in df_pivot.columns]

# NOTE: every cell, including the size column, is rendered with one decimal.
style = df_pivot.style.format("{:.1f}")
# Escape LaTeX special characters (e.g. "_") in the index labels; the column
# headers already contain raw LaTeX and are left untouched.
style = style.format_index(escape="latex")

latex = style.to_latex(
    clines="skip-last;data",
    hrules=True,
    sparse_index=True,
    multicol_align="c",
    multirow_align="c",
    caption="Performance of the 4 teachers we used and of the base students. Experiments with single teacher distillation were performed with the stronger teacher SFR-Embedding-2\\_R.",
    label="tab:nlp:base_student_teacher_perfs",
    # Two index columns, the size column, the task columns, then the average.
    column_format="llc|" + "c" * (len(df_pivot.columns) - 2) + "|c",
)

# Wrap the tabular in a \resizebox so the table fits the text width.
latex = latex.replace(
    "\\begin{tabular}",
    "\\resizebox{\\textwidth}{!}{\\begin{tabular}",
)
latex = latex.replace(
    "\\end{tabular}",
    "\\end{tabular}}\n",
)

# Explicit encoding so the exported file does not depend on the platform default.
with open(EXPORT_PATH_TABLE / "table_base_student_teacher_perfs.tex", "w", encoding="utf-8") as f:
    f.write(latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Model Size (Million Parameters),AmazonCounterfactualClassification (en),AmazonPolarityClassification,AmazonReviewsClassification (en),Banking77Classification,EmotionClassification,ImdbClassification,MTOPDomainClassification (en),MTOPIntentClassification (en),MassiveIntentClassification (en),MassiveScenarioClassification (en),ToxicConversationsClassification,TweetSentimentExtractionClassification
Unnamed: 0_level_1,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Teacher,Salesforce/SFR-Embedding-2_R,7111.0,92.72,97.31,61.04,90.02,93.37,96.8,98.58,91.3,85.97,90.61,91.14,79.7
Teacher,dunzhang/stella_en_400M_v5,435.0,92.36,97.19,59.53,89.3,78.77,96.49,98.83,92.3,85.17,89.62,86.94,73.58
Teacher,WhereIsAI/UAE-Large-V1,335.0,75.55,92.84,48.29,87.69,51.75,92.78,94.02,76.92,76.5,79.75,71.09,59.75
Teacher,jamesgpt1/sf_model_e5,335.0,70.85,91.81,48.94,84.61,54.9,93.14,93.64,66.01,73.49,77.38,71.19,61.55
Student (Base),Snowflake/snowflake-arctic-embed-m,109.0,76.81,82.83,38.93,80.34,46.54,74.08,92.73,65.18,66.92,72.75,64.93,56.73
Student (Base),Snowflake/snowflake-arctic-embed-s,33.0,71.18,78.75,38.26,79.12,45.77,69.49,90.94,58.63,64.76,70.05,61.96,58.9
Student (Base),Snowflake/snowflake-arctic-embed-xs,23.0,65.09,70.04,35.34,76.37,41.84,62.8,90.8,58.01,63.51,71.01,64.31,56.19
