In [1]:
import ast
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from netgraph import Graph, InteractiveGraph
from networkx.algorithms.community import girvan_newman, modularity_max, louvain_communities

from visu_utils import *

# Use seaborn's "whitegrid" style for every figure drawn in this notebook.
sns.set_style("whitegrid")

# IPython autoreload, mode 2: re-import modules before each cell execution so
# that edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2



In [3]:


# --- Configuration ----------------------------------------------------------
# Prefix inserted into the file name of the exported LaTeX table.
PREFIX = "mteb_ds"

# Metric that is pivoted into `table` and later correlated with MTEB scores.
METRIC = "I(X_1->X_2)/d_2"

# Aggregate score columns taken from the MTEB averages CSV.
METRICS = [
    'Average (56 datasets)',
    'Classification Average (12 datasets)',
    'Clustering Average (11 datasets)',
    'Retrieval Average (15 datasets)',
    'STS Average (10 datasets)',
    'Reranking Average (4 datasets)',
]

# --- Data -------------------------------------------------------------------
# Raw results, passed through the project preprocessing helper restricted to
# MODELS_MAIN_EXPES (both come from visu_utils).
df = text_embeddings_metadata_results_preprocessing(
    pd.read_csv('exported_data/normalized_13.df'),
    models=MODELS_MAIN_EXPES,
)

# Pivot of METRIC built by the project helper
# (presumably model_1 rows x model_2 columns -- confirm in visu_utils).
table = make_table_pivot(METRIC, df)

# MTEB leaderboard averages; the model identifier is derived from 'Model' below.
mteb = pd.read_csv('exported_data/df_mteb_avg.csv')

import re

def extract_url_from_html_link(html):
    """Return the target of the first ``href`` attribute found in ``html``.

    The value may be wrapped in single quotes, double quotes, or be unquoted;
    it ends at the first quote, space or ``>`` character.

    Args:
        html: String containing an HTML anchor (or any text with ``href=``).

    Returns:
        The first ``href`` value as a string.

    Raises:
        IndexError: If ``html`` contains no ``href=`` attribute.
    """
    href_targets = re.findall(r'href=[\'"]?([^\'" >]+)', html)
    # Only the first link matters; indexing raises IndexError when none exist.
    first_target = href_targets[0]
    return first_target

# The 'Model' column holds HTML links: pull out the URL and keep only its last
# two path segments so it can be matched against the model ids in `table`.
mteb['model'] = (
    mteb['Model']
    .apply(extract_url_from_html_link)
    .apply(lambda url: "/".join(url.split('/')[-2:]))
)

# Median of METRIC per row of `table`, joined with the MTEB scores.
# The unnamed median series becomes column 0, hence the rename to METRIC.
# how='left' keeps every model of `table`, even without an MTEB entry.
avg_results = (
    table
    .median(axis=1)
    .sort_values(ascending=False)
    .to_frame()
    .reset_index()
    .rename(columns={0: METRIC, 'model_1': 'model'})
    .merge(mteb, on='model', how='left')
    .sort_values(METRIC, ascending=False)
)

# Shorten the model id to the part after the last '/'.
avg_results['model'] = avg_results['model'].apply(lambda full_name: full_name.split('/')[-1])



  .pivot("model_1", "model_2", metric)


In [17]:
# Per-model table holding METRIC and the MTEB aggregate columns, indexed by
# the (shortened) model name.
mteb_info = avg_results.loc[:, ['model', METRIC, *METRICS]].set_index('model')

In [75]:

# Results of the additional classification benchmark (one row per run).
classifcation_df = pd.read_csv('exported_data/classification_many_2.csv')

# The 'model' column stores a Python-literal sequence as text; keep its first
# element. ast.literal_eval only parses literals, whereas eval() would execute
# any code found in the CSV.
# NOTE(review): assumes every cell is a plain literal (tuple/list of strings)
# -- confirm against the exported file.
classifcation_df['model'] = classifcation_df['model'].apply(lambda x: ast.literal_eval(x)[0])

# Number of distinct datasets covered (shown as the cell output).
print(len(classifcation_df['dataset'].unique()))

# Average per model. numeric_only=True makes explicit that non-numeric columns
# are left out of the mean; relying on the implicit drop is deprecated and
# raises a TypeError on pandas >= 2.0.
classifcation_df = classifcation_df.groupby('model').mean(numeric_only=True).reset_index()

# Keep only the mean success rate, indexed by model.
pivoted_classif = classifcation_df[['model', 'success']].set_index('model')

# NOTE(review): the "8 datasets" in this label is hard-coded; it matches the
# count printed above in the recorded run.
pivoted_classif = pivoted_classif.rename(columns={'success': 'Classification 2 (8 datasets)'})

# Shorten the model id to the part after the last '/'.
pivoted_classif.index = pivoted_classif.index.map(lambda x: x.split('/')[-1])

8


  classifcation_df = classifcation_df.groupby('model').mean().reset_index()


In [76]:


import re

# Join the MTEB columns with the extra classification score on the model name
# (index-on-index merge, pd.merge's default inner join).
merged_scores = pd.merge(mteb_info, pivoted_classif, left_index=True, right_index=True)

# Correlation of METRIC with every column, one series per coefficient.
pearson = merged_scores.corr(method="pearson").loc[METRIC, :].sort_values(ascending=False)
spearman = merged_scores.corr(method="spearman").loc[METRIC, :].sort_values(ascending=False)
kendall = merged_scores.corr(method="kendall").loc[METRIC, :].sort_values(ascending=False)

# One column per coefficient, rows ordered by Spearman; the METRIC row (its
# correlation with itself) is dropped.
correlations = (
    pd.concat(
        [pearson, spearman, kendall],
        axis=1,
        keys=['$\\rho_p$', '$\\rho_s$', '$\\tau$'],
    )
    .sort_values('$\\rho_s$', ascending=False)
    .drop(index=METRIC)
)

# Remove " Average" from the row labels. The pattern needs a leading
# whitespace character, so a label that starts with "Average" is unchanged.
# The "(N datasets)" suffix is kept.
correlations.index = correlations.index.map(lambda label: re.sub(r'\sAverage', '', label))


In [77]:
# Styler for the correlation table, formatting values with precision=2.
style = correlations.style.format(precision=2)

In [78]:



# Destination of the LaTeX table; the file name combines PREFIX and the
# sanitized metric name. Missing parent directories are created.
path = Path(
    f"../../../papers/emir-embedding-comparison/tables/nlp/{PREFIX}_overall_correlation_table_{sanitize_metric_name(METRIC)}.tex"
)
path.parent.mkdir(parents=True, exist_ok=True)

# Render the styled table as LaTeX.
latex = style.to_latex(
    clines="skip-last;data",
    sparse_index=True,
    hrules=True,
)

# Show the table in the notebook for a quick visual check.
print(latex)

# Write the table to disk; the cell output is the number of characters written.
path.write_text(latex)


\begin{tabular}{lrrr}
\toprule
 & $\rho_p$ & $\rho_s$ & $\tau$ \\
\midrule
Average (56 datasets) & 0.94 & 0.90 & 0.74 \\
Retrieval (15 datasets) & 0.89 & 0.89 & 0.70 \\
Classification (12 datasets) & 0.92 & 0.88 & 0.73 \\
Clustering (11 datasets) & 0.86 & 0.85 & 0.67 \\
STS (10 datasets) & 0.92 & 0.82 & 0.62 \\
Reranking (4 datasets) & 0.84 & 0.79 & 0.64 \\
Classification 2 (8 datasets) & 0.85 & 0.77 & 0.57 \\
\bottomrule
\end{tabular}


440