## Creating data for main results table

In [1]:
from __future__ import annotations

import pandas as pd

import mteb

mdl_names = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-MiniLM-L12-v2",
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "sentence-transformers/LaBSE",
    "intfloat/multilingual-e5-large-instruct",
    "intfloat/e5-mistral-7b-instruct",
    "GritLM/GritLM-7B",
    "intfloat/multilingual-e5-small",
    "intfloat/multilingual-e5-base",
    "intfloat/multilingual-e5-large",
]
model_metas = [mteb.get_model_meta(name) for name in mdl_names]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def add_aggregate_columns(results):
    task_names = results.columns[2:]

    borda = results[task_names].rank(ascending=True, method="min").sum(axis=1)
    results["Borda Count"] = borda
    results = results.sort_values("Borda Count", ascending=False)
    # borda str: 1 ({borda count}) 2 ({borda count}) 3 ({borda count}) ...
    results["Borda str"] = [
        f"{i+1} ({int(borda_count)})"
        for i, borda_count in enumerate(results["Borda Count"].to_list())
    ]

    # add mean across tasks
    results["Mean"] = results[task_names].mean(axis=1)

    # add mean pr. task type
    task_types = [
        "BitextMining",
        "PairClassification",
        "Classification",
        "STS",
        "Retrieval",
        "MultilabelClassification",
        "Clustering",
        "Reranking",
    ]

    tasks = [mteb.get_task(name) for name in task_names]
    tasktype_to_tasks = {
        task_type: [t for t in tasks if t.metadata.type == task_type]
        for task_type in task_types
    }

    for task_type, tasks in tasktype_to_tasks.items():
        task_names = [t.metadata.name for t in tasks]
        results[f"Mean {task_type}"] = results[task_names].mean(axis=1)

    # add mean pr. task type
    cols = [f"Mean {task_type}" for task_type in task_types]
    results["mean pr. task type"] = results[cols].mean(axis=1)
    return results

## Indic


In [3]:
mult_tasks = mteb.get_benchmark("MTEB(Indic)").tasks

# load task results for the specified models from mteb/results repository
mteb_results = mteb.load_results(
    models=model_metas,
    tasks=mult_tasks,
    download_latest=False,
)

mteb_results = mteb_results.join_revisions().filter_models()

# manual check that everything is there
# pd.DataFrame(mteb_results.get_scores()).to_csv("tmp.csv")

results = pd.DataFrame(mteb_results.get_scores())
results = add_aggregate_columns(results=results)


# create latex table
# column order
cols = [
    "model",
    "Borda str",
    "Mean",
    "mean pr. task type",
    "Mean BitextMining",
    "Mean PairClassification",
    "Mean Classification",
    "Mean STS",
    "Mean Retrieval",
    "Mean MultilabelClassification",
    "Mean Clustering",
    "Mean Reranking",
]

latex_df = results[cols]



In [4]:
results

Unnamed: 0,model,revision,BelebeleRetrieval,BengaliSentimentAnalysis,GujaratiNewsClassification,HindiDiscourseClassification,IN22ConvBitextMining,IN22GenBitextMining,MTOPIntentClassification,MalayalamNewsClassification,...,Mean,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking,mean pr. task type
4,intfloat/multilingual-e5-large-instruct,baa7be480a7de1539afce709c8f13f833a510e0a,0.737546,0.839849,0.87519,0.352344,0.718735,0.888754,0.62953,0.893651,...,0.710544,0.803745,0.763143,0.670092,,0.848628,,0.516713,0.874621,0.746157
3,intfloat/multilingual-e5-large,4dc6d853a804b9c8886ede6dda8a073b7dc08a81,0.681993,0.830707,0.767375,0.387402,0.677846,0.876967,0.591989,0.759921,...,0.675385,0.777406,0.750576,0.646592,,0.826046,,0.256027,0.859706,0.686059
2,intfloat/multilingual-e5-base,d13f1b27baf31030b7fd040960d60d909913633f,0.603717,0.796431,0.74909,0.390381,0.631307,0.8529,0.540492,0.726825,...,0.658061,0.742104,0.727958,0.637539,,0.778423,,0.246079,0.837615,0.661619
5,intfloat/multilingual-e5-small,e4ce9877abf3edfe10b0d82785e83bdcb973e22e,0.581935,0.834304,0.74393,0.393359,0.627392,0.846632,0.521185,0.722302,...,0.659812,0.737012,0.737951,0.637822,,0.768173,,0.290541,0.843696,0.669199
0,GritLM/GritLM-7B,13f00a0e36500c80ce12870ea513846a066004af,0.700637,0.721005,0.698558,0.370898,0.421376,0.746675,0.636601,0.562778,...,0.619388,0.584026,0.678382,0.60044,,0.794968,,0.279783,0.846951,0.630758
1,intfloat/e5-mistral-7b-instruct,07163b72af1488142a360786df853f237b1a3ca1,0.662894,0.720748,0.73088,0.320068,0.443028,0.737999,0.592321,0.580238,...,0.619724,0.590513,0.729511,0.595639,,0.772662,,0.327025,0.844201,0.643259
11,sentence-transformers/paraphrase-multilingual-...,79f2382ceacceacdf38563d7c5d16b9ff8d725d6,0.361017,0.748826,0.819347,0.386914,0.335884,0.548024,0.616998,0.71254,...,0.597874,0.441954,0.820361,0.619433,,0.579103,,0.320617,0.743323,0.587465
6,sentence-transformers/LaBSE,e34fab64a3011d2176c99545a93d5cbddc9a91b7,0.475155,0.804154,0.763581,0.383984,0.634593,0.846688,0.62864,0.730635,...,0.623339,0.74064,0.645834,0.619065,,0.643348,,0.211052,0.789805,0.608291
10,sentence-transformers/paraphrase-multilingual-...,bf3bf13ab40c3157080a7ab344c831b9ad18b5eb,0.193951,0.606135,0.768741,0.374316,0.118672,0.187096,0.591969,0.628968,...,0.512455,0.152884,0.778495,0.576454,,0.48779,,0.166754,0.592587,0.459161
9,sentence-transformers/all-mpnet-base-v2,84f2bcc00d77236f9e89c8a360a00fb1139bf47d,0.096276,0.541883,0.441047,0.343164,0.021104,0.053531,0.181504,0.417857,...,0.355324,0.037317,0.526345,0.452246,,0.128538,,0.040126,0.426018,0.268432


In [5]:
latex_df

Unnamed: 0,model,Borda str,Mean,mean pr. task type,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking
4,intfloat/multilingual-e5-large-instruct,1 (197),0.710544,0.746157,0.803745,0.763143,0.670092,,0.848628,,0.516713,0.874621
3,intfloat/multilingual-e5-large,2 (178),0.675385,0.686059,0.777406,0.750576,0.646592,,0.826046,,0.256027,0.859706
2,intfloat/multilingual-e5-base,3 (164),0.658061,0.661619,0.742104,0.727958,0.637539,,0.778423,,0.246079,0.837615
5,intfloat/multilingual-e5-small,4 (156),0.659812,0.669199,0.737012,0.737951,0.637822,,0.768173,,0.290541,0.843696
0,GritLM/GritLM-7B,5 (145),0.619388,0.630758,0.584026,0.678382,0.60044,,0.794968,,0.279783,0.846951
1,intfloat/e5-mistral-7b-instruct,6 (139),0.619724,0.643259,0.590513,0.729511,0.595639,,0.772662,,0.327025,0.844201
11,sentence-transformers/paraphrase-multilingual-...,7 (130),0.597874,0.587465,0.441954,0.820361,0.619433,,0.579103,,0.320617,0.743323
6,sentence-transformers/LaBSE,8 (128),0.623339,0.608291,0.74064,0.645834,0.619065,,0.643348,,0.211052,0.789805
10,sentence-transformers/paraphrase-multilingual-...,9 (94),0.512455,0.459161,0.152884,0.778495,0.576454,,0.48779,,0.166754,0.592587
9,sentence-transformers/all-mpnet-base-v2,10 (65),0.355324,0.268432,0.037317,0.526345,0.452246,,0.128538,,0.040126,0.426018


In [6]:
latex_df.to_latex(index=True, float_format="%.1f")

'\\begin{tabular}{lllrrrrrrrrrr}\n\\toprule\n & model & Borda str & Mean & mean pr. task type & Mean BitextMining & Mean PairClassification & Mean Classification & Mean STS & Mean Retrieval & Mean MultilabelClassification & Mean Clustering & Mean Reranking \\\\\n\\midrule\n4 & intfloat/multilingual-e5-large-instruct & 1 (197) & 0.7 & 0.7 & 0.8 & 0.8 & 0.7 & NaN & 0.8 & NaN & 0.5 & 0.9 \\\\\n3 & intfloat/multilingual-e5-large & 2 (178) & 0.7 & 0.7 & 0.8 & 0.8 & 0.6 & NaN & 0.8 & NaN & 0.3 & 0.9 \\\\\n2 & intfloat/multilingual-e5-base & 3 (164) & 0.7 & 0.7 & 0.7 & 0.7 & 0.6 & NaN & 0.8 & NaN & 0.2 & 0.8 \\\\\n5 & intfloat/multilingual-e5-small & 4 (156) & 0.7 & 0.7 & 0.7 & 0.7 & 0.6 & NaN & 0.8 & NaN & 0.3 & 0.8 \\\\\n0 & GritLM/GritLM-7B & 5 (145) & 0.6 & 0.6 & 0.6 & 0.7 & 0.6 & NaN & 0.8 & NaN & 0.3 & 0.8 \\\\\n1 & intfloat/e5-mistral-7b-instruct & 6 (139) & 0.6 & 0.6 & 0.6 & 0.7 & 0.6 & NaN & 0.8 & NaN & 0.3 & 0.8 \\\\\n11 & sentence-transformers/paraphrase-multilingual-mpnet-base-v2 

## Europe

In [7]:
mult_tasks = mteb.get_benchmark("MTEB(Europe)").tasks

# load task results for the specified models from mteb/results repository
mteb_results = mteb.load_results(
    models=model_metas,
    tasks=mult_tasks,
    download_latest=False,
)

mteb_results = mteb_results.join_revisions().filter_models()

# manual check that everything is there
pd.DataFrame(mteb_results.get_scores()).to_csv("tmp.csv")

results = pd.DataFrame(mteb_results.get_scores())
results = add_aggregate_columns(results=results)


# create latex table
# column order
cols = [
    "model",
    "Borda str",
    "Mean",
    "mean pr. task type",
    "Mean BitextMining",
    "Mean PairClassification",
    "Mean Classification",
    "Mean STS",
    "Mean Retrieval",
    "Mean MultilabelClassification",
    "Mean Clustering",
    "Mean Reranking",
]

latex_df = results[cols]



In [8]:
results

Unnamed: 0,model,revision,AlloProfClusteringS2S.v2,AlloprofReranking,AlloprofRetrieval,AmazonCounterfactualClassification,ArguAna,BUCC.v2,BelebeleRetrieval,BibleNLPBitextMining,...,Mean,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking,mean pr. task type
0,GritLM/GritLM-7B,13f00a0e36500c80ce12870ea513846a066004af,0.564118,0.779262,0.55422,0.784528,0.63171,0.995024,0.913931,0.973377,...,0.629708,0.904207,0.899394,0.647369,0.760502,0.571053,0.175512,0.452811,0.602696,0.626693
4,intfloat/multilingual-e5-large-instruct,baa7be480a7de1539afce709c8f13f833a510e0a,0.564657,0.746777,0.52118,0.676198,0.58476,0.994738,0.922401,0.979167,...,0.621929,0.903838,0.899859,0.632413,0.774287,0.548026,0.172659,0.468956,0.584201,0.62303
1,intfloat/e5-mistral-7b-instruct,07163b72af1488142a360786df853f237b1a3ca1,0.57112,0.783177,0.54619,0.739015,0.61653,0.993846,0.883939,0.967313,...,0.617291,0.895802,0.911542,0.629467,0.764813,0.536441,0.154645,0.464733,0.598153,0.61945
3,intfloat/multilingual-e5-large,4dc6d853a804b9c8886ede6dda8a073b7dc08a81,0.351508,0.694429,0.39341,0.751167,0.54357,0.990225,0.927284,0.945715,...,0.584915,0.844564,0.887535,0.603851,0.757591,0.508144,0.1498,0.382359,0.559106,0.586619
2,intfloat/multilingual-e5-base,d13f1b27baf31030b7fd040960d60d909913633f,0.341132,0.658972,0.34447,0.750981,0.44206,0.986999,0.876499,0.942837,...,0.571867,0.841107,0.873526,0.578541,0.736695,0.502016,0.148623,0.381606,0.538545,0.575082
11,sentence-transformers/paraphrase-multilingual-...,79f2382ceacceacdf38563d7c5d16b9ff8d725d6,0.418063,0.672043,0.30799,0.739837,0.48908,0.983302,0.797387,0.952051,...,0.544105,0.794687,0.907255,0.565993,0.742535,0.411603,0.068978,0.357832,0.523367,0.546531
5,intfloat/multilingual-e5-small,e4ce9877abf3edfe10b0d82785e83bdcb973e22e,0.353933,0.6441,0.2738,0.717466,0.39088,0.96353,0.825924,0.85072,...,0.550381,0.809488,0.863741,0.561089,0.716361,0.460723,0.139674,0.364981,0.541089,0.557143
6,sentence-transformers/LaBSE,e34fab64a3011d2176c99545a93d5cbddc9a91b7,0.302089,0.553748,0.19775,0.744855,0.34178,0.991891,0.726297,0.97471,...,0.518437,0.887793,0.851802,0.551009,0.656838,0.343517,0.162981,0.342531,0.486601,0.535384
10,sentence-transformers/paraphrase-multilingual-...,bf3bf13ab40c3157080a7ab344c831b9ad18b5eb,0.404512,0.624244,0.26634,0.697727,0.44878,0.971674,0.745612,0.936695,...,0.517319,0.76989,0.889251,0.526784,0.725364,0.375988,0.05688,0.344432,0.501981,0.523821
9,sentence-transformers/all-mpnet-base-v2,84f2bcc00d77236f9e89c8a360a00fb1139bf47d,0.352152,0.696301,0.3427,0.621937,0.46521,0.263576,0.39288,0.065886,...,0.446874,0.298075,0.805196,0.492492,0.638836,0.373078,0.108721,0.361901,0.496085,0.446798


In [9]:
latex_df

Unnamed: 0,model,Borda str,Mean,mean pr. task type,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking
0,GritLM/GritLM-7B,1 (757),0.629708,0.626693,0.904207,0.899394,0.647369,0.760502,0.571053,0.175512,0.452811,0.602696
4,intfloat/multilingual-e5-large-instruct,2 (732),0.621929,0.62303,0.903838,0.899859,0.632413,0.774287,0.548026,0.172659,0.468956,0.584201
1,intfloat/e5-mistral-7b-instruct,3 (725),0.617291,0.61945,0.895802,0.911542,0.629467,0.764813,0.536441,0.154645,0.464733,0.598153
3,intfloat/multilingual-e5-large,4 (586),0.584915,0.586619,0.844564,0.887535,0.603851,0.757591,0.508144,0.1498,0.382359,0.559106
2,intfloat/multilingual-e5-base,5 (499),0.571867,0.575082,0.841107,0.873526,0.578541,0.736695,0.502016,0.148623,0.381606,0.538545
11,sentence-transformers/paraphrase-multilingual-...,6 (463),0.544105,0.546531,0.794687,0.907255,0.565993,0.742535,0.411603,0.068978,0.357832,0.523367
5,intfloat/multilingual-e5-small,7 (399),0.550381,0.557143,0.809488,0.863741,0.561089,0.716361,0.460723,0.139674,0.364981,0.541089
6,sentence-transformers/LaBSE,8 (358),0.518437,0.535384,0.887793,0.851802,0.551009,0.656838,0.343517,0.162981,0.342531,0.486601
10,sentence-transformers/paraphrase-multilingual-...,9 (328),0.517319,0.523821,0.76989,0.889251,0.526784,0.725364,0.375988,0.05688,0.344432,0.501981
9,sentence-transformers/all-mpnet-base-v2,10 (310),0.446874,0.446798,0.298075,0.805196,0.492492,0.638836,0.373078,0.108721,0.361901,0.496085


In [10]:
latex_df.to_latex(index=True, float_format="%.1f")

'\\begin{tabular}{lllrrrrrrrrrr}\n\\toprule\n & model & Borda str & Mean & mean pr. task type & Mean BitextMining & Mean PairClassification & Mean Classification & Mean STS & Mean Retrieval & Mean MultilabelClassification & Mean Clustering & Mean Reranking \\\\\n\\midrule\n0 & GritLM/GritLM-7B & 1 (757) & 0.6 & 0.6 & 0.9 & 0.9 & 0.6 & 0.8 & 0.6 & 0.2 & 0.5 & 0.6 \\\\\n4 & intfloat/multilingual-e5-large-instruct & 2 (732) & 0.6 & 0.6 & 0.9 & 0.9 & 0.6 & 0.8 & 0.5 & 0.2 & 0.5 & 0.6 \\\\\n1 & intfloat/e5-mistral-7b-instruct & 3 (725) & 0.6 & 0.6 & 0.9 & 0.9 & 0.6 & 0.8 & 0.5 & 0.2 & 0.5 & 0.6 \\\\\n3 & intfloat/multilingual-e5-large & 4 (586) & 0.6 & 0.6 & 0.8 & 0.9 & 0.6 & 0.8 & 0.5 & 0.1 & 0.4 & 0.6 \\\\\n2 & intfloat/multilingual-e5-base & 5 (499) & 0.6 & 0.6 & 0.8 & 0.9 & 0.6 & 0.7 & 0.5 & 0.1 & 0.4 & 0.5 \\\\\n11 & sentence-transformers/paraphrase-multilingual-mpnet-base-v2 & 6 (463) & 0.5 & 0.5 & 0.8 & 0.9 & 0.6 & 0.7 & 0.4 & 0.1 & 0.4 & 0.5 \\\\\n5 & intfloat/multilingual-e5-small 

## Multilingual

In [11]:
mult_tasks = mteb.get_benchmark("MTEB(Multilingual)").tasks

# load task results for the specified models from mteb/results repository
mteb_results = mteb.load_results(
    models=model_metas,
    tasks=mult_tasks,
    download_latest=False,
)

mteb_results = mteb_results.join_revisions().filter_models()

# manual check that everything is there
pd.DataFrame(mteb_results.get_scores()).to_csv("tmp.csv")

results = pd.DataFrame(mteb_results.get_scores())
results = add_aggregate_columns(results=results)


# create latex table
# column order
cols = [
    "model",
    "Borda str",
    "Mean",
    "mean pr. task type",
    "Mean BitextMining",
    "Mean PairClassification",
    "Mean Classification",
    "Mean STS",
    "Mean Retrieval",
    "Mean MultilabelClassification",
    "Mean Clustering",
    "Mean Reranking",
]

latex_df = results[cols]

In [12]:
results

Unnamed: 0,model,revision,AILAStatutes,AfriSentiClassification,AlloProfClusteringS2S.v2,AlloprofReranking,AmazonCounterfactualClassification,ArXivHierarchicalClusteringP2P,ArXivHierarchicalClusteringS2S,ArguAna,...,Mean,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking,mean pr. task type
4,intfloat/multilingual-e5-large-instruct,baa7be480a7de1539afce709c8f13f833a510e0a,0.29659,0.453874,0.564657,0.746777,0.686064,0.62535,0.612841,0.58476,...,0.632272,0.801265,0.808636,0.649421,0.768147,0.571167,0.229135,0.51538,0.626133,0.62116
0,GritLM/GritLM-7B,13f00a0e36500c80ce12870ea513846a066004af,0.418,0.450786,0.564118,0.779262,0.792965,0.5976,0.622832,0.63171,...,0.609309,0.705317,0.799444,0.618302,0.73328,0.583067,0.227738,0.504825,0.637788,0.60122
1,intfloat/e5-mistral-7b-instruct,07163b72af1488142a360786df853f237b1a3ca1,0.34535,0.444763,0.57112,0.783177,0.735558,0.652837,0.612781,0.61653,...,0.602797,0.705799,0.811222,0.603143,0.740216,0.557501,0.221968,0.513901,0.638192,0.598993
3,intfloat/multilingual-e5-large,4dc6d853a804b9c8886ede6dda8a073b7dc08a81,0.20842,0.455005,0.351508,0.694429,0.761635,0.555721,0.562122,0.54357,...,0.585706,0.716662,0.790284,0.59917,0.734884,0.541112,0.213024,0.429238,0.628405,0.581597
2,intfloat/multilingual-e5-base,d13f1b27baf31030b7fd040960d60d909913633f,0.20371,0.438023,0.341132,0.658972,0.74334,0.566831,0.561151,0.44206,...,0.57013,0.694379,0.771544,0.582057,0.714442,0.527218,0.201621,0.426745,0.601764,0.564971
11,sentence-transformers/paraphrase-multilingual-...,79f2382ceacceacdf38563d7c5d16b9ff8d725d6,0.22236,0.424455,0.418063,0.672043,0.727665,0.553428,0.551603,0.48908,...,0.520053,0.520629,0.811544,0.550644,0.69661,0.397578,0.16398,0.410807,0.533747,0.510692
5,intfloat/multilingual-e5-small,e4ce9877abf3edfe10b0d82785e83bdcb973e22e,0.19011,0.423581,0.353933,0.6441,0.691637,0.542762,0.541987,0.39088,...,0.554567,0.674729,0.763291,0.565009,0.703608,0.49345,0.190964,0.417354,0.60391,0.55154
6,sentence-transformers/LaBSE,e34fab64a3011d2176c99545a93d5cbddc9a91b7,0.16717,0.431707,0.302089,0.553748,0.749879,0.534427,0.499861,0.34178,...,0.521001,0.76351,0.759691,0.546008,0.653498,0.331691,0.201221,0.391592,0.501976,0.518648
10,sentence-transformers/paraphrase-multilingual-...,bf3bf13ab40c3157080a7ab344c831b9ad18b5eb,0.20525,0.376727,0.404512,0.624244,0.680756,0.536179,0.522457,0.44878,...,0.487815,0.445634,0.789932,0.516569,0.66582,0.36615,0.149303,0.393374,0.509724,0.479563
9,sentence-transformers/all-mpnet-base-v2,84f2bcc00d77236f9e89c8a360a00fb1139bf47d,0.21275,0.372677,0.352152,0.696301,0.618463,0.614734,0.564593,0.46521,...,0.4247,0.211613,0.708935,0.469855,0.575997,0.328086,0.162805,0.407659,0.422344,0.410912


In [13]:
latex_df

Unnamed: 0,model,Borda str,Mean,mean pr. task type,Mean BitextMining,Mean PairClassification,Mean Classification,Mean STS,Mean Retrieval,Mean MultilabelClassification,Mean Clustering,Mean Reranking
4,intfloat/multilingual-e5-large-instruct,1 (1375),0.632272,0.62116,0.801265,0.808636,0.649421,0.768147,0.571167,0.229135,0.51538,0.626133
0,GritLM/GritLM-7B,2 (1258),0.609309,0.60122,0.705317,0.799444,0.618302,0.73328,0.583067,0.227738,0.504825,0.637788
1,intfloat/e5-mistral-7b-instruct,3 (1233),0.602797,0.598993,0.705799,0.811222,0.603143,0.740216,0.557501,0.221968,0.513901,0.638192
3,intfloat/multilingual-e5-large,4 (1109),0.585706,0.581597,0.716662,0.790284,0.59917,0.734884,0.541112,0.213024,0.429238,0.628405
2,intfloat/multilingual-e5-base,5 (944),0.57013,0.564971,0.694379,0.771544,0.582057,0.714442,0.527218,0.201621,0.426745,0.601764
11,sentence-transformers/paraphrase-multilingual-...,6 (830),0.520053,0.510692,0.520629,0.811544,0.550644,0.69661,0.397578,0.16398,0.410807,0.533747
5,intfloat/multilingual-e5-small,7 (784),0.554567,0.55154,0.674729,0.763291,0.565009,0.703608,0.49345,0.190964,0.417354,0.60391
6,sentence-transformers/LaBSE,8 (719),0.521001,0.518648,0.76351,0.759691,0.546008,0.653498,0.331691,0.201221,0.391592,0.501976
10,sentence-transformers/paraphrase-multilingual-...,9 (603),0.487815,0.479563,0.445634,0.789932,0.516569,0.66582,0.36615,0.149303,0.393374,0.509724
9,sentence-transformers/all-mpnet-base-v2,10 (526),0.4247,0.410912,0.211613,0.708935,0.469855,0.575997,0.328086,0.162805,0.407659,0.422344


In [14]:
latex_df.to_latex(index=True, float_format="%.1f")

'\\begin{tabular}{lllrrrrrrrrrr}\n\\toprule\n & model & Borda str & Mean & mean pr. task type & Mean BitextMining & Mean PairClassification & Mean Classification & Mean STS & Mean Retrieval & Mean MultilabelClassification & Mean Clustering & Mean Reranking \\\\\n\\midrule\n4 & intfloat/multilingual-e5-large-instruct & 1 (1375) & 0.6 & 0.6 & 0.8 & 0.8 & 0.6 & 0.8 & 0.6 & 0.2 & 0.5 & 0.6 \\\\\n0 & GritLM/GritLM-7B & 2 (1258) & 0.6 & 0.6 & 0.7 & 0.8 & 0.6 & 0.7 & 0.6 & 0.2 & 0.5 & 0.6 \\\\\n1 & intfloat/e5-mistral-7b-instruct & 3 (1233) & 0.6 & 0.6 & 0.7 & 0.8 & 0.6 & 0.7 & 0.6 & 0.2 & 0.5 & 0.6 \\\\\n3 & intfloat/multilingual-e5-large & 4 (1109) & 0.6 & 0.6 & 0.7 & 0.8 & 0.6 & 0.7 & 0.5 & 0.2 & 0.4 & 0.6 \\\\\n2 & intfloat/multilingual-e5-base & 5 (944) & 0.6 & 0.6 & 0.7 & 0.8 & 0.6 & 0.7 & 0.5 & 0.2 & 0.4 & 0.6 \\\\\n11 & sentence-transformers/paraphrase-multilingual-mpnet-base-v2 & 6 (830) & 0.5 & 0.5 & 0.5 & 0.8 & 0.6 & 0.7 & 0.4 & 0.2 & 0.4 & 0.5 \\\\\n5 & intfloat/multilingual-e5-sm