embeddings-benchmark · isaac-chung · Jun 20, 2024 · Jun 19, 2024 · Jun 19, 2024 · Jun 19, 2024
diff --git a/docs/mmteb/points/958.jsonl b/docs/mmteb/points/958.jsonl
@@ -0,0 +1,2 @@
+{"GitHub": "isaac-chung", "Bug fixes": 2}
+{"GitHub": "KennethEnevoldsen", "Review PR": 2}
diff --git a/mteb/evaluation/evaluators/ClassificationEvaluator.py b/mteb/evaluation/evaluators/ClassificationEvaluator.py
@@ -91,7 +91,7 @@ def __init__(
         y_train,
         sentences_test,
         y_test,
-        task_name: str,
+        task_name: str | None = None,
         k: int = 1,
         batch_size: int = 32,
         limit: int | None = None,

diff --git a/mteb/evaluation/evaluators/RerankingEvaluator.py b/mteb/evaluation/evaluators/RerankingEvaluator.py
@@ -33,7 +33,7 @@ class RerankingEvaluator(Evaluator):
     def __init__(
         self,
         samples,
-        task_name: str | None,
+        task_name: str | None = None,
         mrr_at_k: int = 10,
         name: str = "",
         similarity_fct=cos_sim,

diff --git a/mteb/evaluation/evaluators/SummarizationEvaluator.py b/mteb/evaluation/evaluators/SummarizationEvaluator.py
@@ -19,7 +19,7 @@
 class SummarizationEvaluator(Evaluator):
     def __init__(
         self,
-        task_name: str | None,
+        task_name: str | None = None,
         human_summaries=None,
         machine_summaries=None,
         texts=None,

diff --git a/mteb/evaluation/evaluators/model_encode.py b/mteb/evaluation/evaluators/model_encode.py
@@ -14,12 +14,11 @@
 def model_encode(
     sentences: Sequence[str], *, model: Encoder, task_name: str | None, **kwargs
 ) -> np.ndarray:
-    kwargs["prompt_name"] = task_name
-    if hasattr(model, "prompts") and task_name not in model.prompts:  # type: ignore
+    if hasattr(model, "prompts") and task_name in model.prompts:  # type: ignore
         logger.info(
-            f"Prompt {task_name} not found in model prompts. Removing prompt_name argument."
+            f"Prompt {task_name} found in model prompts. Adding prompt_name argument."
         )
-        kwargs.pop("prompt_name")
+        kwargs["prompt_name"] = task_name
 
     logger.info(f"Encoding {len(sentences)} sentences.")
 

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -4,6 +4,7 @@
 from argparse import Namespace
 from pathlib import Path
 
+import pytest
 import yaml
 
 from mteb.cli import create_meta
@@ -18,17 +19,33 @@ def test_available_tasks():
     ), "Sample task Banking77Classification task not found in available tasks"
 
 
+run_task_fixtures = [  # one (model_name, task_name, model_revision) tuple per case
+    (
+        "average_word_embeddings_komninos",
+        "BornholmBitextMining",
+        "21eec43590414cb8e3a6f654857abed0483ae36e",
+    ),
+    (
+        "intfloat/multilingual-e5-small",
+        "BornholmBitextMining",
+        "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
+    ),
+]
+
+
+@pytest.mark.parametrize("model_name,task_name,model_revision", run_task_fixtures)
 def test_run_task(
-    model_name: str = "average_word_embeddings_komninos",
-    task_name="BornholmBitextMining",
-    model_revision="21eec43590414cb8e3a6f654857abed0483ae36e",
+    model_name: str,
+    task_name: str,
+    model_revision: str,
 ):
     command = f"mteb run -m {model_name} -t {task_name} --verbosity 3 --output_folder tests/results/test_model --model_revision {model_revision}"
     result = subprocess.run(command, shell=True, capture_output=True, text=True)
     assert result.returncode == 0, "Command failed"
 
+    model_name_as_path = model_name.replace("/", "__").replace(" ", "_")
     results_path = Path(
-        f"tests/results/test_model/average_word_embeddings_komninos/{model_revision}"
+        f"tests/results/test_model/{model_name_as_path}/{model_revision}"
     )
     assert results_path.exists(), "Output folder not created"
     assert "model_meta.json" in [

diff --git a/tests/test_mteb.py b/tests/test_mteb.py
@@ -60,15 +60,15 @@ def test_mteb_task(task: Union[str, AbsTask], model_name: str):
     "task_name",
     [
         "BornholmBitextMining",
-        "TwentyNewsgroupsClustering",
-        "TwentyNewsgroupsClustering.v2",
-        "Banking77Classification",
-        "SciDocsRR",
-        "SprintDuplicateQuestions",
-        "NFCorpus",
-        "MalteseNewsClassification",
-        "STS12",
-        "SummEval",
+        # "TwentyNewsgroupsClustering",
+        # "TwentyNewsgroupsClustering.v2",
+        # "Banking77Classification",
+        # "SciDocsRR",
+        # "SprintDuplicateQuestions",
+        # "NFCorpus",
+        # "MalteseNewsClassification",
+        # "STS12",
+        # "SummEval",
     ],
 )
 def test_mteb_with_instructions(task_name: str):
@@ -77,6 +77,9 @@ def test_mteb_with_instructions(task_name: str):
     """
 
     class EncoderWithInstructions(Encoder):
+        def __init__(self, task_name: str):
+            self.prompts = {task_name: "Dummy prompt"}  # task_name must be a key here
+
         def encode(self, sentences, prompt_name: str | None = None, **kwargs):
             assert prompt_name == task_name
             return np.zeros((len(sentences), 10))
@@ -91,7 +94,7 @@ def encode(self, sentences, **kwargs):
     eval = mteb.MTEB(tasks=tasks)
 
     # Test that the task_name is passed down to the encoder
-    model = EncoderWithInstructions()
+    model = EncoderWithInstructions(task_name)
     eval.run(model, output_folder="tests/results", overwrite_results=True)
     # Test that the task_name is not passed down to the encoder
     model = EncoderWithoutInstructions("average_word_embeddings_levy_dependency")