diff --git a/docs/mmteb/points/808.jsonl b/docs/mmteb/points/808.jsonl new file mode 100644 index 000000000..2f59f2181 --- /dev/null +++ b/docs/mmteb/points/808.jsonl @@ -0,0 +1,2 @@ +{"GitHub": "awinml", "New dataset": 8} +{"GitHub": "KennethEnevoldsen", "Review PR": 2} diff --git a/mteb/tasks/Retrieval/__init__.py b/mteb/tasks/Retrieval/__init__.py index 17d20e490..3f3a1c8b6 100644 --- a/mteb/tasks/Retrieval/__init__.py +++ b/mteb/tasks/Retrieval/__init__.py @@ -80,7 +80,11 @@ from .fra.FQuADRetrieval import * from .fra.SyntecRetrieval import * from .hun.HunSum2 import * +from .jpn.JaGovFaqsRetrieval import * from .jpn.JaQuADRetrieval import * +from .jpn.NLPJournalAbsIntroRetrieval import * +from .jpn.NLPJournalTitleAbsRetrieval import * +from .jpn.NLPJournalTitleIntroRetrieval import * from .kat.GeorgianFAQRetrieval import * from .kor.KoMiracl import * from .kor.KoStrategyQA import * diff --git a/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py b/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py new file mode 100644 index 000000000..e8a32168c --- /dev/null +++ b/mteb/tasks/Retrieval/jpn/JaGovFaqsRetrieval.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + +_EVAL_SPLIT = "test" +_MAX_EVAL_SIZE = 2048 + + +class JaGovFaqsRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="JaGovFaqsRetrieval", + description="JaGovFaqs is a dataset consisting of FAQs manually extracted from the website of Japanese bureaus. 
The dataset consists of 22k FAQs, where the queries (questions) and corpus (answers) have been shuffled, and the goal is to match the answer with the question.", + reference="https://github.com/sbintuitions/JMTEB", + dataset={ + "path": "sbintuitions/JMTEB", + "revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + }, + type="Retrieval", + category="s2s", + eval_splits=[_EVAL_SPLIT], + eval_langs=["jpn-Jpan"], + main_score="ndcg_at_10", + date=("2000-01-01", "2023-12-31"), + form=["written"], + domains=["Web"], + task_subtypes=[], + license="cc-by-4.0", + socioeconomic_status="high", + annotations_creators="derived", + dialect=[], + text_creation="found", + bibtex_citation="", + n_samples={_EVAL_SPLIT: _MAX_EVAL_SIZE}, + avg_character_length={_EVAL_SPLIT: 210.02}, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + query_list = datasets.load_dataset( + name="jagovfaqs_22k-query", + split=_EVAL_SPLIT, + **self.metadata_dict["dataset"], + ) + + # Limit the dataset size to make sure the task does not take too long to run, sample the dataset to 2048 queries + query_list = query_list.shuffle(seed=self.seed) + max_samples = min(_MAX_EVAL_SIZE, len(query_list)) + query_list = query_list.select(range(max_samples)) + + queries = {} + qrels = {} + for row_id, row in enumerate(query_list): + queries[str(row_id)] = row["query"] + qrels[str(row_id)] = {str(row["relevant_docs"][0]): 1} + + corpus_list = datasets.load_dataset( + name="jagovfaqs_22k-corpus", split="corpus", **self.metadata_dict["dataset"] + ) + + corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} + + self.corpus = {_EVAL_SPLIT: corpus} + self.queries = {_EVAL_SPLIT: queries} + self.relevant_docs = {_EVAL_SPLIT: qrels} + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py new file mode 100644 index 000000000..f849d6647 --- /dev/null +++ 
b/mteb/tasks/Retrieval/jpn/NLPJournalAbsIntroRetrieval.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + +_EVAL_SPLIT = "test" + + +class NLPJournalAbsIntroRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NLPJournalAbsIntroRetrieval", + description="This dataset was created from the Japanese NLP Journal LaTeX Corpus. The titles, abstracts and introductions of the academic papers were shuffled. The goal is to find the corresponding introduction with the given abstract.", + reference="https://github.com/sbintuitions/JMTEB", + dataset={ + "path": "sbintuitions/JMTEB", + "revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + }, + type="Retrieval", + category="s2s", + eval_splits=[_EVAL_SPLIT], + eval_langs=["jpn-Jpan"], + main_score="ndcg_at_10", + date=("2000-01-01", "2023-12-31"), + form=["written"], + domains=["Academic"], + task_subtypes=[], + license="cc-by-4.0", + socioeconomic_status="high", + annotations_creators="derived", + dialect=[], + text_creation="found", + bibtex_citation="", + n_samples={_EVAL_SPLIT: 404}, + avg_character_length={_EVAL_SPLIT: 1246.49}, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + query_list = datasets.load_dataset( + name="nlp_journal_abs_intro-query", + split=_EVAL_SPLIT, + **self.metadata_dict["dataset"], + ) + + queries = {} + qrels = {} + for row_id, row in enumerate(query_list): + queries[str(row_id)] = row["query"] + qrels[str(row_id)] = {str(row["relevant_docs"]): 1} + + corpus_list = datasets.load_dataset( + name="nlp_journal_abs_intro-corpus", + split="corpus", + **self.metadata_dict["dataset"], + ) + + corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} + + self.corpus = {_EVAL_SPLIT: corpus} + self.queries = {_EVAL_SPLIT: queries} + self.relevant_docs = {_EVAL_SPLIT: qrels} + + self.data_loaded = True diff --git 
a/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py new file mode 100644 index 000000000..0c11bca6b --- /dev/null +++ b/mteb/tasks/Retrieval/jpn/NLPJournalTitleAbsRetrieval.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + +_EVAL_SPLIT = "test" + + +class NLPJournalTitleAbsRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NLPJournalTitleAbsRetrieval", + description="This dataset was created from the Japanese NLP Journal LaTeX Corpus. The titles, abstracts and introductions of the academic papers were shuffled. The goal is to find the corresponding abstract with the given title.", + reference="https://github.com/sbintuitions/JMTEB", + dataset={ + "path": "sbintuitions/JMTEB", + "revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + }, + type="Retrieval", + category="s2s", + eval_splits=[_EVAL_SPLIT], + eval_langs=["jpn-Jpan"], + main_score="ndcg_at_10", + date=("2000-01-01", "2023-12-31"), + form=["written"], + domains=["Academic"], + task_subtypes=[], + license="cc-by-4.0", + socioeconomic_status="high", + annotations_creators="derived", + dialect=[], + text_creation="found", + bibtex_citation="", + n_samples={_EVAL_SPLIT: 404}, + avg_character_length={_EVAL_SPLIT: 234.59}, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + query_list = datasets.load_dataset( + name="nlp_journal_title_abs-query", + split=_EVAL_SPLIT, + **self.metadata_dict["dataset"], + ) + + queries = {} + qrels = {} + for row_id, row in enumerate(query_list): + queries[str(row_id)] = row["query"] + qrels[str(row_id)] = {str(row["relevant_docs"]): 1} + + corpus_list = datasets.load_dataset( + name="nlp_journal_title_abs-corpus", + split="corpus", + **self.metadata_dict["dataset"], + ) + + corpus = {str(row["docid"]): {"text": row["text"]} for row in 
corpus_list} + + self.corpus = {_EVAL_SPLIT: corpus} + self.queries = {_EVAL_SPLIT: queries} + self.relevant_docs = {_EVAL_SPLIT: qrels} + + self.data_loaded = True diff --git a/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py b/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py new file mode 100644 index 000000000..799669949 --- /dev/null +++ b/mteb/tasks/Retrieval/jpn/NLPJournalTitleIntroRetrieval.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import datasets + +from mteb.abstasks.AbsTaskRetrieval import AbsTaskRetrieval +from mteb.abstasks.TaskMetadata import TaskMetadata + +_EVAL_SPLIT = "test" + + +class NLPJournalTitleIntroRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="NLPJournalTitleIntroRetrieval", + description="This dataset was created from the Japanese NLP Journal LaTeX Corpus. The titles, abstracts and introductions of the academic papers were shuffled. The goal is to find the corresponding introduction with the given title.", + reference="https://github.com/sbintuitions/JMTEB", + dataset={ + "path": "sbintuitions/JMTEB", + "revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + }, + type="Retrieval", + category="s2s", + eval_splits=[_EVAL_SPLIT], + eval_langs=["jpn-Jpan"], + main_score="ndcg_at_10", + date=("2000-01-01", "2023-12-31"), + form=["written"], + domains=["Academic"], + task_subtypes=[], + license="cc-by-4.0", + socioeconomic_status="high", + annotations_creators="derived", + dialect=[], + text_creation="found", + bibtex_citation="", + n_samples={_EVAL_SPLIT: 404}, + avg_character_length={_EVAL_SPLIT: 1040.19}, + ) + + def load_data(self, **kwargs): + if self.data_loaded: + return + + query_list = datasets.load_dataset( + name="nlp_journal_title_intro-query", + split=_EVAL_SPLIT, + **self.metadata_dict["dataset"], + ) + + queries = {} + qrels = {} + for row_id, row in enumerate(query_list): + queries[str(row_id)] = row["query"] + qrels[str(row_id)] = {str(row["relevant_docs"]): 1} + + 
corpus_list = datasets.load_dataset( + name="nlp_journal_title_intro-corpus", + split="corpus", + **self.metadata_dict["dataset"], + ) + + corpus = {str(row["docid"]): {"text": row["text"]} for row in corpus_list} + + self.corpus = {_EVAL_SPLIT: corpus} + self.queries = {_EVAL_SPLIT: queries} + self.relevant_docs = {_EVAL_SPLIT: qrels} + + self.data_loaded = True diff --git a/results/intfloat__multilingual-e5-small/JaGovFaqsRetrieval.json b/results/intfloat__multilingual-e5-small/JaGovFaqsRetrieval.json new file mode 100644 index 000000000..2e4cf0830 --- /dev/null +++ b/results/intfloat__multilingual-e5-small/JaGovFaqsRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "JaGovFaqsRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 44.36, + "map_at_1": 0.50195, + "map_at_10": 0.6005, + "map_at_100": 0.60564, + "map_at_1000": 0.60589, + "map_at_20": 0.60358, + "map_at_3": 0.57829, + "map_at_5": 0.59252, + "mrr_at_1": 0.50293, + "mrr_at_10": 0.60095, + "mrr_at_100": 0.60604, + "mrr_at_1000": 0.60629, + "mrr_at_20": 0.60398, + "mrr_at_3": 0.57878, + "mrr_at_5": 0.59291, + "ndcg_at_1": 0.50195, + "ndcg_at_10": 0.64773, + "ndcg_at_100": 0.6725, + "ndcg_at_1000": 0.67931, + "ndcg_at_20": 0.65864, + "ndcg_at_3": 0.60285, + "ndcg_at_5": 0.62844, + "precision_at_1": 0.50195, + "precision_at_10": 0.07954, + "precision_at_100": 0.00911, + "precision_at_1000": 0.00096, + "precision_at_20": 0.04189, + "precision_at_3": 0.22461, + "precision_at_5": 0.14717, + "recall_at_1": 0.50195, + "recall_at_10": 0.79541, + "recall_at_100": 0.91064, + "recall_at_1000": 0.96436, + "recall_at_20": 0.83789, + "recall_at_3": 0.67383, + "recall_at_5": 0.73584 + } +} \ No newline at end of file diff --git a/results/intfloat__multilingual-e5-small/NLPJournalAbsIntroRetrieval.json b/results/intfloat__multilingual-e5-small/NLPJournalAbsIntroRetrieval.json new file mode 100644 index 000000000..cea9f0e38 --- 
/dev/null +++ b/results/intfloat__multilingual-e5-small/NLPJournalAbsIntroRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalAbsIntroRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 475.45, + "map_at_1": 0.7698, + "map_at_10": 0.82718, + "map_at_100": 0.8307, + "map_at_1000": 0.83074, + "map_at_20": 0.82955, + "map_at_3": 0.82054, + "map_at_5": 0.82537, + "mrr_at_1": 0.7698, + "mrr_at_10": 0.82718, + "mrr_at_100": 0.8307, + "mrr_at_1000": 0.83074, + "mrr_at_20": 0.82955, + "mrr_at_3": 0.82054, + "mrr_at_5": 0.82537, + "ndcg_at_1": 0.7698, + "ndcg_at_10": 0.84924, + "ndcg_at_100": 0.86592, + "ndcg_at_1000": 0.86693, + "ndcg_at_20": 0.85837, + "ndcg_at_3": 0.83619, + "ndcg_at_5": 0.84461, + "precision_at_1": 0.7698, + "precision_at_10": 0.09158, + "precision_at_100": 0.00993, + "precision_at_1000": 0.001, + "precision_at_20": 0.04765, + "precision_at_3": 0.29373, + "precision_at_5": 0.1802, + "recall_at_1": 0.7698, + "recall_at_10": 0.91584, + "recall_at_100": 0.99257, + "recall_at_1000": 1.0, + "recall_at_20": 0.95297, + "recall_at_3": 0.88119, + "recall_at_5": 0.90099 + } +} \ No newline at end of file diff --git a/results/intfloat__multilingual-e5-small/NLPJournalTitleAbsRetrieval.json b/results/intfloat__multilingual-e5-small/NLPJournalTitleAbsRetrieval.json new file mode 100644 index 000000000..3a873f4df --- /dev/null +++ b/results/intfloat__multilingual-e5-small/NLPJournalTitleAbsRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalTitleAbsRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 139.23, + "map_at_1": 0.91832, + "map_at_10": 0.9492, + "map_at_100": 0.94929, + "map_at_1000": 0.94931, + "map_at_20": 0.9492, + "map_at_3": 0.94513, + "map_at_5": 0.94847, + "mrr_at_1": 0.91832, + "mrr_at_10": 0.9492, + "mrr_at_100": 0.94929, + "mrr_at_1000": 
0.94931, + "mrr_at_20": 0.9492, + "mrr_at_3": 0.94513, + "mrr_at_5": 0.94847, + "ndcg_at_1": 0.91832, + "ndcg_at_10": 0.96067, + "ndcg_at_100": 0.9612, + "ndcg_at_1000": 0.96153, + "ndcg_at_20": 0.96067, + "ndcg_at_3": 0.95294, + "ndcg_at_5": 0.95901, + "precision_at_1": 0.91832, + "precision_at_10": 0.0995, + "precision_at_100": 0.00998, + "precision_at_1000": 0.001, + "precision_at_20": 0.04975, + "precision_at_3": 0.32508, + "precision_at_5": 0.19802, + "recall_at_1": 0.91832, + "recall_at_10": 0.99505, + "recall_at_100": 0.99752, + "recall_at_1000": 1.0, + "recall_at_20": 0.99505, + "recall_at_3": 0.97525, + "recall_at_5": 0.9901 + } +} \ No newline at end of file diff --git a/results/intfloat__multilingual-e5-small/NLPJournalTitleIntroRetrieval.json b/results/intfloat__multilingual-e5-small/NLPJournalTitleIntroRetrieval.json new file mode 100644 index 000000000..a9dc25402 --- /dev/null +++ b/results/intfloat__multilingual-e5-small/NLPJournalTitleIntroRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalTitleIntroRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 283.97, + "map_at_1": 0.58663, + "map_at_10": 0.68612, + "map_at_100": 0.69022, + "map_at_1000": 0.69039, + "map_at_20": 0.68892, + "map_at_3": 0.66708, + "map_at_5": 0.67785, + "mrr_at_1": 0.58663, + "mrr_at_10": 0.68612, + "mrr_at_100": 0.69022, + "mrr_at_1000": 0.69039, + "mrr_at_20": 0.68892, + "mrr_at_3": 0.66708, + "mrr_at_5": 0.67785, + "ndcg_at_1": 0.58663, + "ndcg_at_10": 0.73274, + "ndcg_at_100": 0.75247, + "ndcg_at_1000": 0.75641, + "ndcg_at_20": 0.74324, + "ndcg_at_3": 0.69286, + "ndcg_at_5": 0.71225, + "precision_at_1": 0.58663, + "precision_at_10": 0.08787, + "precision_at_100": 0.0097, + "precision_at_1000": 0.001, + "precision_at_20": 0.04604, + "precision_at_3": 0.25578, + "precision_at_5": 0.16287, + "recall_at_1": 0.58663, + "recall_at_10": 0.87871, + "recall_at_100": 0.9703, + 
"recall_at_1000": 1.0, + "recall_at_20": 0.92079, + "recall_at_3": 0.76733, + "recall_at_5": 0.81436 + } +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/JaGovFaqsRetrieval.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/JaGovFaqsRetrieval.json new file mode 100644 index 000000000..d9da6b43d --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/JaGovFaqsRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "JaGovFaqsRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 28.16, + "map_at_1": 0.32715, + "map_at_10": 0.41868, + "map_at_100": 0.42649, + "map_at_1000": 0.42698, + "map_at_20": 0.42291, + "map_at_3": 0.39502, + "map_at_5": 0.40979, + "mrr_at_1": 0.32471, + "mrr_at_10": 0.41742, + "mrr_at_100": 0.4251, + "mrr_at_1000": 0.42559, + "mrr_at_20": 0.42152, + "mrr_at_3": 0.3938, + "mrr_at_5": 0.40842, + "ndcg_at_1": 0.32715, + "ndcg_at_10": 0.46488, + "ndcg_at_100": 0.5062, + "ndcg_at_1000": 0.52086, + "ndcg_at_20": 0.48023, + "ndcg_at_3": 0.41686, + "ndcg_at_5": 0.44365, + "precision_at_1": 0.32715, + "precision_at_10": 0.06104, + "precision_at_100": 0.00811, + "precision_at_1000": 0.00093, + "precision_at_20": 0.03354, + "precision_at_3": 0.15999, + "precision_at_5": 0.10908, + "recall_at_1": 0.32715, + "recall_at_10": 0.61035, + "recall_at_100": 0.81104, + "recall_at_1000": 0.92969, + "recall_at_20": 0.6709, + "recall_at_3": 0.47998, + "recall_at_5": 0.54541 + } +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalAbsIntroRetrieval.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalAbsIntroRetrieval.json new file mode 100644 index 000000000..03a0922e7 --- /dev/null +++ 
b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalAbsIntroRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalAbsIntroRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 134.99, + "map_at_1": 0.35644, + "map_at_10": 0.42334, + "map_at_100": 0.43312, + "map_at_1000": 0.43401, + "map_at_20": 0.42828, + "map_at_3": 0.40182, + "map_at_5": 0.41456, + "mrr_at_1": 0.35644, + "mrr_at_10": 0.42334, + "mrr_at_100": 0.43312, + "mrr_at_1000": 0.43401, + "mrr_at_20": 0.42828, + "mrr_at_3": 0.40182, + "mrr_at_5": 0.41456, + "ndcg_at_1": 0.35644, + "ndcg_at_10": 0.46035, + "ndcg_at_100": 0.51376, + "ndcg_at_1000": 0.53474, + "ndcg_at_20": 0.47845, + "ndcg_at_3": 0.41566, + "ndcg_at_5": 0.43888, + "precision_at_1": 0.35644, + "precision_at_10": 0.05792, + "precision_at_100": 0.00842, + "precision_at_1000": 0.001, + "precision_at_20": 0.03255, + "precision_at_3": 0.15182, + "precision_at_5": 0.10248, + "recall_at_1": 0.35644, + "recall_at_10": 0.57921, + "recall_at_100": 0.84158, + "recall_at_1000": 1.0, + "recall_at_20": 0.65099, + "recall_at_3": 0.45545, + "recall_at_5": 0.51238 + } +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleAbsRetrieval.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleAbsRetrieval.json new file mode 100644 index 000000000..39f6be132 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleAbsRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalTitleAbsRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 50.87, + "map_at_1": 0.50248, + "map_at_10": 0.58551, + "map_at_100": 0.59386, + "map_at_1000": 0.5941, + "map_at_20": 0.59036, + "map_at_3": 0.56229, + 
"map_at_5": 0.57801, + "mrr_at_1": 0.50248, + "mrr_at_10": 0.58551, + "mrr_at_100": 0.59386, + "mrr_at_1000": 0.5941, + "mrr_at_20": 0.59036, + "mrr_at_3": 0.56229, + "mrr_at_5": 0.57801, + "ndcg_at_1": 0.50248, + "ndcg_at_10": 0.62694, + "ndcg_at_100": 0.66908, + "ndcg_at_1000": 0.67469, + "ndcg_at_20": 0.64451, + "ndcg_at_3": 0.58071, + "ndcg_at_5": 0.60862, + "precision_at_1": 0.50248, + "precision_at_10": 0.07574, + "precision_at_100": 0.00958, + "precision_at_1000": 0.001, + "precision_at_20": 0.04134, + "precision_at_3": 0.21122, + "precision_at_5": 0.1401, + "recall_at_1": 0.50248, + "recall_at_10": 0.75743, + "recall_at_100": 0.95792, + "recall_at_1000": 1.0, + "recall_at_20": 0.82673, + "recall_at_3": 0.63366, + "recall_at_5": 0.7005 + } +} \ No newline at end of file diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleIntroRetrieval.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleIntroRetrieval.json new file mode 100644 index 000000000..17362d717 --- /dev/null +++ b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NLPJournalTitleIntroRetrieval.json @@ -0,0 +1,43 @@ +{ + "dataset_revision": "e4af6c73182bebb41d94cb336846e5a452454ea7", + "mteb_dataset_name": "NLPJournalTitleIntroRetrieval", + "mteb_version": "1.10.15", + "test": { + "evaluation_time": 88.86, + "map_at_1": 0.17574, + "map_at_10": 0.2413, + "map_at_100": 0.2551, + "map_at_1000": 0.25625, + "map_at_20": 0.24976, + "map_at_3": 0.21988, + "map_at_5": 0.23115, + "mrr_at_1": 0.17574, + "mrr_at_10": 0.2413, + "mrr_at_100": 0.2551, + "mrr_at_1000": 0.25625, + "mrr_at_20": 0.24976, + "mrr_at_3": 0.21988, + "mrr_at_5": 0.23115, + "ndcg_at_1": 0.17574, + "ndcg_at_10": 0.27907, + "ndcg_at_100": 0.35418, + "ndcg_at_1000": 0.38361, + "ndcg_at_20": 0.31019, + "ndcg_at_3": 0.234, + "ndcg_at_5": 0.25434, + "precision_at_1": 0.17574, + "precision_at_10": 0.0401, + "precision_at_100": 0.00772, + 
"precision_at_1000": 0.001, + "precision_at_20": 0.02624, + "precision_at_3": 0.09158, + "precision_at_5": 0.06485, + "recall_at_1": 0.17574, + "recall_at_10": 0.40099, + "recall_at_100": 0.77228, + "recall_at_1000": 1.0, + "recall_at_20": 0.52475, + "recall_at_3": 0.27475, + "recall_at_5": 0.32426 + } +} \ No newline at end of file