Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add Xstance and ensure valid dataset paths #795

Merged
merged 2 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/mmteb/points/737.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"GitHub": "malteos", "New dataset": 6}
{"GitHub": "KranthiGV", "Review PR": 2}
{"GitHub": "wissam-sib", "Review PR": 2}
{"GitHub": "Ruqyai", "Review PR": 2}
{"GitHub": "KennethEnevoldsen", "Review PR": 2, "Bug Fixes": 2}
2 changes: 1 addition & 1 deletion mteb/tasks/Classification/ara/AJGT.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class AJGT(AbsTaskClassification):
metadata = TaskMetadata(
name="AJGT",
dataset={
"path": "ajgt_twitter_ar",
"path": "komari6/ajgt_twitter_ar",
"revision": "af3f2fa5462ac461b696cb300d66e07ad366057f",
},
description="Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="RestaurantReviewSentimentClassification",
dataset={
"path": "ar_res_reviews",
"path": "hadyelsahar/ar_res_reviews",
"revision": "d51bf2435d030e0041344f576c5e8d7154828977",
},
description="Dataset of 8364 restaurant reviews from qaym.com in Arabic for sentiment analysis",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class TweetSarcasmClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="TweetSarcasmClassification",
dataset={
"path": "ar_sarcasm",
"path": "iabufarha/ar_sarcasm",
"revision": "557bf94ac6177cc442f42d0b09b6e4b76e8f47c9",
},
description="Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
description="The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text.",
reference="https://huggingface.co/datasets/bn_hate_speech",
dataset={
"path": "bn_hate_speech",
"path": "rezacsedu/bn_hate_speech",
"revision": "99612296bc093f0720cac7d7cbfcb67eecf1ca2f",
},
type="Classification",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class FinancialPhrasebankClassification(AbsTaskClassification):
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral.",
reference="https://arxiv.org/abs/1307.5336",
dataset={
"path": "financial_phrasebank",
"path": "takala/financial_phrasebank",
"revision": "1484d06fe7af23030c7c977b12556108d1f67039",
"name": "sentences_allagree",
},
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/Classification/eng/NewsClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class NewsClassification(AbsTaskClassification):
name="NewsClassification",
description="Large News Classification Dataset",
dataset={
"path": "ag_news",
"path": "fancyzhx/ag_news",
"revision": "eb185aade064a813bc0b7f42de02595523103ca4",
},
reference="https://arxiv.org/abs/1509.01626",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class MovieReviewSentimentClassification(AbsTaskClassification):
metadata = TaskMetadata(
name="MovieReviewSentimentClassification",
dataset={
"path": "allocine",
"path": "tblard/allocine",
"revision": "a4654f4896408912913a62ace89614879a549287",
},
description="The Allociné dataset is a French-language dataset for sentiment analysis that contains movie reviews produced by the online community of the Allociné.fr website.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class DutchBookReviewSentimentClassification(AbsTaskClassification):
description="A Dutch book review for sentiment classification.",
reference="https://github.com/benjaminvdb/DBRD",
dataset={
"path": "dbrd",
"path": "benjaminvdb/dbrd",
"revision": "3f756ab4572e071eb53e887ab629f19fa747d39e",
},
type="Classification",
Expand Down
1 change: 1 addition & 0 deletions mteb/tasks/PairClassification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .multilingual.PawsX import *
from .multilingual.RTE3 import *
from .multilingual.XNLI import *
from .multilingual.XStance import *
from .pol.PolishPC import *
from .por.Assin2RTE import *
from .por.SickBrPC import *
Expand Down
109 changes: 109 additions & 0 deletions mteb/tasks/PairClassification/multilingual/XStance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from __future__ import annotations

from datasets import load_dataset

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks import MultilingualTask
from ....abstasks.AbsTaskPairClassification import AbsTaskPairClassification


class XStance(MultilingualTask, AbsTaskPairClassification):
    """Pair-classification task over the x-stance corpus.

    Each example pairs a political question (``sent1``) with a commenter's
    answer (``sent2``); the label is 1 when the comment is in favor of the
    question's proposition and 0 otherwise. Subsets are built per language
    (German, French, Italian) from the multilingual source dataset.
    """

    metadata = TaskMetadata(
        name="XStance",
        dataset={
            "path": "ZurichNLP/x_stance",
            "revision": "810604b9ad3aafdc6144597fdaa40f21a6f5f3de",
        },
        description="A Multilingual Multi-Target Dataset for Stance Detection in French, German, and Italian.",
        reference="https://github.com/ZurichNLP/xstance",
        category="s2s",
        type="PairClassification",
        eval_splits=["test"],
        eval_langs={
            "de": ["deu-Latn"],
            "fr": ["fra-Latn"],
            "it": ["ita-Latn"],
        },
        main_score="ap",
        date=("2011-01-01", "2020-12-31"),
        form=["written"],
        domains=["Social"],
        task_subtypes=["Political classification"],
        license="cc by-nc 4.0",
        socioeconomic_status="medium",
        annotations_creators="human-annotated",
        dialect=[],
        text_creation="created",
        bibtex_citation="""
@inproceedings{vamvas2020xstance,
    author = "Vamvas, Jannis and Sennrich, Rico",
    title = "{X-Stance}: A Multilingual Multi-Target Dataset for Stance Detection",
    booktitle = "Proceedings of the 5th Swiss Text Analytics Conference (SwissText)  16th Conference on Natural Language Processing (KONVENS)",
    address = "Zurich, Switzerland",
    year = "2020",
    month = "jun",
    url = "http://ceur-ws.org/Vol-2624/paper9.pdf"
}
""",
        n_samples={"test": 2048},
        avg_character_length={"test": 152.41},  # length of `sent1` + `sent2`
    )

    def load_data(self, **kwargs):
        """Download the corpus and build capped per-language, per-split subsets."""
        if self.data_loaded:
            return

        sample_cap = 2048  # upper bound on examples kept per language/split
        dataset_info = self.metadata_dict["dataset"]
        raw = load_dataset(dataset_info["path"], revision=dataset_info["revision"])

        def to_pair(example):
            # Project an x-stance row onto the sentence-pair schema;
            # stance "FAVOR" becomes the positive label.
            return {
                "sent1": example["question"],
                "sent2": example["comment"],
                "labels": 1 if example["label"] == "FAVOR" else 0,
            }

        self.dataset = {}
        for lang in self.metadata.eval_langs:
            per_split = {}
            for split in self.metadata_dict["eval_splits"]:
                # Keep only rows written in this language.
                subset = raw[split].filter(lambda row: row["language"] == lang)

                # Truncate oversized subsets (only de + fr exceed the cap).
                if len(subset) > sample_cap:
                    subset = subset.select(range(sample_cap))

                # Replace the original columns with the pair-classification ones.
                per_split[split] = subset.map(
                    to_pair, remove_columns=subset.column_names
                )
            self.dataset[lang] = per_split

        self.dataset_transform()
        self.data_loaded = True

    def dataset_transform(self):
        """Repack each split into the single-record column-list layout
        expected by AbsTaskPairClassification."""
        self.dataset = {
            lang: {
                split: [
                    {
                        "sent1": self.dataset[lang][split]["sent1"],
                        "sent2": self.dataset[lang][split]["sent2"],
                        "labels": self.dataset[lang][split]["labels"],
                    }
                ]
                for split in self.metadata.eval_splits
            }
            for lang in self.metadata.eval_langs
        }
2 changes: 1 addition & 1 deletion mteb/tasks/PairClassification/por/Assin2RTE.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Assin2RTE(AbsTaskPairClassification):
metadata = TaskMetadata(
name="Assin2RTE",
dataset={
"path": "assin2",
"path": "nilc-nlp/assin2",
"revision": "0ff9c86779e06855536d8775ce5550550e1e5a2d",
},
description="Recognizing Textual Entailment part of the ASSIN 2, an evaluation shared task collocated with STIL 2019.",
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/Retrieval/multilingual/XQuADRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class XQuADRetrieval(MultilingualTask, AbsTaskRetrieval):
metadata = TaskMetadata(
name="XQuADRetrieval",
dataset={
"path": "xquad",
"path": "google/xquad",
"revision": "51adfef1c1287aab1d2d91b5bead9bcfb9c68583",
},
description="XQuAD is a benchmark dataset for evaluating cross-lingual question answering performance. It is repurposed retrieving relevant context for each question.",
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/STS/por/Assin2STS.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Assin2STS(AbsTaskSTS):
metadata = TaskMetadata(
name="Assin2STS",
dataset={
"path": "assin2",
"path": "nilc-nlp/assin2",
"revision": "0ff9c86779e06855536d8775ce5550550e1e5a2d",
},
description="Semantic Textual Similarity part of the ASSIN 2, an evaluation shared task collocated with STIL 2019.",
Expand Down
155 changes: 155 additions & 0 deletions results/intfloat__multilingual-e5-small/XStance.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
"dataset_revision": "810604b9ad3aafdc6144597fdaa40f21a6f5f3de",
"evaluation_time": 3.5373082160949707,
"kg_co2_emissions": null,
"mteb_version": "1.6.38",
"scores": {
"test": [
{
"cos_sim": {
"accuracy": 0.51806640625,
"accuracy_threshold": 0.8497587442398071,
"ap": 0.5038443561739071,
"f1": 0.6712195121951219,
"f1_threshold": 0.7853705883026123,
"precision": 0.5053868756121449,
"recall": 0.9990319457889641
},
"dot": {
"accuracy": 0.51806640625,
"accuracy_threshold": 0.8497587442398071,
"ap": 0.5038443561739071,
"f1": 0.6712195121951219,
"f1_threshold": 0.7853705286979675,
"precision": 0.5053868756121449,
"recall": 0.9990319457889641
},
"euclidean": {
"accuracy": 0.51806640625,
"accuracy_threshold": 0.5481629371643066,
"ap": 0.5038443561739071,
"f1": 0.6712195121951219,
"f1_threshold": 0.655178427696228,
"precision": 0.5053868756121449,
"recall": 0.9990319457889641
},
"hf_subset": "de",
"languages": [
"deu-Latn"
],
"main_score": 0.5040793387249269,
"manhattan": {
"accuracy": 0.51416015625,
"accuracy_threshold": 8.64465045928955,
"ap": 0.5040793387249269,
"f1": 0.6714332141696457,
"f1_threshold": 10.558012962341309,
"precision": 0.5053816046966731,
"recall": 1.0
},
"max": {
"accuracy": 0.51806640625,
"ap": 0.5040793387249269,
"f1": 0.6714332141696457
}
},
{
"cos_sim": {
"accuracy": 0.5712890625,
"accuracy_threshold": 0.7847779989242554,
"ap": 0.5942397553753017,
"f1": 0.7265940902021772,
"f1_threshold": 0.761985719203949,
"precision": 0.570591108939912,
"recall": 1.0
},
"dot": {
"accuracy": 0.5712890625,
"accuracy_threshold": 0.7847781181335449,
"ap": 0.5942413209080255,
"f1": 0.7265940902021772,
"f1_threshold": 0.7619856595993042,
"precision": 0.570591108939912,
"recall": 1.0
},
"euclidean": {
"accuracy": 0.5712890625,
"accuracy_threshold": 0.6560750603675842,
"ap": 0.5942397553753018,
"f1": 0.7265940902021772,
"f1_threshold": 0.6898986101150513,
"precision": 0.570591108939912,
"recall": 1.0
},
"hf_subset": "fr",
"languages": [
"fra-Latn"
],
"main_score": 0.5942413209080255,
"manhattan": {
"accuracy": 0.57177734375,
"accuracy_threshold": 9.83655834197998,
"ap": 0.5932090773271775,
"f1": 0.7265940902021772,
"f1_threshold": 10.73774242401123,
"precision": 0.570591108939912,
"recall": 1.0
},
"max": {
"accuracy": 0.57177734375,
"ap": 0.5942413209080255,
"f1": 0.7265940902021772
}
},
{
"cos_sim": {
"accuracy": 0.5408022130013831,
"accuracy_threshold": 0.7930425405502319,
"ap": 0.5182917988582176,
"f1": 0.701123595505618,
"f1_threshold": 0.7833303213119507,
"precision": 0.5397923875432526,
"recall": 1.0
},
"dot": {
"accuracy": 0.5408022130013831,
"accuracy_threshold": 0.7930425405502319,
"ap": 0.5182917988582176,
"f1": 0.701123595505618,
"f1_threshold": 0.7833303213119507,
"precision": 0.5397923875432526,
"recall": 1.0
},
"euclidean": {
"accuracy": 0.5408022130013831,
"accuracy_threshold": 0.6433612108230591,
"ap": 0.5182917988582176,
"f1": 0.701123595505618,
"f1_threshold": 0.6582847833633423,
"precision": 0.5397923875432526,
"recall": 1.0
},
"hf_subset": "it",
"languages": [
"ita-Latn"
],
"main_score": 0.5182917988582176,
"manhattan": {
"accuracy": 0.5401106500691563,
"accuracy_threshold": 10.210434913635254,
"ap": 0.5178128531012762,
"f1": 0.7008547008547008,
"f1_threshold": 10.210434913635254,
"precision": 0.5398475398475399,
"recall": 0.9987179487179487
},
"max": {
"accuracy": 0.5408022130013831,
"ap": 0.5182917988582176,
"f1": 0.701123595505618
}
}
]
},
"task_name": "XStance"
}
Loading
Loading