In [2]:
from datasets import load_dataset, DatasetDict, Dataset
from transformers import EvalPrediction
from setfit import SetFitModel, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
import wandb
import pandas as pd
import torch

  from .autonotebook import tqdm as notebook_tqdm


### Save the paths to the different datasets

In [3]:
# CSV locations for every language, assigned as (train, test) pairs.

# English
train_en_path, test_en_path = (
    "./data_sources/train/train_en.csv",
    "./data_sources/test/test_en.csv",
)

# Italian
train_it_path, test_it_path = (
    "./data_sources/train/train_it.csv",
    "./data_sources/test/test_it.csv",
)

# Spanish
train_es_path, test_es_path = (
    "./data_sources/train/train_es.csv",
    "./data_sources/test/test_es.csv",
)

### Set up W&B

In [4]:
# Authenticate with Weights & Biases up front: the training cell reports to
# W&B (report_to="wandb") and opens a run with wandb.init().
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msravisconti[0m ([33msravisconti-projects[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

### Load data into a DatasetDict

In [13]:
# Read BOTH Italian splits from CSV with pandas.
train_df = pd.read_csv(train_it_path)
test_df = pd.read_csv(test_it_path)

# Wrap the frames as Hugging Face Datasets. The pandas index is dropped on
# both splits (preserve_index=False) so train and test are converted the same
# way and neither can end up with a stray index column.
dataset_it = DatasetDict({
    "train": Dataset.from_pandas(train_df, preserve_index=False),
    "test": Dataset.from_pandas(test_df, preserve_index=False),
})

### Define Metrics

In [11]:
def compute_metrics(preds, labels):
    """Return macro-averaged precision, recall and F1 for a set of predictions.

    Args:
        preds: Predicted class labels.
        labels: Reference (true) class labels, passed to scikit-learn as
            ``y_true``.

    Returns:
        dict with the keys ``"precision_macro"``, ``"recall_macro"`` and
        ``"macro_f1"``.
    """
    # Same averaging for all three scores; zero_division=0 makes an undefined
    # score count as 0 instead of emitting a warning.
    shared_kwargs = {"average": "macro", "zero_division": 0}

    return {
        "precision_macro": precision_score(labels, preds, **shared_kwargs),
        "recall_macro": recall_score(labels, preds, **shared_kwargs),
        "macro_f1": f1_score(labels, preds, **shared_kwargs),
    }


### Train and Evaluate

In [15]:
model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
model = SetFitModel.from_pretrained(model_name)

# Readable names for the class ids: 0 --> "offensive", 1 --> "reappropriative"
model.labels = ["offensive", "reappropriative"]

args = TrainingArguments(
    batch_size=8,
    num_epochs=2,
    num_iterations=5,
    report_to="wandb",  # stream training metrics to Weights & Biases
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset_it["train"],
    metric=compute_metrics,
    column_mapping={"text": "text", "label": "label"},
)

# Open the W&B run as a context manager so it is always finished, even if
# training raises. Without this the run stays open after the cell ends, and
# re-running the cell (or a later cell) can keep logging into a stale run.
with wandb.init(project="multi-pride-setfit-pipeline", name="setfit-it"):
    trainer.train()
    # Score the held-out test split with compute_metrics and record the
    # result on the same run as the training curves.
    final_results = trainer.evaluate(dataset_it["test"])
    wandb.log(final_results)

print("Final test metrics:", final_results)

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
Applying column mapping to the training dataset
Map: 100%|██████████| 868/868 [00:00<00:00, 11473.43 examples/s]


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
macro_f1,▁
precision_macro,▁
recall_macro,▁
train/embedding_loss,▆▅▅▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▃▂▁▁▁█▄▂▁▁▁▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▄▁▁▂▂▃▄▅▆█▁▂▅▆▁
train/global_step,▁▁▁▂▂▂▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇██▁▁▁▂▂▂▂▃▂▂▂▂▃
train/grad_norm,▄▃▃▁▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▂▁▁▁▁▁▆▆▂▁▁▁▁█
train/learning_rate,▁▁▁▁▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▇▇▇▇▇▇█▇▆▆▄▃▃▃▂▅█▄▃▁

0,1
macro_f1,0.87399
precision_macro,0.85735
recall_macro,0.89448
total_flos,0
train/embedding_loss,0.2801
train/epoch,0.00092
train/global_step,1
train/grad_norm,3.23317
train/learning_rate,0
train_loss,0.03405


  self.scope.user = {"email": email}


***** Running training *****
  Num unique pairs = 8680
  Batch size = 8
  Num epochs = 2


Step,Training Loss
1,0.2801
50,0.1786
100,0.1375
150,0.1115
200,0.0984
250,0.0672
300,0.0606
350,0.0437
400,0.0195
450,0.0068


Applying column mapping to the evaluation dataset
***** Running evaluation *****


Final test metrics: {'precision_macro': 0.8738624873609707, 'recall_macro': 0.9001623376623377, 'macro_f1': 0.8861024033437827}
