In [2]:
from datasets import load_dataset
from collections import Counter

# Carica il dataset Sentiment140
dataset = load_dataset("stanfordnlp/sentiment140", trust_remote_code=True)

# Conta le occorrenze delle etichette nel dataset di training
train_dataset = dataset["train"]

# Conta le occorrenze delle etichette
label_counts = Counter(train_dataset["sentiment"])

# Stampa la distribuzione delle etichette
print("Distribuzione delle classi nel dataset di training:")
for label, count in label_counts.items():
    print(f"Classe {label}: {count} esempi")





  from .autonotebook import tqdm as notebook_tqdm


Distribuzione delle classi nel dataset di training:
Classe 0: 800000 esempi
Classe 4: 800000 esempi


In [24]:
dataset["train"][0]

{'text': "@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer.  You shoulda got David Carr of Third Day to do it. ;D",
 'labels': 0}

In [4]:
dataset.shape

{'train': (1600000, 5), 'test': (498, 5)}

In [5]:
from transformers import AutoTokenizer
import re

# Inizializza il tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Preprocessing del testo
def preprocess_text(text):
    if not isinstance(text, str):
        return ""  # Restituisci una stringa vuota per valori non validi
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # Rimuovi URL
    text = re.sub(r"@\w+", '[USER]', text)  # Rimuovi menzioni
    text = re.sub(r"#(\w+)", r"\1", text)  # Mantieni solo la parola del hashtag
    text = re.sub(r"[!?]{2,}", '!', text)  # Riduci eccessi di punteggiatura
    text = re.sub(r"\.{2,}", '.', text)  # Riduci punti ripetuti
    text = text.lower()  # Converti in minuscolo
    text = re.sub(r"[^a-zA-Z0-9\s]", '', text)  # Rimuovi simboli non alfanumerici
    return text

# Funzione di preprocessing e tokenizzazione
def preprocess_and_tokenize(batch):
    texts = [preprocess_text(text) for text in batch["text"]]
    tokens = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128
    )
    return {
        "input_ids": tokens["input_ids"],
        "attention_mask": tokens["attention_mask"],
        "labels": batch["labels"]  # Utilizza 'labels' per le etichette
    }

# Rinomina la colonna 'sentiment' in 'labels'
dataset = dataset.rename_column("sentiment", "labels")

# Rimuovi colonne non necessarie
dataset = dataset.remove_columns(["date", "user", "query"])  # Mantieni solo 'text' e 'labels'

# Applica preprocessing e tokenizzazione
tokenized_dataset = dataset.map(preprocess_and_tokenize, batched=True)







Map: 100%|██████████| 498/498 [00:00<00:00, 6937.59 examples/s]


In [6]:
# Controlla il risultato
tokenized_dataset["train"][0]

{'text': "@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer.  You shoulda got David Carr of Third Day to do it. ;D",
 'labels': 0,
 'input_ids': [101,
  5310,
  1037,
  2008,
  2015,
  1037,
  26352,
  5017,
  2017,
  2323,
  2050,
  2288,
  2585,
  12385,
  1997,
  2353,
  2154,
  2000,
  2079,
  2009,
  1040,
  102,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,


In [7]:
# Rimuovi la colonna originale "text" dopo la tokenizzazione
tokenized_dataset = tokenized_dataset.remove_columns(["text"])

# Controlla il risultato
tokenized_dataset["train"][0]


{'labels': 0,
 'input_ids': [101,
  5310,
  1037,
  2008,
  2015,
  1037,
  26352,
  5017,
  2017,
  2323,
  2050,
  2288,
  2585,
  12385,
  1997,
  2353,
  2154,
  2000,
  2079,
  2009,
  1040,
  102,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'attention_mask': [1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0

In [8]:
split_dataset = tokenized_dataset["train"].train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset["train"]
test_dataset = split_dataset["test"]


In [9]:
unique_labels = set(train_dataset["labels"]) | set(test_dataset["labels"])
print(f"Etichette uniche nel dataset: {unique_labels}")

Etichette uniche nel dataset: {0, 4}


In [10]:
def map_labels(example):
    if example["labels"] == 0:
        example["labels"] = 0  # Negativo
    elif example["labels"] == 4:
        example["labels"] = 1  # Positivo
    return example

# Applica il mapping
train_dataset = train_dataset.map(map_labels)
test_dataset = test_dataset.map(map_labels)

Map: 100%|██████████| 1440000/1440000 [01:40<00:00, 14379.32 examples/s]
Map: 100%|██████████| 160000/160000 [00:10<00:00, 15285.95 examples/s]


In [11]:
unique_labels = set(train_dataset["labels"]) | set(test_dataset["labels"])
print(f"Etichette uniche nel dataset: {unique_labels}")

Etichette uniche nel dataset: {0, 1}


In [52]:
import time
import numpy as np
import evaluate
import torch
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

# Disabilita Triton per la compatibilità con la GTX 1060
import torch._dynamo
torch._dynamo.config.suppress_errors = True

# Shuffle dei dati e selezione di un sottoinsieme
train_dataset = train_dataset.shuffle(seed=42).select(range(min(100000, len(train_dataset))))  # Usa solo 100k esempi
test_dataset = test_dataset.shuffle(seed=42).select(range(min(2000, len(test_dataset))))  # Usa solo 2k esempi

# Carica il modello pre-addestrato
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2  # Cambia a 2 per positivo/negativo
)

# Scongela solo gli ultimi 12 layer
for name, param in model.named_parameters():
    param.requires_grad = False  # Congela tutti i parametri inizialmente
for name, param in list(model.named_parameters())[-8:]:
    param.requires_grad = True  # Sblocca gli ultimi 12 layer

# Configura i Training Arguments con ottimizzazioni
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=1e-5,
    lr_scheduler_type="linear",
    warmup_steps=1000,
    per_device_train_batch_size=64,  # Aumenta il batch size per accelerare
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,  # Riduci la frequenza di logging
    save_total_limit=2,
    eval_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    remove_unused_columns=False,
    fp16=True  # Precisione mista per accelerare il training
)

# Funzione per calcolare le metriche
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy.compute(predictions=predictions, references=labels)["accuracy"]
    prec = precision.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    rec = recall.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1_score = f1.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    return {"accuracy": acc, "precision": prec, "recall": rec, "f1": f1_score}

# Configura il Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)







Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [53]:
train_dataset

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 100000
})

In [54]:

# Avvia il training
trainer.train()


  1%|          | 50/4689 [00:31<47:36,  1.62it/s]

{'loss': 0.7533, 'grad_norm': 4.619001865386963, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.03}


  2%|▏         | 100/4689 [01:02<47:42,  1.60it/s]

{'loss': 0.7274, 'grad_norm': 4.110988616943359, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.06}


  3%|▎         | 150/4689 [01:34<47:27,  1.59it/s]

{'loss': 0.7264, 'grad_norm': 1.915428876876831, 'learning_rate': 1.5e-06, 'epoch': 0.1}


  4%|▍         | 200/4689 [02:05<46:46,  1.60it/s]

{'loss': 0.7071, 'grad_norm': 2.095407485961914, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.13}


  5%|▌         | 250/4689 [02:37<46:29,  1.59it/s]

{'loss': 0.6932, 'grad_norm': 0.9612905979156494, 'learning_rate': 2.5e-06, 'epoch': 0.16}


  6%|▋         | 300/4689 [03:08<45:42,  1.60it/s]

{'loss': 0.6859, 'grad_norm': 1.199945330619812, 'learning_rate': 3e-06, 'epoch': 0.19}


  7%|▋         | 350/4689 [03:39<44:15,  1.63it/s]

{'loss': 0.6845, 'grad_norm': 1.5826585292816162, 'learning_rate': 3.5e-06, 'epoch': 0.22}


  9%|▊         | 400/4689 [04:11<46:36,  1.53it/s]

{'loss': 0.6764, 'grad_norm': 1.0959079265594482, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.26}


 10%|▉         | 450/4689 [04:42<45:13,  1.56it/s]

{'loss': 0.6663, 'grad_norm': 2.1891908645629883, 'learning_rate': 4.5e-06, 'epoch': 0.29}


 11%|█         | 500/4689 [05:16<48:00,  1.45it/s]

{'loss': 0.6587, 'grad_norm': 1.149064302444458, 'learning_rate': 5e-06, 'epoch': 0.32}


                                                  
 11%|█         | 500/4689 [05:36<48:00,  1.45it/s]

{'eval_loss': 0.6483832001686096, 'eval_accuracy': 0.6795, 'eval_precision': 0.6813446291560102, 'eval_recall': 0.6795, 'eval_f1': 0.6777337427176823, 'eval_runtime': 19.8587, 'eval_samples_per_second': 100.712, 'eval_steps_per_second': 1.611, 'epoch': 0.32}


 12%|█▏        | 550/4689 [06:10<45:11,  1.53it/s]  

{'loss': 0.6531, 'grad_norm': 1.1903283596038818, 'learning_rate': 5.500000000000001e-06, 'epoch': 0.35}


 13%|█▎        | 600/4689 [06:43<44:52,  1.52it/s]

{'loss': 0.6395, 'grad_norm': 0.5903338193893433, 'learning_rate': 6e-06, 'epoch': 0.38}


 14%|█▍        | 650/4689 [07:16<44:55,  1.50it/s]

{'loss': 0.6342, 'grad_norm': 0.7068638205528259, 'learning_rate': 6.5000000000000004e-06, 'epoch': 0.42}


 15%|█▍        | 700/4689 [07:50<43:45,  1.52it/s]

{'loss': 0.6257, 'grad_norm': 0.9842709898948669, 'learning_rate': 7e-06, 'epoch': 0.45}


 16%|█▌        | 750/4689 [08:23<43:11,  1.52it/s]

{'loss': 0.617, 'grad_norm': 1.479193091392517, 'learning_rate': 7.500000000000001e-06, 'epoch': 0.48}


 17%|█▋        | 800/4689 [08:56<42:17,  1.53it/s]

{'loss': 0.6036, 'grad_norm': 1.136587381362915, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.51}


 18%|█▊        | 850/4689 [09:29<41:32,  1.54it/s]

{'loss': 0.6027, 'grad_norm': 0.576071560382843, 'learning_rate': 8.5e-06, 'epoch': 0.54}


 19%|█▉        | 900/4689 [10:01<41:23,  1.53it/s]

{'loss': 0.5948, 'grad_norm': 0.8097741603851318, 'learning_rate': 9e-06, 'epoch': 0.58}


 20%|██        | 950/4689 [10:34<40:52,  1.52it/s]

{'loss': 0.5799, 'grad_norm': 1.0961198806762695, 'learning_rate': 9.5e-06, 'epoch': 0.61}


 21%|██▏       | 1000/4689 [11:07<40:29,  1.52it/s]

{'loss': 0.5736, 'grad_norm': 1.6982744932174683, 'learning_rate': 1e-05, 'epoch': 0.64}


                                                   
 21%|██▏       | 1000/4689 [11:27<40:29,  1.52it/s]

{'eval_loss': 0.5507526397705078, 'eval_accuracy': 0.723, 'eval_precision': 0.7229706215867472, 'eval_recall': 0.723, 'eval_f1': 0.7228629641201897, 'eval_runtime': 19.4841, 'eval_samples_per_second': 102.648, 'eval_steps_per_second': 1.642, 'epoch': 0.64}


 22%|██▏       | 1050/4689 [12:00<40:15,  1.51it/s]  

{'loss': 0.5641, 'grad_norm': 1.2957950830459595, 'learning_rate': 9.864461913797778e-06, 'epoch': 0.67}


 23%|██▎       | 1100/4689 [12:33<39:50,  1.50it/s]

{'loss': 0.562, 'grad_norm': 0.7140564918518066, 'learning_rate': 9.728923827595556e-06, 'epoch': 0.7}


 25%|██▍       | 1150/4689 [13:06<38:40,  1.52it/s]

{'loss': 0.56, 'grad_norm': 1.3037265539169312, 'learning_rate': 9.593385741393332e-06, 'epoch': 0.74}


 26%|██▌       | 1200/4689 [13:39<38:19,  1.52it/s]

{'loss': 0.5582, 'grad_norm': 0.7101505398750305, 'learning_rate': 9.45784765519111e-06, 'epoch': 0.77}


 27%|██▋       | 1250/4689 [14:12<37:40,  1.52it/s]

{'loss': 0.5458, 'grad_norm': 1.1172276735305786, 'learning_rate': 9.322309568988887e-06, 'epoch': 0.8}


 28%|██▊       | 1300/4689 [14:45<37:39,  1.50it/s]

{'loss': 0.5417, 'grad_norm': 0.6590061783790588, 'learning_rate': 9.186771482786664e-06, 'epoch': 0.83}


 29%|██▉       | 1350/4689 [15:20<38:48,  1.43it/s]

{'loss': 0.5366, 'grad_norm': 1.0758168697357178, 'learning_rate': 9.051233396584442e-06, 'epoch': 0.86}


 30%|██▉       | 1400/4689 [15:55<37:18,  1.47it/s]

{'loss': 0.5453, 'grad_norm': 1.1662821769714355, 'learning_rate': 8.915695310382218e-06, 'epoch': 0.9}


 31%|███       | 1450/4689 [16:28<37:13,  1.45it/s]

{'loss': 0.5356, 'grad_norm': 1.5005592107772827, 'learning_rate': 8.780157224179995e-06, 'epoch': 0.93}


 32%|███▏      | 1500/4689 [17:02<45:12,  1.18it/s]

{'loss': 0.5279, 'grad_norm': 0.8702568411827087, 'learning_rate': 8.644619137977773e-06, 'epoch': 0.96}


                                                   
 32%|███▏      | 1500/4689 [17:29<45:12,  1.18it/s]

{'eval_loss': 0.5108756422996521, 'eval_accuracy': 0.743, 'eval_precision': 0.743645945337701, 'eval_recall': 0.743, 'eval_f1': 0.743025700642516, 'eval_runtime': 26.1291, 'eval_samples_per_second': 76.543, 'eval_steps_per_second': 1.225, 'epoch': 0.96}


 33%|███▎      | 1550/4689 [18:15<46:38,  1.12it/s]  

{'loss': 0.5322, 'grad_norm': 2.04927659034729, 'learning_rate': 8.509081051775549e-06, 'epoch': 0.99}


 34%|███▍      | 1600/4689 [18:57<32:59,  1.56it/s]

{'loss': 0.5324, 'grad_norm': 1.2846870422363281, 'learning_rate': 8.373542965573326e-06, 'epoch': 1.02}


 35%|███▌      | 1650/4689 [19:29<31:45,  1.60it/s]

{'loss': 0.5306, 'grad_norm': 1.2016385793685913, 'learning_rate': 8.238004879371104e-06, 'epoch': 1.06}


 36%|███▋      | 1700/4689 [20:01<31:25,  1.59it/s]

{'loss': 0.5161, 'grad_norm': 0.7538850903511047, 'learning_rate': 8.102466793168881e-06, 'epoch': 1.09}


 37%|███▋      | 1750/4689 [20:33<30:28,  1.61it/s]

{'loss': 0.5262, 'grad_norm': 1.0299625396728516, 'learning_rate': 7.966928706966657e-06, 'epoch': 1.12}


 38%|███▊      | 1800/4689 [21:03<29:04,  1.66it/s]

{'loss': 0.5225, 'grad_norm': 0.7599477767944336, 'learning_rate': 7.831390620764435e-06, 'epoch': 1.15}


 39%|███▉      | 1850/4689 [21:34<28:35,  1.65it/s]

{'loss': 0.5247, 'grad_norm': 1.2788370847702026, 'learning_rate': 7.695852534562212e-06, 'epoch': 1.18}


 41%|████      | 1900/4689 [22:04<28:08,  1.65it/s]

{'loss': 0.5375, 'grad_norm': 1.005683422088623, 'learning_rate': 7.56031444835999e-06, 'epoch': 1.22}


 42%|████▏     | 1950/4689 [22:35<28:22,  1.61it/s]

{'loss': 0.5172, 'grad_norm': 0.855075478553772, 'learning_rate': 7.4247763621577676e-06, 'epoch': 1.25}


 43%|████▎     | 2000/4689 [23:07<28:00,  1.60it/s]

{'loss': 0.5092, 'grad_norm': 0.6245256662368774, 'learning_rate': 7.2892382759555434e-06, 'epoch': 1.28}


                                                   
 43%|████▎     | 2000/4689 [23:25<28:00,  1.60it/s]

{'eval_loss': 0.4995904564857483, 'eval_accuracy': 0.7565, 'eval_precision': 0.7565081453634085, 'eval_recall': 0.7565, 'eval_f1': 0.7563856986387106, 'eval_runtime': 18.4158, 'eval_samples_per_second': 108.602, 'eval_steps_per_second': 1.738, 'epoch': 1.28}


 44%|████▎     | 2050/4689 [23:57<27:44,  1.59it/s]  

{'loss': 0.528, 'grad_norm': 0.8768200278282166, 'learning_rate': 7.153700189753321e-06, 'epoch': 1.31}


 45%|████▍     | 2100/4689 [24:29<27:25,  1.57it/s]

{'loss': 0.523, 'grad_norm': 1.062355637550354, 'learning_rate': 7.0181621035510985e-06, 'epoch': 1.34}


 46%|████▌     | 2150/4689 [25:00<26:29,  1.60it/s]

{'loss': 0.5289, 'grad_norm': 1.672325611114502, 'learning_rate': 6.882624017348875e-06, 'epoch': 1.38}


 47%|████▋     | 2200/4689 [25:32<25:41,  1.61it/s]

{'loss': 0.5213, 'grad_norm': 1.4712483882904053, 'learning_rate': 6.747085931146653e-06, 'epoch': 1.41}


 48%|████▊     | 2250/4689 [26:02<24:32,  1.66it/s]

{'loss': 0.517, 'grad_norm': 1.1880486011505127, 'learning_rate': 6.61154784494443e-06, 'epoch': 1.44}


 49%|████▉     | 2300/4689 [26:33<24:06,  1.65it/s]

{'loss': 0.5152, 'grad_norm': 1.0463629961013794, 'learning_rate': 6.476009758742206e-06, 'epoch': 1.47}


 50%|█████     | 2350/4689 [27:04<25:10,  1.55it/s]

{'loss': 0.5177, 'grad_norm': 1.2373546361923218, 'learning_rate': 6.340471672539984e-06, 'epoch': 1.5}


 51%|█████     | 2400/4689 [27:35<23:44,  1.61it/s]

{'loss': 0.5097, 'grad_norm': 0.7974147200584412, 'learning_rate': 6.204933586337761e-06, 'epoch': 1.54}


 52%|█████▏    | 2450/4689 [28:07<23:22,  1.60it/s]

{'loss': 0.5259, 'grad_norm': 1.106155514717102, 'learning_rate': 6.069395500135539e-06, 'epoch': 1.57}


 53%|█████▎    | 2500/4689 [28:38<22:39,  1.61it/s]

{'loss': 0.523, 'grad_norm': 0.8714849352836609, 'learning_rate': 5.9338574139333164e-06, 'epoch': 1.6}


                                                   
 53%|█████▎    | 2500/4689 [28:57<22:39,  1.61it/s]

{'eval_loss': 0.49233463406562805, 'eval_accuracy': 0.7615, 'eval_precision': 0.7617754721018885, 'eval_recall': 0.7615, 'eval_f1': 0.7615370338744236, 'eval_runtime': 18.3722, 'eval_samples_per_second': 108.86, 'eval_steps_per_second': 1.742, 'epoch': 1.6}


 54%|█████▍    | 2550/4689 [29:29<22:17,  1.60it/s]  

{'loss': 0.524, 'grad_norm': 1.7889565229415894, 'learning_rate': 5.798319327731093e-06, 'epoch': 1.63}


 55%|█████▌    | 2600/4689 [30:00<21:42,  1.60it/s]

{'loss': 0.5186, 'grad_norm': 1.312778115272522, 'learning_rate': 5.66278124152887e-06, 'epoch': 1.66}


 57%|█████▋    | 2650/4689 [30:32<21:50,  1.56it/s]

{'loss': 0.5292, 'grad_norm': 1.1197235584259033, 'learning_rate': 5.527243155326647e-06, 'epoch': 1.7}


 58%|█████▊    | 2700/4689 [31:03<20:43,  1.60it/s]

{'loss': 0.5294, 'grad_norm': 0.6913520097732544, 'learning_rate': 5.391705069124424e-06, 'epoch': 1.73}


 59%|█████▊    | 2750/4689 [31:35<20:19,  1.59it/s]

{'loss': 0.5114, 'grad_norm': 0.8057063221931458, 'learning_rate': 5.256166982922202e-06, 'epoch': 1.76}


 60%|█████▉    | 2800/4689 [32:06<19:39,  1.60it/s]

{'loss': 0.5096, 'grad_norm': 0.6658421158790588, 'learning_rate': 5.120628896719979e-06, 'epoch': 1.79}


 61%|██████    | 2850/4689 [32:37<19:04,  1.61it/s]

{'loss': 0.5205, 'grad_norm': 0.7259219884872437, 'learning_rate': 4.985090810517756e-06, 'epoch': 1.82}


 62%|██████▏   | 2900/4689 [33:08<18:38,  1.60it/s]

{'loss': 0.5022, 'grad_norm': 0.6773280501365662, 'learning_rate': 4.8495527243155335e-06, 'epoch': 1.86}


 63%|██████▎   | 2950/4689 [33:40<18:04,  1.60it/s]

{'loss': 0.5174, 'grad_norm': 1.5878992080688477, 'learning_rate': 4.71401463811331e-06, 'epoch': 1.89}


 64%|██████▍   | 3000/4689 [34:11<17:34,  1.60it/s]

{'loss': 0.5172, 'grad_norm': 0.8702535033226013, 'learning_rate': 4.578476551911088e-06, 'epoch': 1.92}


                                                   
 64%|██████▍   | 3000/4689 [34:29<17:34,  1.60it/s]

{'eval_loss': 0.48870623111724854, 'eval_accuracy': 0.7655, 'eval_precision': 0.7657072404606565, 'eval_recall': 0.7655, 'eval_f1': 0.765534538422856, 'eval_runtime': 18.2904, 'eval_samples_per_second': 109.347, 'eval_steps_per_second': 1.75, 'epoch': 1.92}


 65%|██████▌   | 3050/4689 [35:01<17:00,  1.61it/s]  

{'loss': 0.523, 'grad_norm': 0.6932228207588196, 'learning_rate': 4.4429384657088644e-06, 'epoch': 1.95}


 66%|██████▌   | 3100/4689 [35:32<16:29,  1.61it/s]

{'loss': 0.5059, 'grad_norm': 0.9795307517051697, 'learning_rate': 4.307400379506642e-06, 'epoch': 1.98}


 67%|██████▋   | 3150/4689 [36:03<15:59,  1.60it/s]

{'loss': 0.5163, 'grad_norm': 0.9735245108604431, 'learning_rate': 4.171862293304419e-06, 'epoch': 2.02}


 68%|██████▊   | 3200/4689 [36:35<15:27,  1.60it/s]

{'loss': 0.5165, 'grad_norm': 1.103187084197998, 'learning_rate': 4.036324207102196e-06, 'epoch': 2.05}


 69%|██████▉   | 3250/4689 [37:06<14:53,  1.61it/s]

{'loss': 0.5119, 'grad_norm': 1.0732146501541138, 'learning_rate': 3.900786120899973e-06, 'epoch': 2.08}


 70%|███████   | 3300/4689 [37:37<14:20,  1.61it/s]

{'loss': 0.511, 'grad_norm': 1.3857396841049194, 'learning_rate': 3.7652480346977505e-06, 'epoch': 2.11}


 71%|███████▏  | 3350/4689 [38:08<13:58,  1.60it/s]

{'loss': 0.4964, 'grad_norm': 1.1862035989761353, 'learning_rate': 3.6297099484955272e-06, 'epoch': 2.14}


 73%|███████▎  | 3400/4689 [38:39<13:24,  1.60it/s]

{'loss': 0.5145, 'grad_norm': 1.5497853755950928, 'learning_rate': 3.4941718622933048e-06, 'epoch': 2.18}


 74%|███████▎  | 3450/4689 [39:10<12:53,  1.60it/s]

{'loss': 0.5096, 'grad_norm': 1.5373289585113525, 'learning_rate': 3.358633776091082e-06, 'epoch': 2.21}


 75%|███████▍  | 3500/4689 [39:42<12:15,  1.62it/s]

{'loss': 0.5033, 'grad_norm': 1.1244932413101196, 'learning_rate': 3.2230956898888586e-06, 'epoch': 2.24}


                                                   
 75%|███████▍  | 3500/4689 [40:00<12:15,  1.62it/s]

{'eval_loss': 0.4876049757003784, 'eval_accuracy': 0.767, 'eval_precision': 0.767108366313907, 'eval_recall': 0.767, 'eval_f1': 0.7670265715923449, 'eval_runtime': 18.2929, 'eval_samples_per_second': 109.332, 'eval_steps_per_second': 1.749, 'epoch': 2.24}


 76%|███████▌  | 3550/4689 [40:32<11:46,  1.61it/s]  

{'loss': 0.5141, 'grad_norm': 0.8954123854637146, 'learning_rate': 3.087557603686636e-06, 'epoch': 2.27}


 77%|███████▋  | 3600/4689 [41:03<11:16,  1.61it/s]

{'loss': 0.5233, 'grad_norm': 1.033868670463562, 'learning_rate': 2.9520195174844133e-06, 'epoch': 2.3}


 78%|███████▊  | 3650/4689 [41:34<10:46,  1.61it/s]

{'loss': 0.5074, 'grad_norm': 0.8007789254188538, 'learning_rate': 2.8164814312821904e-06, 'epoch': 2.34}


 79%|███████▉  | 3700/4689 [42:05<10:14,  1.61it/s]

{'loss': 0.5111, 'grad_norm': 1.0070077180862427, 'learning_rate': 2.6809433450799676e-06, 'epoch': 2.37}


 80%|███████▉  | 3750/4689 [42:36<09:43,  1.61it/s]

{'loss': 0.5043, 'grad_norm': 1.1068116426467896, 'learning_rate': 2.545405258877745e-06, 'epoch': 2.4}


 81%|████████  | 3800/4689 [43:07<09:13,  1.61it/s]

{'loss': 0.5023, 'grad_norm': 0.8055641651153564, 'learning_rate': 2.4098671726755223e-06, 'epoch': 2.43}


 82%|████████▏ | 3850/4689 [43:39<08:42,  1.60it/s]

{'loss': 0.5094, 'grad_norm': 1.0068541765213013, 'learning_rate': 2.2743290864732994e-06, 'epoch': 2.46}


 83%|████████▎ | 3900/4689 [44:10<08:11,  1.61it/s]

{'loss': 0.5333, 'grad_norm': 1.338221549987793, 'learning_rate': 2.138791000271076e-06, 'epoch': 2.5}


 84%|████████▍ | 3950/4689 [44:41<07:39,  1.61it/s]

{'loss': 0.5129, 'grad_norm': 1.6711971759796143, 'learning_rate': 2.0032529140688536e-06, 'epoch': 2.53}


 85%|████████▌ | 4000/4689 [45:12<07:07,  1.61it/s]

{'loss': 0.4925, 'grad_norm': 0.9509477615356445, 'learning_rate': 1.8677148278666308e-06, 'epoch': 2.56}


                                                   
 85%|████████▌ | 4000/4689 [45:30<07:07,  1.61it/s]

{'eval_loss': 0.4862086772918701, 'eval_accuracy': 0.7675, 'eval_precision': 0.7676207873933846, 'eval_recall': 0.7675, 'eval_f1': 0.7675279676969443, 'eval_runtime': 18.2782, 'eval_samples_per_second': 109.42, 'eval_steps_per_second': 1.751, 'epoch': 2.56}


 86%|████████▋ | 4050/4689 [46:02<06:39,  1.60it/s]  

{'loss': 0.5141, 'grad_norm': 0.7285280823707581, 'learning_rate': 1.732176741664408e-06, 'epoch': 2.59}


 87%|████████▋ | 4100/4689 [46:33<06:06,  1.61it/s]

{'loss': 0.5168, 'grad_norm': 0.9820714592933655, 'learning_rate': 1.5966386554621848e-06, 'epoch': 2.62}


 89%|████████▊ | 4150/4689 [47:05<05:37,  1.60it/s]

{'loss': 0.5102, 'grad_norm': 0.7698931097984314, 'learning_rate': 1.4611005692599622e-06, 'epoch': 2.66}


 90%|████████▉ | 4200/4689 [47:36<05:03,  1.61it/s]

{'loss': 0.5063, 'grad_norm': 1.4178897142410278, 'learning_rate': 1.3255624830577393e-06, 'epoch': 2.69}


 91%|█████████ | 4250/4689 [48:07<04:34,  1.60it/s]

{'loss': 0.5098, 'grad_norm': 0.7292675971984863, 'learning_rate': 1.1900243968555166e-06, 'epoch': 2.72}


 92%|█████████▏| 4300/4689 [48:38<04:02,  1.61it/s]

{'loss': 0.5088, 'grad_norm': 0.7793207764625549, 'learning_rate': 1.0544863106532936e-06, 'epoch': 2.75}


 93%|█████████▎| 4350/4689 [49:10<03:36,  1.57it/s]

{'loss': 0.503, 'grad_norm': 0.7124835252761841, 'learning_rate': 9.189482244510708e-07, 'epoch': 2.78}


 94%|█████████▍| 4400/4689 [49:41<03:01,  1.60it/s]

{'loss': 0.523, 'grad_norm': 1.337559461593628, 'learning_rate': 7.834101382488479e-07, 'epoch': 2.82}


 95%|█████████▍| 4450/4689 [50:12<02:28,  1.61it/s]

{'loss': 0.5025, 'grad_norm': 1.16289222240448, 'learning_rate': 6.478720520466252e-07, 'epoch': 2.85}


 96%|█████████▌| 4500/4689 [50:43<01:57,  1.61it/s]

{'loss': 0.5121, 'grad_norm': 2.0976014137268066, 'learning_rate': 5.123339658444023e-07, 'epoch': 2.88}


                                                   
 96%|█████████▌| 4500/4689 [51:02<01:57,  1.61it/s]

{'eval_loss': 0.4858609139919281, 'eval_accuracy': 0.7665, 'eval_precision': 0.766620943415852, 'eval_recall': 0.7665, 'eval_f1': 0.766528087988114, 'eval_runtime': 18.311, 'eval_samples_per_second': 109.224, 'eval_steps_per_second': 1.748, 'epoch': 2.88}


 97%|█████████▋| 4550/4689 [51:34<01:26,  1.61it/s]

{'loss': 0.5116, 'grad_norm': 0.7174145579338074, 'learning_rate': 3.767958796421795e-07, 'epoch': 2.91}


 98%|█████████▊| 4600/4689 [52:05<00:56,  1.57it/s]

{'loss': 0.5159, 'grad_norm': 1.230270504951477, 'learning_rate': 2.4125779343995666e-07, 'epoch': 2.94}


 99%|█████████▉| 4650/4689 [52:37<00:24,  1.60it/s]

{'loss': 0.5238, 'grad_norm': 1.8199288845062256, 'learning_rate': 1.057197072377338e-07, 'epoch': 2.98}


100%|██████████| 4689/4689 [53:02<00:00,  1.47it/s]

{'train_runtime': 3182.1902, 'train_samples_per_second': 94.275, 'train_steps_per_second': 1.474, 'train_loss': 0.549083533585008, 'epoch': 3.0}





TrainOutput(global_step=4689, training_loss=0.549083533585008, metrics={'train_runtime': 3182.1902, 'train_samples_per_second': 94.275, 'train_steps_per_second': 1.474, 'total_flos': 1.9733329152e+16, 'train_loss': 0.549083533585008, 'epoch': 3.0})

In [None]:
import torch
torch.cuda.empty_cache()


In [55]:
results = trainer.evaluate()
print(results)


100%|██████████| 32/32 [00:18<00:00,  1.75it/s]

{'eval_loss': 0.4862086772918701, 'eval_accuracy': 0.7675, 'eval_precision': 0.7676207873933846, 'eval_recall': 0.7675, 'eval_f1': 0.7675279676969443, 'eval_runtime': 18.9204, 'eval_samples_per_second': 105.706, 'eval_steps_per_second': 1.691, 'epoch': 3.0}





In [42]:
sentences = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "It's okay, not great but not terrible."
]


In [48]:
test_dataset

Dataset({
    features: ['labels', 'input_ids', 'attention_mask'],
    num_rows: 2000
})

In [56]:
# Determina il dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Sposta il modello sul dispositivo
model = model.to(device)

# Sposta i dati sullo stesso dispositivo del modello
test_inputs = {
    "input_ids": torch.tensor(test_dataset["input_ids"]).to(device),
    "attention_mask": torch.tensor(test_dataset["attention_mask"]).to(device)
}

# Etichette reali
true_labels = torch.tensor(test_dataset["labels"]).to(device)

# Esegui le predizioni
with torch.no_grad():
    outputs = model(**test_inputs)

# Ottieni le predizioni come classi
predicted_classes = torch.argmax(outputs.logits, dim=1).cpu().numpy()

# Confronta con le etichette reali
correct_predictions = np.sum(predicted_classes == true_labels.cpu().numpy())
total_predictions = len(true_labels)

accuracy = correct_predictions / total_predictions
print(f"Accuracy sul dataset di test: {accuracy:.4f}")



Accuracy sul dataset di test: 0.7675


In [57]:
import torch

# Imposta il modello in modalità di valutazione
model.eval()

# Trasferisci il modello su GPU (se disponibile)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Frasi di test
sentences = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "It's okay, not great but not terrible."
]

# Tokenizza le frasi di test
inputs = tokenizer(
    sentences,
    padding=True,
    truncation=True,
    max_length=128,
    return_tensors="pt"
)

# Rimuovi `token_type_ids` se esiste negli input
if "token_type_ids" in inputs:
    del inputs["token_type_ids"]

# Trasferisci gli input sulla GPU (se disponibile)
inputs = {key: value.to(device) for key, value in inputs.items()}

# Esegui le previsioni
with torch.no_grad():
    outputs = model(**inputs)

# Calcola le probabilità e ottieni le classi predette
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
predicted_classes = torch.argmax(probabilities, dim=-1)

# Stampa i risultati
for i, sentence in enumerate(sentences):
    print(f"Sentence: {sentence}")
    print(f"Predicted Class: {predicted_classes[i].item()}")
    print(f"Probabilities: {probabilities[i].cpu().numpy()}")
    print()





Sentence: I love this product! It's amazing.
Predicted Class: 1
Probabilities: [0.0288162  0.97118384]

Sentence: This is the worst experience I've ever had.
Predicted Class: 0
Probabilities: [0.9548583  0.04514164]

Sentence: It's okay, not great but not terrible.
Predicted Class: 0
Probabilities: [0.77398604 0.22601396]



In [58]:
model.save_pretrained("./model76")
tokenizer.save_pretrained("./model76")


('./model76\\tokenizer_config.json',
 './model76\\special_tokens_map.json',
 './model76\\vocab.txt',
 './model76\\added_tokens.json',
 './model76\\tokenizer.json')

In [48]:
import pandas as pd

# Percorso dei file scaricati
file_paths = [
    "../data/geoemotions/goemotions_1.csv",
    "../data/geoemotions/goemotions_2.csv",
    "../data/geoemotions/goemotions_3.csv",
]

# Leggi e unisci i file
dataframes = [pd.read_csv(file) for file in file_paths]
full_dataset = pd.concat(dataframes, ignore_index=True)

# Visualizza alcune righe
full_dataset.head()



Unnamed: 0,text,id,author,subreddit,link_id,parent_id,created_utc,rater_id,example_very_unclear,admiration,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,eew5j0j,Brdd9,nrl,t3_ajis4z,t1_eew18eq,1548381000.0,1,False,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,eemcysk,TheGreen888,unpopularopinion,t3_ai4q37,t3_ai4q37,1548084000.0,37,True,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,t3_abru74,t1_ed2m7g7,1546428000.0,37,False,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,t3_ahulml,t3_ahulml,1547965000.0,18,False,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",eda6yn6,American_Fascist713,starwarsspeculation,t3_ackt2f,t1_eda65q2,1546669000.0,2,False,0,...,0,0,0,0,0,0,0,0,0,1


In [49]:
print(full_dataset.columns)  # Vedi le colonne
print(full_dataset.info())   # Dettagli del dataset
print(full_dataset["text"].head())  # Colonna con i testi


Index(['text', 'id', 'author', 'subreddit', 'link_id', 'parent_id',
       'created_utc', 'rater_id', 'example_very_unclear', 'admiration',
       'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion',
       'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust',
       'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy',
       'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief',
       'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211225 entries, 0 to 211224
Data columns (total 37 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   text                  211225 non-null  object 
 1   id                    211225 non-null  object 
 2   author                211225 non-null  object 
 3   subreddit             211225 non-null  object 
 4   link_id               211225 non-null  object 
 5   par

In [50]:
# Seleziona solo il testo e le emozioni
emotions_columns = [
    "admiration", "amusement", "anger", "annoyance", "approval",
    "caring", "confusion", "curiosity", "desire", "disappointment",
    "disapproval", "disgust", "embarrassment", "excitement",
    "fear", "gratitude", "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief", "remorse",
    "sadness", "surprise", "neutral"
]

filtered_dataset = full_dataset[["text"] + emotions_columns]

# Controlla il risultato
filtered_dataset.head()


Unnamed: 0,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [51]:
# Rimuovi righe con valori mancanti
filtered_dataset = filtered_dataset.dropna()

# Controlla la dimensione del dataset pulito
print(filtered_dataset.shape)


(211225, 29)


In [52]:
# Trova l'emozione dominante per ogni riga
def get_dominant_emotion(row):
    emotions = row[emotions_columns]
    if emotions.sum() == 0:  # Se non c'è alcuna emozione, considera "neutral"
        return "neutral"
    return emotions.idxmax()  # Restituisce la colonna/emozione con valore più alto

filtered_dataset["label"] = filtered_dataset.apply(get_dominant_emotion, axis=1)

# Mantieni solo testo e etichette
final_dataset = filtered_dataset[["text", "label"]]

# Controlla il risultato
final_dataset.head()


Unnamed: 0,text,label
0,That game hurt.,sadness
1,>sexuality shouldn’t be a grouping category I...,neutral
2,"You do right, if you don't care then fuck 'em!",neutral
3,Man I love reddit.,love
4,"[NAME] was nowhere near them, he was by the Fa...",neutral


In [53]:
filtered_dataset["text"] = filtered_dataset["text"].str.strip()
filtered_dataset["text"] = filtered_dataset["text"].str.lower()
import re

filtered_dataset["text"] = filtered_dataset["text"].apply(lambda x: re.sub(r'http\S+|www\S+|https\S+', '', x, flags=re.MULTILINE))
filtered_dataset["text"] = filtered_dataset["text"].apply(lambda x: re.sub(r'@\w+|#\w+', '', x))
import string

filtered_dataset["text"] = filtered_dataset["text"].apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))
filtered_dataset["text"] = filtered_dataset["text"].apply(lambda x: re.sub(r'\d+', '', x))
from nltk.corpus import stopwords
import nltk

# Scarica le stop words (solo al primo utilizzo)
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
filtered_dataset["text"] = filtered_dataset["text"].apply(
    lambda x: ' '.join(word for word in x.split() if word not in stop_words)
)
filtered_dataset["text"] = filtered_dataset["text"].apply(lambda x: re.sub(r'(.)\1{2,}', r'\1', x))
filtered_dataset = filtered_dataset[filtered_dataset["text"].str.len() > 5]
print(filtered_dataset.head())  # Controlla i dati
print(filtered_dataset["text"].isnull().sum())  # Verifica dati mancanti


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nicol\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


                                                text  admiration  amusement  \
0                                          game hurt           0          0   
1  sexuality shouldn’t grouping category makes di...           0          0   
2                            right dont care fuck em           0          0   
3                                    man love reddit           0          0   
4                           name nowhere near falcon           0          0   

   anger  annoyance  approval  caring  confusion  curiosity  desire  ...  \
0      0          0         0       0          0          0       0  ...   
1      0          0         0       0          0          0       0  ...   
2      0          0         0       0          0          0       0  ...   
3      0          0         0       0          0          0       0  ...   
4      0          0         0       0          0          0       0  ...   

   nervousness  optimism  pride  realization  relief  remorse  sadne

In [54]:
from sklearn.model_selection import train_test_split

# Dividi il dataset in training (80%) e test (20%)
train_data, test_data = train_test_split(filtered_dataset, test_size=0.2, random_state=42)

# Dividi il test set in validation (10%) e test (10%)
val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

# Mostra le dimensioni dei set
print(f"Training set: {len(train_data)} esempi")
print(f"Validation set: {len(val_data)} esempi")
print(f"Test set: {len(test_data)} esempi")


Training set: 165791 esempi
Validation set: 20724 esempi
Test set: 20724 esempi


In [55]:
# Crea una mappa per le etichette testuali
label_map = {label: idx for idx, label in enumerate(filtered_dataset['label'].unique())}

# Applica la mappa al dataset
train_data["label"] = train_data["label"].map(label_map)
val_data["label"] = val_data["label"].map(label_map)
test_data["label"] = test_data["label"].map(label_map)

# Converti le etichette in tensori
train_labels = torch.tensor(list(train_data["label"].values))
val_labels = torch.tensor(list(val_data["label"].values))
test_labels = torch.tensor(list(test_data["label"].values))


In [56]:
from transformers import AutoTokenizer
import torch

# Specifica il tokenizer di RoBERTa
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

# Funzione per tokenizzare il testo
def tokenize_data(data):
    return tokenizer(
        list(data["text"]),
        padding=True,
        truncation=True,
        max_length=128,  # Lunghezza massima delle sequenze
        return_tensors="pt"
    )

# Tokenizza i dati
train_encodings = tokenize_data(train_data)
val_encodings = tokenize_data(val_data)
test_encodings = tokenize_data(test_data)

# Converti le etichette in tensori
train_labels = torch.tensor(list(train_data["label"].values))
val_labels = torch.tensor(list(val_data["label"].values))
test_labels = torch.tensor(list(test_data["label"].values))

print("Tokenizzazione completata con RoBERTa!")



Tokenizzazione completata con RoBERTa!


In [57]:
from torch.utils.data import Dataset

class EmotionDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item


In [58]:
train_dataset = EmotionDataset(train_encodings, train_labels)
val_dataset = EmotionDataset(val_encodings, val_labels)
test_dataset = EmotionDataset(test_encodings, test_labels)


In [65]:
# Numero di etichette (quante emozioni ci sono nel dataset)
num_labels = len(label_map)

from transformers import RobertaForSequenceClassification

model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=num_labels  # Numero di classi nel dataset
)



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [66]:
for name, param in model.named_parameters():
    if "classifier" not in name:  # Congela tutto tranne il classificatore
        param.requires_grad = False




In [67]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    save_total_limit=2,
    fp16=True  # Precisione mista per accelerare il training
)




In [72]:
import numpy as np
import evaluate

accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    return {"accuracy": acc, "f1": f1}


In [73]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)



In [75]:
trainer.train()


  2%|▏         | 150/7773 [05:22<4:32:50,  2.15s/it]
  0%|          | 6/7773 [00:05<2:02:24,  1.06it/s]
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item["labels"] = torch.tensor(self.labels[idx])
  1%|          | 50/7773 [00:30<1:17:35,  1.66it/s]

{'loss': 2.8868, 'grad_norm': 1.9017785787582397, 'learning_rate': 1.987134954329088e-05, 'epoch': 0.02}


                                                   
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item["labels"] = torch.tensor(self.labels[idx])


{'eval_loss': 2.8515279293060303, 'eval_accuracy': 0.2757189731712025, 'eval_f1': 0.11918134599442098, 'eval_runtime': 78.6087, 'eval_samples_per_second': 263.635, 'eval_steps_per_second': 4.122, 'epoch': 0.02}


  1%|▏         | 100/7773 [02:19<1:17:16,  1.65it/s]

{'loss': 2.8734, 'grad_norm': 1.2706586122512817, 'learning_rate': 1.9742699086581757e-05, 'epoch': 0.04}


                                                    
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item["labels"] = torch.tensor(self.labels[idx])


{'eval_loss': 2.8329453468322754, 'eval_accuracy': 0.2757189731712025, 'eval_f1': 0.11918134599442098, 'eval_runtime': 78.8683, 'eval_samples_per_second': 262.767, 'eval_steps_per_second': 4.108, 'epoch': 0.04}


  2%|▏         | 150/7773 [04:08<1:16:41,  1.66it/s] 

{'loss': 2.8375, 'grad_norm': 1.3091051578521729, 'learning_rate': 1.9614048629872636e-05, 'epoch': 0.06}




KeyboardInterrupt: 

# Geoemotions

In [1]:
import pandas as pd
import torch
import re
import random
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding, EarlyStoppingCallback
from torch.utils.data import Dataset
import evaluate
import wandb

# Login a W&B (fai wandb login da terminale se non lo hai ancora fatto)
wandb.init(project="il_tuo_progetto", name="il_tuo_esperimento")

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

known_emotions_list = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
    'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

file_paths = [
    '../data/geoemotions/goemotions_1.csv',
    '../data/geoemotions/goemotions_2.csv',
    '../data/geoemotions/goemotions_3.csv'
]
dataframes = [pd.read_csv(file) for file in file_paths]
combined_data = pd.concat(dataframes, ignore_index=True)

emotion_columns = [col for col in combined_data.columns if col in known_emotions_list]
if not emotion_columns:
    raise ValueError("Nessuna colonna di emozione trovata nel dataset.")

combined_data['label'] = combined_data[emotion_columns].idxmax(axis=1)

combined_data = combined_data[combined_data['text'].astype(str).str.strip().astype(bool)]
combined_data = combined_data[combined_data[emotion_columns].sum(axis=1) > 0]

unique_labels = combined_data['label'].unique()
label_mapping = {label: i for i, label in enumerate(unique_labels)}
combined_data['label'] = combined_data['label'].map(label_mapping)

def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-z0-9\s\.,!?;]', '', text)
    text = text.strip()
    return text

combined_data['cleaned_text'] = combined_data['text'].apply(preprocess_text)

class_counts = combined_data['label'].value_counts()
min_class_count = class_counts.min()
balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(
    balanced_data['cleaned_text'],
    balanced_data['label'],
    test_size=0.2,
    random_state=SEED,
    stratify=balanced_data['label']
)

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts.reset_index(drop=True)
        self.labels = labels.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts.iloc[index]
        label = self.labels.iloc[index]

        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long)
        }

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
train_dataset = EmotionDataset(X_train, y_train, tokenizer, max_len=128)
test_dataset = EmotionDataset(X_test, y_test, tokenizer, max_len=128)




  from .autonotebook import tqdm as notebook_tqdm





[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\nicol\_netrc


  balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)


In [3]:
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=len(label_mapping)
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="steps",   # valutazione ogni N step
    eval_steps=500,                # valuta ogni 500 step
    save_strategy="steps",         # salva ogni N step
    save_steps=500,                # salva ogni 500 step
    logging_dir="./logs",
    logging_steps=10,
    seed=SEED,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    learning_rate=2e-5,
    report_to="wandb"
)


early_stopping = EarlyStoppingCallback(
    early_stopping_patience=2,
    early_stopping_threshold=0.0
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)

trainer.train()
eval_results = trainer.evaluate()
print("Test results:", eval_results)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  0%|          | 10/3460 [00:05<32:13,  1.78it/s]

{'loss': 3.341, 'grad_norm': 3.9538540840148926, 'learning_rate': 1.994219653179191e-05, 'epoch': 0.01}


  1%|          | 20/3460 [00:11<32:17,  1.78it/s]

{'loss': 3.3515, 'grad_norm': 2.9297754764556885, 'learning_rate': 1.9884393063583815e-05, 'epoch': 0.03}


  1%|          | 30/3460 [00:16<31:09,  1.83it/s]

{'loss': 3.348, 'grad_norm': 2.7583749294281006, 'learning_rate': 1.9826589595375726e-05, 'epoch': 0.04}


  1%|          | 40/3460 [00:22<30:57,  1.84it/s]

{'loss': 3.3384, 'grad_norm': 5.278118133544922, 'learning_rate': 1.9768786127167633e-05, 'epoch': 0.06}


  1%|▏         | 50/3460 [00:27<31:58,  1.78it/s]

{'loss': 3.347, 'grad_norm': 2.950302839279175, 'learning_rate': 1.971098265895954e-05, 'epoch': 0.07}


  2%|▏         | 60/3460 [00:33<31:50,  1.78it/s]

{'loss': 3.3296, 'grad_norm': 3.2188804149627686, 'learning_rate': 1.9653179190751446e-05, 'epoch': 0.09}


  2%|▏         | 70/3460 [00:38<31:07,  1.82it/s]

{'loss': 3.3382, 'grad_norm': 2.688472270965576, 'learning_rate': 1.9595375722543353e-05, 'epoch': 0.1}


  2%|▏         | 80/3460 [00:44<31:39,  1.78it/s]

{'loss': 3.3435, 'grad_norm': 2.90793514251709, 'learning_rate': 1.9537572254335264e-05, 'epoch': 0.12}


  3%|▎         | 90/3460 [00:49<31:13,  1.80it/s]

{'loss': 3.3519, 'grad_norm': 2.687448501586914, 'learning_rate': 1.947976878612717e-05, 'epoch': 0.13}


  3%|▎         | 100/3460 [00:55<30:52,  1.81it/s]

{'loss': 3.3369, 'grad_norm': 3.068363666534424, 'learning_rate': 1.9421965317919077e-05, 'epoch': 0.14}


  3%|▎         | 110/3460 [01:01<31:14,  1.79it/s]

{'loss': 3.3327, 'grad_norm': 2.3443751335144043, 'learning_rate': 1.9364161849710984e-05, 'epoch': 0.16}


  3%|▎         | 120/3460 [01:06<32:08,  1.73it/s]

{'loss': 3.3242, 'grad_norm': 3.3922572135925293, 'learning_rate': 1.930635838150289e-05, 'epoch': 0.17}


  4%|▍         | 130/3460 [01:12<31:14,  1.78it/s]

{'loss': 3.3402, 'grad_norm': 4.140122413635254, 'learning_rate': 1.9248554913294798e-05, 'epoch': 0.19}


  4%|▍         | 140/3460 [01:17<29:51,  1.85it/s]

{'loss': 3.3141, 'grad_norm': 3.6565725803375244, 'learning_rate': 1.9190751445086705e-05, 'epoch': 0.2}


  4%|▍         | 150/3460 [01:23<30:23,  1.81it/s]

{'loss': 3.2584, 'grad_norm': 7.939465045928955, 'learning_rate': 1.9132947976878615e-05, 'epoch': 0.22}


  5%|▍         | 160/3460 [01:28<30:40,  1.79it/s]

{'loss': 3.2042, 'grad_norm': 8.62758731842041, 'learning_rate': 1.9075144508670522e-05, 'epoch': 0.23}


  5%|▍         | 170/3460 [01:34<31:31,  1.74it/s]

{'loss': 3.1198, 'grad_norm': 12.686814308166504, 'learning_rate': 1.901734104046243e-05, 'epoch': 0.25}


  5%|▌         | 180/3460 [01:40<30:29,  1.79it/s]

{'loss': 3.1281, 'grad_norm': 8.469862937927246, 'learning_rate': 1.8959537572254336e-05, 'epoch': 0.26}


  5%|▌         | 190/3460 [01:45<30:35,  1.78it/s]

{'loss': 3.075, 'grad_norm': 11.871969223022461, 'learning_rate': 1.8901734104046246e-05, 'epoch': 0.27}


  6%|▌         | 200/3460 [01:51<30:03,  1.81it/s]

{'loss': 3.0876, 'grad_norm': 14.076769828796387, 'learning_rate': 1.8843930635838153e-05, 'epoch': 0.29}


  6%|▌         | 210/3460 [01:56<29:29,  1.84it/s]

{'loss': 2.9282, 'grad_norm': 12.895576477050781, 'learning_rate': 1.878612716763006e-05, 'epoch': 0.3}


  6%|▋         | 220/3460 [02:02<30:01,  1.80it/s]

{'loss': 3.0183, 'grad_norm': 11.584376335144043, 'learning_rate': 1.8728323699421967e-05, 'epoch': 0.32}


  7%|▋         | 230/3460 [02:08<30:21,  1.77it/s]

{'loss': 2.9238, 'grad_norm': 10.393680572509766, 'learning_rate': 1.8670520231213874e-05, 'epoch': 0.33}


  7%|▋         | 240/3460 [02:13<30:11,  1.78it/s]

{'loss': 2.9324, 'grad_norm': 13.470878601074219, 'learning_rate': 1.8612716763005784e-05, 'epoch': 0.35}


  7%|▋         | 250/3460 [02:19<30:05,  1.78it/s]

{'loss': 2.8618, 'grad_norm': 18.331445693969727, 'learning_rate': 1.855491329479769e-05, 'epoch': 0.36}


  8%|▊         | 260/3460 [02:24<30:17,  1.76it/s]

{'loss': 2.7893, 'grad_norm': 14.656644821166992, 'learning_rate': 1.8497109826589598e-05, 'epoch': 0.38}


  8%|▊         | 270/3460 [02:30<29:33,  1.80it/s]

{'loss': 2.7771, 'grad_norm': 17.177236557006836, 'learning_rate': 1.8439306358381505e-05, 'epoch': 0.39}


  8%|▊         | 280/3460 [02:36<29:00,  1.83it/s]

{'loss': 2.887, 'grad_norm': 12.67171573638916, 'learning_rate': 1.838150289017341e-05, 'epoch': 0.4}


  8%|▊         | 290/3460 [02:41<29:06,  1.82it/s]

{'loss': 2.7608, 'grad_norm': 10.143811225891113, 'learning_rate': 1.832369942196532e-05, 'epoch': 0.42}


  9%|▊         | 300/3460 [02:47<29:15,  1.80it/s]

{'loss': 2.7706, 'grad_norm': 10.795377731323242, 'learning_rate': 1.8265895953757225e-05, 'epoch': 0.43}


  9%|▉         | 310/3460 [02:52<29:31,  1.78it/s]

{'loss': 2.7187, 'grad_norm': 12.80759334564209, 'learning_rate': 1.8208092485549132e-05, 'epoch': 0.45}


  9%|▉         | 320/3460 [02:58<28:19,  1.85it/s]

{'loss': 2.7688, 'grad_norm': 14.207459449768066, 'learning_rate': 1.8150289017341043e-05, 'epoch': 0.46}


 10%|▉         | 330/3460 [03:03<28:18,  1.84it/s]

{'loss': 2.5953, 'grad_norm': 11.71336555480957, 'learning_rate': 1.809248554913295e-05, 'epoch': 0.48}


 10%|▉         | 340/3460 [03:09<29:17,  1.78it/s]

{'loss': 2.6515, 'grad_norm': 18.861770629882812, 'learning_rate': 1.8034682080924856e-05, 'epoch': 0.49}


 10%|█         | 350/3460 [03:15<29:01,  1.79it/s]

{'loss': 2.69, 'grad_norm': 19.204208374023438, 'learning_rate': 1.7976878612716763e-05, 'epoch': 0.51}


 10%|█         | 360/3460 [03:20<28:24,  1.82it/s]

{'loss': 2.5256, 'grad_norm': 11.848987579345703, 'learning_rate': 1.7919075144508673e-05, 'epoch': 0.52}


 11%|█         | 370/3460 [03:26<27:30,  1.87it/s]

{'loss': 2.6168, 'grad_norm': 17.29341697692871, 'learning_rate': 1.786127167630058e-05, 'epoch': 0.53}


 11%|█         | 380/3460 [03:31<28:04,  1.83it/s]

{'loss': 2.6389, 'grad_norm': 14.090298652648926, 'learning_rate': 1.7803468208092487e-05, 'epoch': 0.55}


 11%|█▏        | 390/3460 [03:37<27:57,  1.83it/s]

{'loss': 2.7006, 'grad_norm': 21.741065979003906, 'learning_rate': 1.7745664739884394e-05, 'epoch': 0.56}


 12%|█▏        | 400/3460 [03:42<27:16,  1.87it/s]

{'loss': 2.6915, 'grad_norm': 17.004467010498047, 'learning_rate': 1.76878612716763e-05, 'epoch': 0.58}


 12%|█▏        | 410/3460 [03:48<27:40,  1.84it/s]

{'loss': 2.631, 'grad_norm': 13.365304946899414, 'learning_rate': 1.763005780346821e-05, 'epoch': 0.59}


 12%|█▏        | 420/3460 [03:53<28:19,  1.79it/s]

{'loss': 2.7416, 'grad_norm': 15.310633659362793, 'learning_rate': 1.7572254335260118e-05, 'epoch': 0.61}


 12%|█▏        | 430/3460 [03:59<28:18,  1.78it/s]

{'loss': 2.5101, 'grad_norm': 16.330835342407227, 'learning_rate': 1.7514450867052025e-05, 'epoch': 0.62}


 13%|█▎        | 440/3460 [04:04<28:14,  1.78it/s]

{'loss': 2.5902, 'grad_norm': 13.29719066619873, 'learning_rate': 1.7456647398843932e-05, 'epoch': 0.64}


 13%|█▎        | 450/3460 [04:10<27:33,  1.82it/s]

{'loss': 2.4923, 'grad_norm': 11.632902145385742, 'learning_rate': 1.739884393063584e-05, 'epoch': 0.65}


 13%|█▎        | 460/3460 [04:15<28:13,  1.77it/s]

{'loss': 2.6304, 'grad_norm': 18.231653213500977, 'learning_rate': 1.7341040462427746e-05, 'epoch': 0.66}


 14%|█▎        | 470/3460 [04:21<27:05,  1.84it/s]

{'loss': 2.6394, 'grad_norm': 16.2860050201416, 'learning_rate': 1.7283236994219653e-05, 'epoch': 0.68}


 14%|█▍        | 480/3460 [04:27<27:38,  1.80it/s]

{'loss': 2.4925, 'grad_norm': 22.36763572692871, 'learning_rate': 1.722543352601156e-05, 'epoch': 0.69}


 14%|█▍        | 490/3460 [04:32<28:14,  1.75it/s]

{'loss': 2.5128, 'grad_norm': 25.29020881652832, 'learning_rate': 1.716763005780347e-05, 'epoch': 0.71}


 14%|█▍        | 500/3460 [04:38<27:21,  1.80it/s]

{'loss': 2.4991, 'grad_norm': 15.931070327758789, 'learning_rate': 1.7109826589595377e-05, 'epoch': 0.72}


                                                  
 14%|█▍        | 500/3460 [05:08<27:21,  1.80it/s]

{'eval_loss': 2.465491771697998, 'eval_accuracy': 0.3249006143838092, 'eval_runtime': 30.5868, 'eval_samples_per_second': 90.464, 'eval_steps_per_second': 5.656, 'epoch': 0.72}


 15%|█▍        | 510/3460 [05:19<49:19,  1.00s/it]  

{'loss': 2.5107, 'grad_norm': 13.276111602783203, 'learning_rate': 1.7052023121387284e-05, 'epoch': 0.74}


 15%|█▌        | 520/3460 [05:25<29:14,  1.68it/s]

{'loss': 2.4815, 'grad_norm': 11.912239074707031, 'learning_rate': 1.699421965317919e-05, 'epoch': 0.75}


 15%|█▌        | 530/3460 [05:30<27:22,  1.78it/s]

{'loss': 2.4824, 'grad_norm': 14.123800277709961, 'learning_rate': 1.69364161849711e-05, 'epoch': 0.77}


 16%|█▌        | 540/3460 [05:36<27:10,  1.79it/s]

{'loss': 2.5734, 'grad_norm': 16.354534149169922, 'learning_rate': 1.6878612716763008e-05, 'epoch': 0.78}


 16%|█▌        | 550/3460 [05:42<26:45,  1.81it/s]

{'loss': 2.4135, 'grad_norm': 13.154816627502441, 'learning_rate': 1.6820809248554915e-05, 'epoch': 0.79}


 16%|█▌        | 560/3460 [05:47<26:27,  1.83it/s]

{'loss': 2.57, 'grad_norm': 18.781557083129883, 'learning_rate': 1.676300578034682e-05, 'epoch': 0.81}


 16%|█▋        | 570/3460 [05:53<26:13,  1.84it/s]

{'loss': 2.5036, 'grad_norm': 46.80784606933594, 'learning_rate': 1.6705202312138732e-05, 'epoch': 0.82}


 17%|█▋        | 580/3460 [05:58<26:07,  1.84it/s]

{'loss': 2.4586, 'grad_norm': 16.793363571166992, 'learning_rate': 1.664739884393064e-05, 'epoch': 0.84}


 17%|█▋        | 590/3460 [06:04<26:16,  1.82it/s]

{'loss': 2.5252, 'grad_norm': 20.309003829956055, 'learning_rate': 1.6589595375722546e-05, 'epoch': 0.85}


 17%|█▋        | 600/3460 [06:09<26:02,  1.83it/s]

{'loss': 2.4584, 'grad_norm': 17.490812301635742, 'learning_rate': 1.6531791907514452e-05, 'epoch': 0.87}


 18%|█▊        | 610/3460 [06:15<25:45,  1.84it/s]

{'loss': 2.3265, 'grad_norm': 14.132589340209961, 'learning_rate': 1.647398843930636e-05, 'epoch': 0.88}


 18%|█▊        | 620/3460 [06:20<25:51,  1.83it/s]

{'loss': 2.6214, 'grad_norm': 18.917387008666992, 'learning_rate': 1.6416184971098266e-05, 'epoch': 0.9}


 18%|█▊        | 630/3460 [06:26<25:42,  1.83it/s]

{'loss': 2.3712, 'grad_norm': 18.646207809448242, 'learning_rate': 1.6358381502890177e-05, 'epoch': 0.91}


 18%|█▊        | 640/3460 [06:31<25:37,  1.83it/s]

{'loss': 2.5532, 'grad_norm': 18.14583969116211, 'learning_rate': 1.6300578034682083e-05, 'epoch': 0.92}


 19%|█▉        | 650/3460 [06:37<26:11,  1.79it/s]

{'loss': 2.3936, 'grad_norm': 17.70243263244629, 'learning_rate': 1.624277456647399e-05, 'epoch': 0.94}


 19%|█▉        | 660/3460 [06:42<25:54,  1.80it/s]

{'loss': 2.3145, 'grad_norm': 12.66569709777832, 'learning_rate': 1.6184971098265897e-05, 'epoch': 0.95}


 19%|█▉        | 670/3460 [06:48<25:15,  1.84it/s]

{'loss': 2.5234, 'grad_norm': 16.591567993164062, 'learning_rate': 1.6127167630057804e-05, 'epoch': 0.97}


 20%|█▉        | 680/3460 [06:53<25:10,  1.84it/s]

{'loss': 2.4579, 'grad_norm': 16.472827911376953, 'learning_rate': 1.606936416184971e-05, 'epoch': 0.98}


 20%|█▉        | 690/3460 [06:58<25:00,  1.85it/s]

{'loss': 2.4592, 'grad_norm': 19.07664680480957, 'learning_rate': 1.6011560693641618e-05, 'epoch': 1.0}


 20%|██        | 700/3460 [07:04<24:46,  1.86it/s]

{'loss': 2.272, 'grad_norm': 11.44444751739502, 'learning_rate': 1.5953757225433528e-05, 'epoch': 1.01}


 21%|██        | 710/3460 [07:09<24:50,  1.85it/s]

{'loss': 2.1937, 'grad_norm': 17.104883193969727, 'learning_rate': 1.5895953757225435e-05, 'epoch': 1.03}


 21%|██        | 720/3460 [07:15<24:42,  1.85it/s]

{'loss': 2.3266, 'grad_norm': 11.966934204101562, 'learning_rate': 1.5838150289017342e-05, 'epoch': 1.04}


 21%|██        | 730/3460 [07:20<24:45,  1.84it/s]

{'loss': 2.222, 'grad_norm': 19.06548309326172, 'learning_rate': 1.578034682080925e-05, 'epoch': 1.05}


 21%|██▏       | 740/3460 [07:25<24:36,  1.84it/s]

{'loss': 2.2468, 'grad_norm': 16.12554168701172, 'learning_rate': 1.572254335260116e-05, 'epoch': 1.07}


 22%|██▏       | 750/3460 [07:31<24:36,  1.84it/s]

{'loss': 2.4131, 'grad_norm': 17.70405387878418, 'learning_rate': 1.5664739884393066e-05, 'epoch': 1.08}


 22%|██▏       | 760/3460 [07:36<24:30,  1.84it/s]

{'loss': 2.44, 'grad_norm': 20.213878631591797, 'learning_rate': 1.5606936416184973e-05, 'epoch': 1.1}


 22%|██▏       | 770/3460 [07:42<24:26,  1.83it/s]

{'loss': 2.3229, 'grad_norm': 20.87255096435547, 'learning_rate': 1.554913294797688e-05, 'epoch': 1.11}


 23%|██▎       | 780/3460 [07:47<24:10,  1.85it/s]

{'loss': 2.1455, 'grad_norm': 15.04845905303955, 'learning_rate': 1.5491329479768787e-05, 'epoch': 1.13}


 23%|██▎       | 790/3460 [07:53<24:14,  1.84it/s]

{'loss': 2.2692, 'grad_norm': 17.466846466064453, 'learning_rate': 1.5433526011560697e-05, 'epoch': 1.14}


 23%|██▎       | 800/3460 [07:58<24:00,  1.85it/s]

{'loss': 2.2586, 'grad_norm': 20.813535690307617, 'learning_rate': 1.5375722543352604e-05, 'epoch': 1.16}


 23%|██▎       | 810/3460 [08:04<23:44,  1.86it/s]

{'loss': 2.2566, 'grad_norm': 17.845460891723633, 'learning_rate': 1.531791907514451e-05, 'epoch': 1.17}


 24%|██▎       | 820/3460 [08:09<23:50,  1.85it/s]

{'loss': 2.3863, 'grad_norm': 17.00783920288086, 'learning_rate': 1.5260115606936418e-05, 'epoch': 1.18}


 24%|██▍       | 830/3460 [08:14<23:44,  1.85it/s]

{'loss': 2.2851, 'grad_norm': 19.443952560424805, 'learning_rate': 1.5202312138728326e-05, 'epoch': 1.2}


 24%|██▍       | 840/3460 [08:20<23:37,  1.85it/s]

{'loss': 2.0266, 'grad_norm': 26.664079666137695, 'learning_rate': 1.5144508670520233e-05, 'epoch': 1.21}


 25%|██▍       | 850/3460 [08:25<23:39,  1.84it/s]

{'loss': 2.2081, 'grad_norm': 16.638620376586914, 'learning_rate': 1.508670520231214e-05, 'epoch': 1.23}


 25%|██▍       | 860/3460 [08:31<24:03,  1.80it/s]

{'loss': 2.1518, 'grad_norm': 20.392793655395508, 'learning_rate': 1.5028901734104047e-05, 'epoch': 1.24}


 25%|██▌       | 870/3460 [08:36<24:17,  1.78it/s]

{'loss': 2.2946, 'grad_norm': 17.023571014404297, 'learning_rate': 1.4971098265895956e-05, 'epoch': 1.26}


 25%|██▌       | 880/3460 [08:42<23:27,  1.83it/s]

{'loss': 2.0993, 'grad_norm': 16.779375076293945, 'learning_rate': 1.4913294797687862e-05, 'epoch': 1.27}


 26%|██▌       | 890/3460 [08:47<23:12,  1.85it/s]

{'loss': 2.211, 'grad_norm': 14.532512664794922, 'learning_rate': 1.485549132947977e-05, 'epoch': 1.29}


 26%|██▌       | 900/3460 [08:53<22:56,  1.86it/s]

{'loss': 2.2769, 'grad_norm': 16.86455535888672, 'learning_rate': 1.4797687861271676e-05, 'epoch': 1.3}


 26%|██▋       | 910/3460 [08:58<23:01,  1.85it/s]

{'loss': 2.095, 'grad_norm': 17.713056564331055, 'learning_rate': 1.4739884393063586e-05, 'epoch': 1.32}


 27%|██▋       | 920/3460 [09:04<22:54,  1.85it/s]

{'loss': 2.3451, 'grad_norm': 25.76213264465332, 'learning_rate': 1.4682080924855493e-05, 'epoch': 1.33}


 27%|██▋       | 930/3460 [09:09<22:44,  1.85it/s]

{'loss': 2.2408, 'grad_norm': 14.816967964172363, 'learning_rate': 1.46242774566474e-05, 'epoch': 1.34}


 27%|██▋       | 940/3460 [09:14<23:15,  1.81it/s]

{'loss': 2.3531, 'grad_norm': 14.858575820922852, 'learning_rate': 1.4566473988439307e-05, 'epoch': 1.36}


 27%|██▋       | 950/3460 [09:20<23:28,  1.78it/s]

{'loss': 2.2192, 'grad_norm': 17.675691604614258, 'learning_rate': 1.4508670520231216e-05, 'epoch': 1.37}


 28%|██▊       | 960/3460 [09:26<22:48,  1.83it/s]

{'loss': 2.2309, 'grad_norm': 51.90044021606445, 'learning_rate': 1.4450867052023123e-05, 'epoch': 1.39}


 28%|██▊       | 970/3460 [09:31<22:25,  1.85it/s]

{'loss': 2.2096, 'grad_norm': 19.8174991607666, 'learning_rate': 1.439306358381503e-05, 'epoch': 1.4}


 28%|██▊       | 980/3460 [09:36<22:23,  1.85it/s]

{'loss': 2.2086, 'grad_norm': 18.173704147338867, 'learning_rate': 1.4335260115606936e-05, 'epoch': 1.42}


 29%|██▊       | 990/3460 [09:42<22:15,  1.85it/s]

{'loss': 2.261, 'grad_norm': 20.89910125732422, 'learning_rate': 1.4277456647398843e-05, 'epoch': 1.43}


 29%|██▉       | 1000/3460 [09:47<22:38,  1.81it/s]

{'loss': 2.1271, 'grad_norm': 20.683000564575195, 'learning_rate': 1.4219653179190754e-05, 'epoch': 1.45}


                                                   
 29%|██▉       | 1000/3460 [10:18<22:38,  1.81it/s]

{'eval_loss': 2.31858229637146, 'eval_accuracy': 0.36248644741597397, 'eval_runtime': 30.5287, 'eval_samples_per_second': 90.636, 'eval_steps_per_second': 5.667, 'epoch': 1.45}


 29%|██▉       | 1010/3460 [10:30<41:51,  1.02s/it]  

{'loss': 2.2881, 'grad_norm': 11.825542449951172, 'learning_rate': 1.416184971098266e-05, 'epoch': 1.46}


 29%|██▉       | 1020/3460 [10:35<23:21,  1.74it/s]

{'loss': 2.2275, 'grad_norm': 19.420949935913086, 'learning_rate': 1.4104046242774567e-05, 'epoch': 1.47}


 30%|██▉       | 1030/3460 [10:41<22:50,  1.77it/s]

{'loss': 2.3013, 'grad_norm': 22.549062728881836, 'learning_rate': 1.4046242774566474e-05, 'epoch': 1.49}


 30%|███       | 1040/3460 [10:46<22:13,  1.82it/s]

{'loss': 2.2339, 'grad_norm': 15.181039810180664, 'learning_rate': 1.3988439306358383e-05, 'epoch': 1.5}


 30%|███       | 1050/3460 [10:52<21:51,  1.84it/s]

{'loss': 2.158, 'grad_norm': 21.99703598022461, 'learning_rate': 1.393063583815029e-05, 'epoch': 1.52}


 31%|███       | 1060/3460 [10:57<21:41,  1.84it/s]

{'loss': 2.1469, 'grad_norm': 16.971458435058594, 'learning_rate': 1.3872832369942197e-05, 'epoch': 1.53}


 31%|███       | 1070/3460 [11:03<21:40,  1.84it/s]

{'loss': 2.1212, 'grad_norm': 16.545194625854492, 'learning_rate': 1.3815028901734104e-05, 'epoch': 1.55}


 31%|███       | 1080/3460 [11:08<21:46,  1.82it/s]

{'loss': 2.1481, 'grad_norm': 18.384366989135742, 'learning_rate': 1.3757225433526014e-05, 'epoch': 1.56}


 32%|███▏      | 1090/3460 [11:14<21:33,  1.83it/s]

{'loss': 2.2235, 'grad_norm': 21.209270477294922, 'learning_rate': 1.369942196531792e-05, 'epoch': 1.58}


 32%|███▏      | 1100/3460 [11:19<21:17,  1.85it/s]

{'loss': 2.2191, 'grad_norm': 15.738536834716797, 'learning_rate': 1.3641618497109828e-05, 'epoch': 1.59}


 32%|███▏      | 1110/3460 [11:25<21:29,  1.82it/s]

{'loss': 2.1037, 'grad_norm': 18.810760498046875, 'learning_rate': 1.3583815028901735e-05, 'epoch': 1.6}


 32%|███▏      | 1120/3460 [11:30<21:18,  1.83it/s]

{'loss': 2.0817, 'grad_norm': 18.82208251953125, 'learning_rate': 1.3526011560693643e-05, 'epoch': 1.62}


 33%|███▎      | 1130/3460 [11:36<21:04,  1.84it/s]

{'loss': 2.284, 'grad_norm': 18.197620391845703, 'learning_rate': 1.346820809248555e-05, 'epoch': 1.63}


 33%|███▎      | 1140/3460 [11:41<21:01,  1.84it/s]

{'loss': 2.343, 'grad_norm': 22.938669204711914, 'learning_rate': 1.3410404624277457e-05, 'epoch': 1.65}


 33%|███▎      | 1150/3460 [11:47<21:28,  1.79it/s]

{'loss': 2.2314, 'grad_norm': 19.261863708496094, 'learning_rate': 1.3352601156069365e-05, 'epoch': 1.66}


 34%|███▎      | 1160/3460 [11:52<21:37,  1.77it/s]

{'loss': 2.2091, 'grad_norm': 15.395820617675781, 'learning_rate': 1.3294797687861272e-05, 'epoch': 1.68}


 34%|███▍      | 1170/3460 [11:58<21:29,  1.78it/s]

{'loss': 2.1725, 'grad_norm': 33.485870361328125, 'learning_rate': 1.3236994219653181e-05, 'epoch': 1.69}


 34%|███▍      | 1180/3460 [12:04<21:20,  1.78it/s]

{'loss': 2.2388, 'grad_norm': 17.114892959594727, 'learning_rate': 1.3179190751445088e-05, 'epoch': 1.71}


 34%|███▍      | 1190/3460 [12:09<21:25,  1.77it/s]

{'loss': 2.0669, 'grad_norm': 25.3808536529541, 'learning_rate': 1.3121387283236995e-05, 'epoch': 1.72}


 35%|███▍      | 1200/3460 [12:15<20:31,  1.84it/s]

{'loss': 2.2381, 'grad_norm': 16.451213836669922, 'learning_rate': 1.3063583815028902e-05, 'epoch': 1.73}


 35%|███▍      | 1210/3460 [12:20<20:37,  1.82it/s]

{'loss': 2.3057, 'grad_norm': 16.880373001098633, 'learning_rate': 1.300578034682081e-05, 'epoch': 1.75}


 35%|███▌      | 1220/3460 [12:26<20:25,  1.83it/s]

{'loss': 2.2339, 'grad_norm': 19.310869216918945, 'learning_rate': 1.2947976878612719e-05, 'epoch': 1.76}


 36%|███▌      | 1230/3460 [12:31<20:42,  1.79it/s]

{'loss': 2.2345, 'grad_norm': 17.432106018066406, 'learning_rate': 1.2890173410404626e-05, 'epoch': 1.78}


 36%|███▌      | 1240/3460 [12:37<20:50,  1.78it/s]

{'loss': 2.1891, 'grad_norm': 17.655485153198242, 'learning_rate': 1.2832369942196533e-05, 'epoch': 1.79}


 36%|███▌      | 1250/3460 [12:42<20:43,  1.78it/s]

{'loss': 2.2634, 'grad_norm': 15.42031478881836, 'learning_rate': 1.2774566473988441e-05, 'epoch': 1.81}


 36%|███▋      | 1260/3460 [12:48<20:35,  1.78it/s]

{'loss': 2.327, 'grad_norm': 19.084625244140625, 'learning_rate': 1.2716763005780348e-05, 'epoch': 1.82}


 37%|███▋      | 1270/3460 [12:54<19:55,  1.83it/s]

{'loss': 2.2504, 'grad_norm': 22.82892417907715, 'learning_rate': 1.2658959537572255e-05, 'epoch': 1.84}


 37%|███▋      | 1280/3460 [12:59<19:56,  1.82it/s]

{'loss': 2.2233, 'grad_norm': 26.76194190979004, 'learning_rate': 1.2601156069364162e-05, 'epoch': 1.85}


 37%|███▋      | 1290/3460 [13:05<19:40,  1.84it/s]

{'loss': 2.2125, 'grad_norm': 16.729652404785156, 'learning_rate': 1.2543352601156072e-05, 'epoch': 1.86}


 38%|███▊      | 1300/3460 [13:10<19:34,  1.84it/s]

{'loss': 2.206, 'grad_norm': 17.577768325805664, 'learning_rate': 1.2485549132947979e-05, 'epoch': 1.88}


 38%|███▊      | 1310/3460 [13:15<19:26,  1.84it/s]

{'loss': 2.067, 'grad_norm': 14.637669563293457, 'learning_rate': 1.2427745664739886e-05, 'epoch': 1.89}


 38%|███▊      | 1320/3460 [13:21<19:26,  1.83it/s]

{'loss': 2.2368, 'grad_norm': 18.60544776916504, 'learning_rate': 1.2369942196531793e-05, 'epoch': 1.91}


 38%|███▊      | 1330/3460 [13:26<19:09,  1.85it/s]

{'loss': 2.3483, 'grad_norm': 26.447925567626953, 'learning_rate': 1.2312138728323701e-05, 'epoch': 1.92}


 39%|███▊      | 1340/3460 [13:32<19:17,  1.83it/s]

{'loss': 2.1534, 'grad_norm': 15.388813018798828, 'learning_rate': 1.2254335260115608e-05, 'epoch': 1.94}


 39%|███▉      | 1350/3460 [13:37<19:03,  1.84it/s]

{'loss': 2.1001, 'grad_norm': 14.169343948364258, 'learning_rate': 1.2196531791907515e-05, 'epoch': 1.95}


 39%|███▉      | 1360/3460 [13:43<18:56,  1.85it/s]

{'loss': 2.2591, 'grad_norm': 18.86682891845703, 'learning_rate': 1.2138728323699422e-05, 'epoch': 1.97}


 40%|███▉      | 1370/3460 [13:48<18:49,  1.85it/s]

{'loss': 2.1015, 'grad_norm': 23.56951904296875, 'learning_rate': 1.2080924855491329e-05, 'epoch': 1.98}


 40%|███▉      | 1380/3460 [13:53<18:37,  1.86it/s]

{'loss': 2.2382, 'grad_norm': 26.275190353393555, 'learning_rate': 1.202312138728324e-05, 'epoch': 1.99}


 40%|████      | 1390/3460 [13:59<18:33,  1.86it/s]

{'loss': 2.1066, 'grad_norm': 20.78380584716797, 'learning_rate': 1.1965317919075146e-05, 'epoch': 2.01}


 40%|████      | 1400/3460 [14:04<18:26,  1.86it/s]

{'loss': 1.9696, 'grad_norm': 17.31015396118164, 'learning_rate': 1.1907514450867053e-05, 'epoch': 2.02}


 41%|████      | 1410/3460 [14:10<18:37,  1.83it/s]

{'loss': 2.0298, 'grad_norm': 13.344100952148438, 'learning_rate': 1.184971098265896e-05, 'epoch': 2.04}


 41%|████      | 1420/3460 [14:15<18:26,  1.84it/s]

{'loss': 2.0536, 'grad_norm': 12.65826416015625, 'learning_rate': 1.1791907514450869e-05, 'epoch': 2.05}


 41%|████▏     | 1430/3460 [14:20<18:22,  1.84it/s]

{'loss': 1.8694, 'grad_norm': 12.486936569213867, 'learning_rate': 1.1734104046242775e-05, 'epoch': 2.07}


 42%|████▏     | 1440/3460 [14:26<18:06,  1.86it/s]

{'loss': 2.1732, 'grad_norm': 23.229312896728516, 'learning_rate': 1.1676300578034682e-05, 'epoch': 2.08}


 42%|████▏     | 1450/3460 [14:31<18:15,  1.83it/s]

{'loss': 2.0082, 'grad_norm': 17.979894638061523, 'learning_rate': 1.161849710982659e-05, 'epoch': 2.1}


 42%|████▏     | 1460/3460 [14:37<18:02,  1.85it/s]

{'loss': 1.7807, 'grad_norm': 20.411788940429688, 'learning_rate': 1.15606936416185e-05, 'epoch': 2.11}


 42%|████▏     | 1470/3460 [14:42<17:53,  1.85it/s]

{'loss': 1.997, 'grad_norm': 20.30683135986328, 'learning_rate': 1.1502890173410406e-05, 'epoch': 2.12}


 43%|████▎     | 1480/3460 [14:47<17:49,  1.85it/s]

{'loss': 1.9317, 'grad_norm': 18.36056900024414, 'learning_rate': 1.1445086705202313e-05, 'epoch': 2.14}


 43%|████▎     | 1490/3460 [14:53<17:47,  1.85it/s]

{'loss': 1.8183, 'grad_norm': 15.834892272949219, 'learning_rate': 1.138728323699422e-05, 'epoch': 2.15}


 43%|████▎     | 1500/3460 [14:58<17:49,  1.83it/s]

{'loss': 2.1159, 'grad_norm': 17.980026245117188, 'learning_rate': 1.1329479768786129e-05, 'epoch': 2.17}


                                                   
 43%|████▎     | 1500/3460 [15:29<17:49,  1.83it/s]

{'eval_loss': 2.303861141204834, 'eval_accuracy': 0.37621973256234187, 'eval_runtime': 30.8982, 'eval_samples_per_second': 89.552, 'eval_steps_per_second': 5.599, 'epoch': 2.17}


 44%|████▎     | 1510/3460 [15:42<33:19,  1.03s/it]  

{'loss': 2.0515, 'grad_norm': 17.659761428833008, 'learning_rate': 1.1271676300578036e-05, 'epoch': 2.18}


 44%|████▍     | 1520/3460 [15:48<18:04,  1.79it/s]

{'loss': 1.9655, 'grad_norm': 17.125940322875977, 'learning_rate': 1.1213872832369943e-05, 'epoch': 2.2}


 44%|████▍     | 1530/3460 [15:53<17:32,  1.83it/s]

{'loss': 2.113, 'grad_norm': 24.928693771362305, 'learning_rate': 1.115606936416185e-05, 'epoch': 2.21}


 45%|████▍     | 1540/3460 [15:59<17:32,  1.82it/s]

{'loss': 2.0611, 'grad_norm': 18.789634704589844, 'learning_rate': 1.1098265895953756e-05, 'epoch': 2.23}


 45%|████▍     | 1550/3460 [16:04<17:17,  1.84it/s]

{'loss': 2.1609, 'grad_norm': 17.436243057250977, 'learning_rate': 1.1040462427745667e-05, 'epoch': 2.24}


 45%|████▌     | 1560/3460 [16:10<17:13,  1.84it/s]

{'loss': 2.0898, 'grad_norm': 16.178054809570312, 'learning_rate': 1.0982658959537573e-05, 'epoch': 2.25}


 45%|████▌     | 1570/3460 [16:15<16:57,  1.86it/s]

{'loss': 1.945, 'grad_norm': 14.211674690246582, 'learning_rate': 1.092485549132948e-05, 'epoch': 2.27}


 46%|████▌     | 1580/3460 [16:20<17:03,  1.84it/s]

{'loss': 2.0763, 'grad_norm': 19.296741485595703, 'learning_rate': 1.0867052023121387e-05, 'epoch': 2.28}


 46%|████▌     | 1590/3460 [16:26<16:46,  1.86it/s]

{'loss': 2.0196, 'grad_norm': 20.700061798095703, 'learning_rate': 1.0809248554913296e-05, 'epoch': 2.3}


 46%|████▌     | 1600/3460 [16:31<16:44,  1.85it/s]

{'loss': 1.9593, 'grad_norm': 20.196290969848633, 'learning_rate': 1.0751445086705203e-05, 'epoch': 2.31}


 47%|████▋     | 1610/3460 [16:37<17:10,  1.80it/s]

{'loss': 2.0891, 'grad_norm': 21.023637771606445, 'learning_rate': 1.069364161849711e-05, 'epoch': 2.33}


 47%|████▋     | 1620/3460 [16:42<17:11,  1.78it/s]

{'loss': 2.0326, 'grad_norm': 16.97892189025879, 'learning_rate': 1.0635838150289017e-05, 'epoch': 2.34}


 47%|████▋     | 1630/3460 [16:48<16:38,  1.83it/s]

{'loss': 1.7784, 'grad_norm': 20.25748634338379, 'learning_rate': 1.0578034682080927e-05, 'epoch': 2.36}


 47%|████▋     | 1640/3460 [16:53<16:29,  1.84it/s]

{'loss': 1.8807, 'grad_norm': 19.99138832092285, 'learning_rate': 1.0520231213872834e-05, 'epoch': 2.37}


 48%|████▊     | 1650/3460 [16:59<16:28,  1.83it/s]

{'loss': 2.0654, 'grad_norm': 16.90713882446289, 'learning_rate': 1.046242774566474e-05, 'epoch': 2.38}


 48%|████▊     | 1660/3460 [17:04<16:17,  1.84it/s]

{'loss': 1.8042, 'grad_norm': 32.277278900146484, 'learning_rate': 1.0404624277456647e-05, 'epoch': 2.4}


 48%|████▊     | 1670/3460 [17:10<16:12,  1.84it/s]

{'loss': 1.9035, 'grad_norm': 19.413497924804688, 'learning_rate': 1.0346820809248556e-05, 'epoch': 2.41}


 49%|████▊     | 1680/3460 [17:15<16:02,  1.85it/s]

{'loss': 1.949, 'grad_norm': 45.47880554199219, 'learning_rate': 1.0289017341040463e-05, 'epoch': 2.43}


 49%|████▉     | 1690/3460 [17:21<16:26,  1.79it/s]

{'loss': 1.9739, 'grad_norm': 17.424827575683594, 'learning_rate': 1.023121387283237e-05, 'epoch': 2.44}


 49%|████▉     | 1700/3460 [17:26<16:35,  1.77it/s]

{'loss': 1.9846, 'grad_norm': 17.66953468322754, 'learning_rate': 1.0173410404624278e-05, 'epoch': 2.46}


 49%|████▉     | 1710/3460 [17:32<16:01,  1.82it/s]

{'loss': 1.9433, 'grad_norm': 23.13129997253418, 'learning_rate': 1.0115606936416187e-05, 'epoch': 2.47}


 50%|████▉     | 1720/3460 [17:37<15:51,  1.83it/s]

{'loss': 1.8768, 'grad_norm': 19.5784969329834, 'learning_rate': 1.0057803468208094e-05, 'epoch': 2.49}


 50%|█████     | 1730/3460 [17:43<16:03,  1.80it/s]

{'loss': 1.9594, 'grad_norm': 20.062150955200195, 'learning_rate': 1e-05, 'epoch': 2.5}


 50%|█████     | 1740/3460 [17:48<15:55,  1.80it/s]

{'loss': 1.9036, 'grad_norm': 23.223142623901367, 'learning_rate': 9.942196531791908e-06, 'epoch': 2.51}


 51%|█████     | 1750/3460 [17:54<15:34,  1.83it/s]

{'loss': 2.0361, 'grad_norm': 23.06231689453125, 'learning_rate': 9.884393063583816e-06, 'epoch': 2.53}


 51%|█████     | 1760/3460 [17:59<15:15,  1.86it/s]

{'loss': 1.964, 'grad_norm': 15.173140525817871, 'learning_rate': 9.826589595375723e-06, 'epoch': 2.54}


 51%|█████     | 1770/3460 [18:05<15:17,  1.84it/s]

{'loss': 1.9527, 'grad_norm': 17.681764602661133, 'learning_rate': 9.768786127167632e-06, 'epoch': 2.56}


 51%|█████▏    | 1780/3460 [18:10<15:06,  1.85it/s]

{'loss': 2.0967, 'grad_norm': 28.2103214263916, 'learning_rate': 9.710982658959539e-06, 'epoch': 2.57}


 52%|█████▏    | 1790/3460 [18:16<15:04,  1.85it/s]

{'loss': 1.9379, 'grad_norm': 26.62898826599121, 'learning_rate': 9.653179190751446e-06, 'epoch': 2.59}


 52%|█████▏    | 1800/3460 [18:21<15:01,  1.84it/s]

{'loss': 2.1299, 'grad_norm': 20.30220603942871, 'learning_rate': 9.595375722543352e-06, 'epoch': 2.6}


 52%|█████▏    | 1810/3460 [18:27<14:52,  1.85it/s]

{'loss': 1.9213, 'grad_norm': 21.38950538635254, 'learning_rate': 9.537572254335261e-06, 'epoch': 2.62}


 53%|█████▎    | 1820/3460 [18:32<14:45,  1.85it/s]

{'loss': 1.935, 'grad_norm': 21.354084014892578, 'learning_rate': 9.479768786127168e-06, 'epoch': 2.63}


 53%|█████▎    | 1830/3460 [18:37<14:42,  1.85it/s]

{'loss': 1.9482, 'grad_norm': 26.12883186340332, 'learning_rate': 9.421965317919077e-06, 'epoch': 2.64}


 53%|█████▎    | 1840/3460 [18:43<14:58,  1.80it/s]

{'loss': 2.2185, 'grad_norm': 21.765579223632812, 'learning_rate': 9.364161849710983e-06, 'epoch': 2.66}


 53%|█████▎    | 1850/3460 [18:48<14:59,  1.79it/s]

{'loss': 2.0365, 'grad_norm': 26.46965789794922, 'learning_rate': 9.306358381502892e-06, 'epoch': 2.67}


 54%|█████▍    | 1860/3460 [18:54<14:32,  1.83it/s]

{'loss': 1.9393, 'grad_norm': 23.903345108032227, 'learning_rate': 9.248554913294799e-06, 'epoch': 2.69}


 54%|█████▍    | 1870/3460 [18:59<14:24,  1.84it/s]

{'loss': 2.0376, 'grad_norm': 15.973775863647461, 'learning_rate': 9.190751445086706e-06, 'epoch': 2.7}


 54%|█████▍    | 1880/3460 [19:05<14:09,  1.86it/s]

{'loss': 1.9674, 'grad_norm': 24.035837173461914, 'learning_rate': 9.132947976878613e-06, 'epoch': 2.72}


 55%|█████▍    | 1890/3460 [19:10<14:04,  1.86it/s]

{'loss': 2.0031, 'grad_norm': 22.104562759399414, 'learning_rate': 9.075144508670521e-06, 'epoch': 2.73}


 55%|█████▍    | 1900/3460 [19:16<13:57,  1.86it/s]

{'loss': 1.9989, 'grad_norm': 22.835247039794922, 'learning_rate': 9.017341040462428e-06, 'epoch': 2.75}


 55%|█████▌    | 1910/3460 [19:21<13:50,  1.87it/s]

{'loss': 2.0325, 'grad_norm': 34.17510986328125, 'learning_rate': 8.959537572254337e-06, 'epoch': 2.76}


 55%|█████▌    | 1920/3460 [19:26<13:45,  1.86it/s]

{'loss': 2.081, 'grad_norm': 26.636449813842773, 'learning_rate': 8.901734104046244e-06, 'epoch': 2.77}


 56%|█████▌    | 1930/3460 [19:32<13:48,  1.85it/s]

{'loss': 2.0472, 'grad_norm': 22.773962020874023, 'learning_rate': 8.84393063583815e-06, 'epoch': 2.79}


 56%|█████▌    | 1940/3460 [19:37<13:35,  1.86it/s]

{'loss': 2.0643, 'grad_norm': 20.517108917236328, 'learning_rate': 8.786127167630059e-06, 'epoch': 2.8}


 56%|█████▋    | 1950/3460 [19:43<13:34,  1.85it/s]

{'loss': 2.0415, 'grad_norm': 18.165651321411133, 'learning_rate': 8.728323699421966e-06, 'epoch': 2.82}


 57%|█████▋    | 1960/3460 [19:48<13:29,  1.85it/s]

{'loss': 1.9889, 'grad_norm': 24.37912940979004, 'learning_rate': 8.670520231213873e-06, 'epoch': 2.83}


 57%|█████▋    | 1970/3460 [19:53<13:52,  1.79it/s]

{'loss': 1.9665, 'grad_norm': 19.69339370727539, 'learning_rate': 8.61271676300578e-06, 'epoch': 2.85}


 57%|█████▋    | 1980/3460 [19:59<13:50,  1.78it/s]

{'loss': 1.945, 'grad_norm': 27.037227630615234, 'learning_rate': 8.554913294797688e-06, 'epoch': 2.86}


 58%|█████▊    | 1990/3460 [20:05<13:18,  1.84it/s]

{'loss': 1.9369, 'grad_norm': 18.606000900268555, 'learning_rate': 8.497109826589595e-06, 'epoch': 2.88}


 58%|█████▊    | 2000/3460 [20:10<13:07,  1.85it/s]

{'loss': 1.9461, 'grad_norm': 22.95879554748535, 'learning_rate': 8.439306358381504e-06, 'epoch': 2.89}


                                                   
 58%|█████▊    | 2000/3460 [20:40<13:07,  1.85it/s]

{'eval_loss': 2.2942044734954834, 'eval_accuracy': 0.37621973256234187, 'eval_runtime': 30.1377, 'eval_samples_per_second': 91.812, 'eval_steps_per_second': 5.74, 'epoch': 2.89}


 58%|█████▊    | 2010/3460 [20:52<23:42,  1.02it/s]  

{'loss': 1.9679, 'grad_norm': 20.32745361328125, 'learning_rate': 8.38150289017341e-06, 'epoch': 2.9}


 58%|█████▊    | 2020/3460 [20:57<13:15,  1.81it/s]

{'loss': 1.9228, 'grad_norm': 27.827232360839844, 'learning_rate': 8.32369942196532e-06, 'epoch': 2.92}


 59%|█████▊    | 2030/3460 [21:03<12:58,  1.84it/s]

{'loss': 1.921, 'grad_norm': 22.09320068359375, 'learning_rate': 8.265895953757226e-06, 'epoch': 2.93}


 59%|█████▉    | 2040/3460 [21:08<12:48,  1.85it/s]

{'loss': 1.8843, 'grad_norm': 33.78139877319336, 'learning_rate': 8.208092485549133e-06, 'epoch': 2.95}


 59%|█████▉    | 2050/3460 [21:13<12:40,  1.85it/s]

{'loss': 1.9011, 'grad_norm': 28.853126525878906, 'learning_rate': 8.150289017341042e-06, 'epoch': 2.96}


 60%|█████▉    | 2060/3460 [21:19<12:35,  1.85it/s]

{'loss': 2.0328, 'grad_norm': 24.730791091918945, 'learning_rate': 8.092485549132949e-06, 'epoch': 2.98}


 60%|█████▉    | 2070/3460 [21:24<13:01,  1.78it/s]

{'loss': 2.0747, 'grad_norm': 20.292800903320312, 'learning_rate': 8.034682080924856e-06, 'epoch': 2.99}


 60%|██████    | 2080/3460 [21:30<12:33,  1.83it/s]

{'loss': 1.9155, 'grad_norm': 23.048843383789062, 'learning_rate': 7.976878612716764e-06, 'epoch': 3.01}


 60%|██████    | 2090/3460 [21:35<12:23,  1.84it/s]

{'loss': 1.7357, 'grad_norm': 16.040895462036133, 'learning_rate': 7.919075144508671e-06, 'epoch': 3.02}


 61%|██████    | 2100/3460 [21:41<12:15,  1.85it/s]

{'loss': 1.737, 'grad_norm': 23.85694122314453, 'learning_rate': 7.86127167630058e-06, 'epoch': 3.03}


 61%|██████    | 2110/3460 [21:46<12:36,  1.78it/s]

{'loss': 1.8517, 'grad_norm': 23.081693649291992, 'learning_rate': 7.803468208092486e-06, 'epoch': 3.05}


 61%|██████▏   | 2120/3460 [21:52<12:42,  1.76it/s]

{'loss': 1.7552, 'grad_norm': 16.171419143676758, 'learning_rate': 7.745664739884393e-06, 'epoch': 3.06}


 62%|██████▏   | 2130/3460 [21:58<12:28,  1.78it/s]

{'loss': 1.9049, 'grad_norm': 27.822263717651367, 'learning_rate': 7.687861271676302e-06, 'epoch': 3.08}


 62%|██████▏   | 2140/3460 [22:03<12:40,  1.74it/s]

{'loss': 1.9045, 'grad_norm': 20.850706100463867, 'learning_rate': 7.630057803468209e-06, 'epoch': 3.09}


 62%|██████▏   | 2150/3460 [22:09<12:02,  1.81it/s]

{'loss': 1.8116, 'grad_norm': 14.537873268127441, 'learning_rate': 7.5722543352601166e-06, 'epoch': 3.11}


 62%|██████▏   | 2160/3460 [22:14<11:45,  1.84it/s]

{'loss': 1.7383, 'grad_norm': 17.95880889892578, 'learning_rate': 7.5144508670520235e-06, 'epoch': 3.12}


 63%|██████▎   | 2170/3460 [22:20<11:51,  1.81it/s]

{'loss': 1.7335, 'grad_norm': 21.2801570892334, 'learning_rate': 7.456647398843931e-06, 'epoch': 3.14}


 63%|██████▎   | 2180/3460 [22:25<11:36,  1.84it/s]

{'loss': 1.5967, 'grad_norm': 14.118510246276855, 'learning_rate': 7.398843930635838e-06, 'epoch': 3.15}


 63%|██████▎   | 2190/3460 [22:31<11:32,  1.83it/s]

{'loss': 1.8872, 'grad_norm': 26.035184860229492, 'learning_rate': 7.341040462427747e-06, 'epoch': 3.16}


 64%|██████▎   | 2200/3460 [22:36<11:30,  1.82it/s]

{'loss': 1.8107, 'grad_norm': 18.840726852416992, 'learning_rate': 7.283236994219654e-06, 'epoch': 3.18}


 64%|██████▍   | 2210/3460 [22:42<11:14,  1.85it/s]

{'loss': 1.8028, 'grad_norm': 23.60321807861328, 'learning_rate': 7.225433526011561e-06, 'epoch': 3.19}


 64%|██████▍   | 2220/3460 [22:47<11:12,  1.84it/s]

{'loss': 1.638, 'grad_norm': 22.909143447875977, 'learning_rate': 7.167630057803468e-06, 'epoch': 3.21}


 64%|██████▍   | 2230/3460 [22:53<11:26,  1.79it/s]

{'loss': 1.708, 'grad_norm': 23.340646743774414, 'learning_rate': 7.109826589595377e-06, 'epoch': 3.22}


 65%|██████▍   | 2240/3460 [22:58<11:23,  1.79it/s]

{'loss': 1.7517, 'grad_norm': 18.336084365844727, 'learning_rate': 7.052023121387284e-06, 'epoch': 3.24}


 65%|██████▌   | 2250/3460 [23:04<11:18,  1.78it/s]

{'loss': 1.9255, 'grad_norm': 24.790376663208008, 'learning_rate': 6.9942196531791914e-06, 'epoch': 3.25}


 65%|██████▌   | 2260/3460 [23:10<11:07,  1.80it/s]

{'loss': 1.4899, 'grad_norm': 16.67789077758789, 'learning_rate': 6.936416184971098e-06, 'epoch': 3.27}


 66%|██████▌   | 2270/3460 [23:15<10:47,  1.84it/s]

{'loss': 1.9844, 'grad_norm': 37.25217819213867, 'learning_rate': 6.878612716763007e-06, 'epoch': 3.28}


 66%|██████▌   | 2280/3460 [23:21<10:40,  1.84it/s]

{'loss': 1.7736, 'grad_norm': 20.704978942871094, 'learning_rate': 6.820809248554914e-06, 'epoch': 3.29}


 66%|██████▌   | 2290/3460 [23:26<10:34,  1.84it/s]

{'loss': 1.5501, 'grad_norm': 21.78131103515625, 'learning_rate': 6.7630057803468215e-06, 'epoch': 3.31}


 66%|██████▋   | 2300/3460 [23:31<10:28,  1.85it/s]

{'loss': 1.702, 'grad_norm': 18.38640785217285, 'learning_rate': 6.7052023121387284e-06, 'epoch': 3.32}


 67%|██████▋   | 2310/3460 [23:37<10:24,  1.84it/s]

{'loss': 1.7762, 'grad_norm': 22.290794372558594, 'learning_rate': 6.647398843930636e-06, 'epoch': 3.34}


 67%|██████▋   | 2320/3460 [23:42<10:18,  1.84it/s]

{'loss': 1.7052, 'grad_norm': 21.77907943725586, 'learning_rate': 6.589595375722544e-06, 'epoch': 3.35}


 67%|██████▋   | 2330/3460 [23:48<10:09,  1.85it/s]

{'loss': 1.9326, 'grad_norm': 24.754234313964844, 'learning_rate': 6.531791907514451e-06, 'epoch': 3.37}


 68%|██████▊   | 2340/3460 [23:53<10:05,  1.85it/s]

{'loss': 1.9782, 'grad_norm': 17.278348922729492, 'learning_rate': 6.473988439306359e-06, 'epoch': 3.38}


 68%|██████▊   | 2350/3460 [23:59<09:56,  1.86it/s]

{'loss': 1.7206, 'grad_norm': 25.66120719909668, 'learning_rate': 6.416184971098266e-06, 'epoch': 3.4}


 68%|██████▊   | 2360/3460 [24:04<09:56,  1.84it/s]

{'loss': 1.7272, 'grad_norm': 22.339157104492188, 'learning_rate': 6.358381502890174e-06, 'epoch': 3.41}


 68%|██████▊   | 2370/3460 [24:09<09:52,  1.84it/s]

{'loss': 2.0135, 'grad_norm': 16.133604049682617, 'learning_rate': 6.300578034682081e-06, 'epoch': 3.42}


 69%|██████▉   | 2380/3460 [24:15<09:43,  1.85it/s]

{'loss': 1.807, 'grad_norm': 17.722238540649414, 'learning_rate': 6.2427745664739895e-06, 'epoch': 3.44}


 69%|██████▉   | 2390/3460 [24:20<09:41,  1.84it/s]

{'loss': 1.6822, 'grad_norm': 25.03858757019043, 'learning_rate': 6.184971098265896e-06, 'epoch': 3.45}


 69%|██████▉   | 2400/3460 [24:26<09:36,  1.84it/s]

{'loss': 1.6723, 'grad_norm': 27.046934127807617, 'learning_rate': 6.127167630057804e-06, 'epoch': 3.47}


 70%|██████▉   | 2410/3460 [24:31<09:32,  1.83it/s]

{'loss': 1.8536, 'grad_norm': 27.38645362854004, 'learning_rate': 6.069364161849711e-06, 'epoch': 3.48}


 70%|██████▉   | 2420/3460 [24:37<09:23,  1.85it/s]

{'loss': 1.9522, 'grad_norm': 22.54572105407715, 'learning_rate': 6.01156069364162e-06, 'epoch': 3.5}


 70%|███████   | 2430/3460 [24:42<09:19,  1.84it/s]

{'loss': 1.8162, 'grad_norm': 23.780120849609375, 'learning_rate': 5.9537572254335265e-06, 'epoch': 3.51}


 71%|███████   | 2440/3460 [24:47<09:14,  1.84it/s]

{'loss': 1.6582, 'grad_norm': 29.05414581298828, 'learning_rate': 5.895953757225434e-06, 'epoch': 3.53}


 71%|███████   | 2450/3460 [24:53<09:08,  1.84it/s]

{'loss': 1.7641, 'grad_norm': 26.35134506225586, 'learning_rate': 5.838150289017341e-06, 'epoch': 3.54}


 71%|███████   | 2460/3460 [24:58<09:03,  1.84it/s]

{'loss': 1.8182, 'grad_norm': 22.983768463134766, 'learning_rate': 5.78034682080925e-06, 'epoch': 3.55}


 71%|███████▏  | 2470/3460 [25:04<08:55,  1.85it/s]

{'loss': 1.9939, 'grad_norm': 23.07670021057129, 'learning_rate': 5.722543352601157e-06, 'epoch': 3.57}


 72%|███████▏  | 2480/3460 [25:09<08:49,  1.85it/s]

{'loss': 1.9543, 'grad_norm': 19.195920944213867, 'learning_rate': 5.664739884393064e-06, 'epoch': 3.58}


 72%|███████▏  | 2490/3460 [25:15<08:46,  1.84it/s]

{'loss': 1.8821, 'grad_norm': 33.678794860839844, 'learning_rate': 5.606936416184971e-06, 'epoch': 3.6}


 72%|███████▏  | 2500/3460 [25:20<08:39,  1.85it/s]

{'loss': 1.8164, 'grad_norm': 20.90106201171875, 'learning_rate': 5.549132947976878e-06, 'epoch': 3.61}


                                                   
 72%|███████▏  | 2500/3460 [25:50<08:39,  1.85it/s]

{'eval_loss': 2.3366458415985107, 'eval_accuracy': 0.37368991687748465, 'eval_runtime': 30.2366, 'eval_samples_per_second': 91.512, 'eval_steps_per_second': 5.722, 'epoch': 3.61}


 72%|███████▏  | 2500/3460 [25:58<09:58,  1.60it/s]


{'train_runtime': 1558.4113, 'train_samples_per_second': 35.501, 'train_steps_per_second': 2.22, 'train_loss': 2.2469180938720705, 'epoch': 3.61}


100%|██████████| 173/173 [00:30<00:00,  5.74it/s]

Test results: {'eval_loss': 2.303861141204834, 'eval_accuracy': 0.37621973256234187, 'eval_runtime': 30.3332, 'eval_samples_per_second': 91.22, 'eval_steps_per_second': 5.703, 'epoch': 3.61271676300578}





In [6]:
import torch
import gc

# Svuota la cache GPU
torch.cuda.empty_cache()

# Forza la garbage collection
gc.collect()

# (opzionale) Stampa lo stato della memoria GPU
print(torch.cuda.memory_summary(device='cuda', abbreviated=True))



|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   1448 MiB |   2700 MiB |  13253 GiB |  13252 GiB |
|---------------------------------------------------------------------------|
| Active memory         |   1448 MiB |   2700 MiB |  13253 GiB |  13252 GiB |
|---------------------------------------------------------------------------|
| Requested memory      |   1442 MiB |   2690 MiB |  13178 GiB |  13177 GiB |
|---------------------------------------------------------------------------|
| GPU reserved memory   |   1612 MiB |   3166 MiB |   3166 MiB |   1554 MiB |
|---------------------------------------------------------------

In [5]:
del model
del optimizer
del train_loader
del val_loader
torch.cuda.empty_cache()


NameError: name 'optimizer' is not defined

# Prova 2

In [1]:
import pandas as pd
import torch
import re
import random
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import DebertaTokenizer, DebertaForSequenceClassification
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding, EarlyStoppingCallback
from torch.utils.data import Dataset
import evaluate
import wandb

# Inizializzazione W&B (modifica 'il_tuo_progetto' e 'il_tuo_esperimento')
wandb.init(project="il_tuo_progetto", name="il_tuo_esperimento")

# Impostazione seed per riproducibilità
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

known_emotions_list = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
    'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

# Carica i file CSV (adatta i nomi dei file se necessario)
file_paths = [
    '../data/geoemotions/goemotions_1.csv',
    '../data/geoemotions/goemotions_2.csv',
    '../data/geoemotions/goemotions_3.csv'
]

dataframes = [pd.read_csv(file) for file in file_paths]
combined_data = pd.concat(dataframes, ignore_index=True)

emotion_columns = [col for col in combined_data.columns if col in known_emotions_list]
if not emotion_columns:
    raise ValueError("Nessuna colonna di emozione trovata nel dataset.")

combined_data['label'] = combined_data[emotion_columns].idxmax(axis=1)
combined_data = combined_data[combined_data['text'].astype(str).str.strip().astype(bool)]
combined_data = combined_data[combined_data[emotion_columns].sum(axis=1) > 0]

unique_labels = combined_data['label'].unique()
label_mapping = {label: i for i, label in enumerate(unique_labels)}
combined_data['label'] = combined_data['label'].map(label_mapping)

def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-z0-9\s\.,!?;]', '', text)
    text = text.strip()
    return text

combined_data['cleaned_text'] = combined_data['text'].apply(preprocess_text)

# Bilanciamento per classi
class_counts = combined_data['label'].value_counts()
min_class_count = class_counts.min()
balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(
    balanced_data['cleaned_text'],
    balanced_data['label'],
    test_size=0.2,
    random_state=SEED,
    stratify=balanced_data['label']
)

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts.reset_index(drop=True)
        self.labels = labels.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts.iloc[index]
        label = self.labels.iloc[index]

        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long)
        }

tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")
train_dataset = EmotionDataset(X_train, y_train, tokenizer, max_len=128)
test_dataset = EmotionDataset(X_test, y_test, tokenizer, max_len=128)




  from .autonotebook import tqdm as notebook_tqdm





[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mnicologiuse2003[0m ([33mnicologiuse2003-ironhack[0m). Use [1m`wandb login --relogin`[0m to force relogin


  balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)


In [2]:
model = DebertaForSequenceClassification.from_pretrained(
    "microsoft/deberta-base",
    num_labels=len(label_mapping)
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    return metric.compute(predictions=predictions, references=labels)

# Parametri di training aggiornati
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,              # più epoche per migliorare l'apprendimento
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",     # valuta alla fine di ogni epoca
    save_strategy="epoch",           # salva il modello migliore ad ogni epoca
    logging_dir="./logs",
    logging_steps=50,
    seed=SEED,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    learning_rate=2e-5,
    lr_scheduler_type="linear",      # scheduler lineare per il learning rate
    report_to="wandb",               # log su Weights & Biases
    fp16=True                        # mixed precision per training più veloce e stabile
)

# Early Stopping con più pazienza
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=5,
    early_stopping_threshold=0.0
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)

trainer.train()
eval_results = trainer.evaluate()
print("Test results:", eval_results)

Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  1%|          | 50/6920 [00:35<1:18:37,  1.46it/s]

{'loss': 3.3387, 'grad_norm': 1.451082706451416, 'learning_rate': 1.985549132947977e-05, 'epoch': 0.07}


  1%|▏         | 100/6920 [01:09<1:16:51,  1.48it/s]

{'loss': 3.2845, 'grad_norm': 4.820042133331299, 'learning_rate': 1.9713872832369944e-05, 'epoch': 0.14}


  2%|▏         | 150/6920 [01:43<1:17:57,  1.45it/s]

{'loss': 3.1257, 'grad_norm': 7.830952167510986, 'learning_rate': 1.9569364161849715e-05, 'epoch': 0.22}


  3%|▎         | 200/6920 [02:18<1:17:19,  1.45it/s]

{'loss': 2.9307, 'grad_norm': 7.302014350891113, 'learning_rate': 1.9424855491329482e-05, 'epoch': 0.29}


  4%|▎         | 250/6920 [02:52<1:16:47,  1.45it/s]

{'loss': 2.7599, 'grad_norm': 12.068093299865723, 'learning_rate': 1.9283236994219654e-05, 'epoch': 0.36}


  4%|▍         | 300/6920 [03:27<1:16:00,  1.45it/s]

{'loss': 2.7039, 'grad_norm': 8.328497886657715, 'learning_rate': 1.913872832369942e-05, 'epoch': 0.43}


  5%|▌         | 350/6920 [04:01<1:15:35,  1.45it/s]

{'loss': 2.6392, 'grad_norm': 8.464312553405762, 'learning_rate': 1.8994219653179192e-05, 'epoch': 0.51}


  6%|▌         | 400/6920 [04:36<1:15:06,  1.45it/s]

{'loss': 2.5605, 'grad_norm': 7.901641368865967, 'learning_rate': 1.8849710982658962e-05, 'epoch': 0.58}


  7%|▋         | 450/6920 [05:10<1:14:42,  1.44it/s]

{'loss': 2.539, 'grad_norm': 15.042487144470215, 'learning_rate': 1.870520231213873e-05, 'epoch': 0.65}


  7%|▋         | 500/6920 [05:45<1:14:10,  1.44it/s]

{'loss': 2.5234, 'grad_norm': 13.701692581176758, 'learning_rate': 1.85606936416185e-05, 'epoch': 0.72}


  8%|▊         | 550/6920 [06:19<1:12:38,  1.46it/s]

{'loss': 2.4761, 'grad_norm': 7.684752941131592, 'learning_rate': 1.8416184971098267e-05, 'epoch': 0.79}


  9%|▊         | 600/6920 [06:54<1:12:26,  1.45it/s]

{'loss': 2.5033, 'grad_norm': 11.972770690917969, 'learning_rate': 1.8271676300578035e-05, 'epoch': 0.87}


  9%|▉         | 650/6920 [07:28<1:11:54,  1.45it/s]

{'loss': 2.4222, 'grad_norm': 10.55102825164795, 'learning_rate': 1.8127167630057805e-05, 'epoch': 0.94}


 10%|█         | 692/6920 [07:57<1:04:20,  1.61it/s]
 10%|█         | 692/6920 [08:32<1:04:20,  1.61it/s]

{'eval_loss': 2.3630948066711426, 'eval_accuracy': 0.35128297795446334, 'eval_runtime': 35.0032, 'eval_samples_per_second': 79.05, 'eval_steps_per_second': 4.942, 'epoch': 1.0}


 10%|█         | 700/6920 [08:46<3:01:28,  1.75s/it] 

{'loss': 2.402, 'grad_norm': 10.985922813415527, 'learning_rate': 1.7982658959537573e-05, 'epoch': 1.01}


 11%|█         | 750/6920 [09:20<1:10:34,  1.46it/s]

{'loss': 2.208, 'grad_norm': 10.408838272094727, 'learning_rate': 1.7838150289017343e-05, 'epoch': 1.08}


 12%|█▏        | 800/6920 [09:54<1:10:31,  1.45it/s]

{'loss': 2.2071, 'grad_norm': 10.200215339660645, 'learning_rate': 1.769364161849711e-05, 'epoch': 1.16}


 12%|█▏        | 850/6920 [10:29<1:10:08,  1.44it/s]

{'loss': 2.146, 'grad_norm': 13.116727828979492, 'learning_rate': 1.7549132947976878e-05, 'epoch': 1.23}


 13%|█▎        | 900/6920 [11:04<1:09:16,  1.45it/s]

{'loss': 2.1372, 'grad_norm': 14.997278213500977, 'learning_rate': 1.7404624277456648e-05, 'epoch': 1.3}


 14%|█▎        | 950/6920 [11:39<1:11:34,  1.39it/s]

{'loss': 2.2262, 'grad_norm': 11.535496711730957, 'learning_rate': 1.726011560693642e-05, 'epoch': 1.37}


 14%|█▍        | 1000/6920 [12:15<1:10:06,  1.41it/s]

{'loss': 2.153, 'grad_norm': 18.252574920654297, 'learning_rate': 1.7115606936416186e-05, 'epoch': 1.45}


 15%|█▌        | 1050/6920 [12:49<1:06:58,  1.46it/s]

{'loss': 2.2073, 'grad_norm': 14.021491050720215, 'learning_rate': 1.6971098265895957e-05, 'epoch': 1.52}


 16%|█▌        | 1100/6920 [13:23<1:06:17,  1.46it/s]

{'loss': 2.1176, 'grad_norm': 11.871573448181152, 'learning_rate': 1.6826589595375724e-05, 'epoch': 1.59}


 17%|█▋        | 1150/6920 [13:57<1:05:47,  1.46it/s]

{'loss': 2.1902, 'grad_norm': 11.419403076171875, 'learning_rate': 1.668208092485549e-05, 'epoch': 1.66}


 17%|█▋        | 1200/6920 [14:33<1:06:45,  1.43it/s]

{'loss': 2.1358, 'grad_norm': 11.700373649597168, 'learning_rate': 1.6537572254335262e-05, 'epoch': 1.73}


 18%|█▊        | 1250/6920 [15:07<1:04:50,  1.46it/s]

{'loss': 2.1769, 'grad_norm': 13.600149154663086, 'learning_rate': 1.639306358381503e-05, 'epoch': 1.81}


 19%|█▉        | 1300/6920 [15:42<1:04:30,  1.45it/s]

{'loss': 2.2308, 'grad_norm': 12.231348037719727, 'learning_rate': 1.62485549132948e-05, 'epoch': 1.88}


 20%|█▉        | 1350/6920 [16:17<1:03:46,  1.46it/s]

{'loss': 2.1744, 'grad_norm': 14.795382499694824, 'learning_rate': 1.6104046242774567e-05, 'epoch': 1.95}


 20%|██        | 1384/6920 [16:39<56:10,  1.64it/s]  
 20%|██        | 1384/6920 [17:14<56:10,  1.64it/s]

{'eval_loss': 2.3015594482421875, 'eval_accuracy': 0.37188290567401516, 'eval_runtime': 34.6854, 'eval_samples_per_second': 79.774, 'eval_steps_per_second': 4.988, 'epoch': 2.0}


 20%|██        | 1400/6920 [17:41<1:09:22,  1.33it/s] 

{'loss': 2.0722, 'grad_norm': 12.135458946228027, 'learning_rate': 1.5959537572254334e-05, 'epoch': 2.02}


 21%|██        | 1450/6920 [18:15<1:04:19,  1.42it/s]

{'loss': 1.9556, 'grad_norm': 15.173900604248047, 'learning_rate': 1.5815028901734105e-05, 'epoch': 2.1}


 22%|██▏       | 1500/6920 [18:50<1:03:05,  1.43it/s]

{'loss': 1.8296, 'grad_norm': 13.361488342285156, 'learning_rate': 1.5670520231213875e-05, 'epoch': 2.17}


 22%|██▏       | 1550/6920 [19:24<1:00:51,  1.47it/s]

{'loss': 1.9426, 'grad_norm': 11.589937210083008, 'learning_rate': 1.5526011560693643e-05, 'epoch': 2.24}


 23%|██▎       | 1600/6920 [19:58<1:00:56,  1.45it/s]

{'loss': 1.8791, 'grad_norm': 14.243181228637695, 'learning_rate': 1.5381502890173413e-05, 'epoch': 2.31}


 24%|██▍       | 1650/6920 [20:33<1:00:29,  1.45it/s]

{'loss': 1.8386, 'grad_norm': 13.977599143981934, 'learning_rate': 1.523699421965318e-05, 'epoch': 2.38}


 25%|██▍       | 1700/6920 [21:07<57:44,  1.51it/s]  

{'loss': 1.8352, 'grad_norm': 15.794451713562012, 'learning_rate': 1.509248554913295e-05, 'epoch': 2.46}


 25%|██▌       | 1750/6920 [21:41<58:03,  1.48it/s]

{'loss': 1.8568, 'grad_norm': 14.06245231628418, 'learning_rate': 1.4947976878612718e-05, 'epoch': 2.53}


 26%|██▌       | 1800/6920 [22:14<56:34,  1.51it/s]

{'loss': 1.9116, 'grad_norm': 11.887608528137207, 'learning_rate': 1.4803468208092486e-05, 'epoch': 2.6}


 27%|██▋       | 1850/6920 [22:47<55:44,  1.52it/s]

{'loss': 1.9258, 'grad_norm': 18.707050323486328, 'learning_rate': 1.4658959537572254e-05, 'epoch': 2.67}


 27%|██▋       | 1900/6920 [23:20<55:08,  1.52it/s]

{'loss': 1.8939, 'grad_norm': 16.807144165039062, 'learning_rate': 1.4514450867052023e-05, 'epoch': 2.75}


 28%|██▊       | 1950/6920 [23:53<54:39,  1.52it/s]

{'loss': 1.9911, 'grad_norm': 13.296162605285645, 'learning_rate': 1.4369942196531792e-05, 'epoch': 2.82}


 29%|██▉       | 2000/6920 [24:28<55:43,  1.47it/s]

{'loss': 1.9042, 'grad_norm': 13.924515724182129, 'learning_rate': 1.4225433526011563e-05, 'epoch': 2.89}


 30%|██▉       | 2050/6920 [25:02<55:27,  1.46it/s]

{'loss': 1.799, 'grad_norm': 21.772132873535156, 'learning_rate': 1.4080924855491332e-05, 'epoch': 2.96}


                                                   
 30%|███       | 2076/6920 [25:54<49:20,  1.64it/s]

{'eval_loss': 2.3393869400024414, 'eval_accuracy': 0.36754607878568846, 'eval_runtime': 34.7388, 'eval_samples_per_second': 79.652, 'eval_steps_per_second': 4.98, 'epoch': 3.0}


 30%|███       | 2100/6920 [26:21<54:53,  1.46it/s]   

{'loss': 1.7443, 'grad_norm': 14.563915252685547, 'learning_rate': 1.39364161849711e-05, 'epoch': 3.03}


 31%|███       | 2150/6920 [26:55<54:24,  1.46it/s]

{'loss': 1.6656, 'grad_norm': 11.546416282653809, 'learning_rate': 1.3791907514450868e-05, 'epoch': 3.11}


 32%|███▏      | 2200/6920 [27:29<53:43,  1.46it/s]

{'loss': 1.5883, 'grad_norm': 13.332844734191895, 'learning_rate': 1.3647398843930637e-05, 'epoch': 3.18}


 33%|███▎      | 2250/6920 [28:03<53:11,  1.46it/s]

{'loss': 1.5816, 'grad_norm': 21.127763748168945, 'learning_rate': 1.3502890173410406e-05, 'epoch': 3.25}


 33%|███▎      | 2300/6920 [28:37<52:30,  1.47it/s]

{'loss': 1.5451, 'grad_norm': 14.851216316223145, 'learning_rate': 1.3358381502890175e-05, 'epoch': 3.32}


 34%|███▍      | 2350/6920 [29:11<51:42,  1.47it/s]

{'loss': 1.6565, 'grad_norm': 17.112810134887695, 'learning_rate': 1.3213872832369942e-05, 'epoch': 3.4}


 35%|███▍      | 2400/6920 [29:45<51:23,  1.47it/s]

{'loss': 1.5957, 'grad_norm': 17.68636703491211, 'learning_rate': 1.3069364161849711e-05, 'epoch': 3.47}


 35%|███▌      | 2450/6920 [30:19<50:56,  1.46it/s]

{'loss': 1.626, 'grad_norm': 16.86660385131836, 'learning_rate': 1.292485549132948e-05, 'epoch': 3.54}


 36%|███▌      | 2500/6920 [30:54<50:18,  1.46it/s]

{'loss': 1.7219, 'grad_norm': 31.05234718322754, 'learning_rate': 1.278034682080925e-05, 'epoch': 3.61}


 37%|███▋      | 2550/6920 [31:28<49:47,  1.46it/s]

{'loss': 1.6257, 'grad_norm': 22.097810745239258, 'learning_rate': 1.263583815028902e-05, 'epoch': 3.68}


 38%|███▊      | 2600/6920 [32:02<50:54,  1.41it/s]

{'loss': 1.6022, 'grad_norm': 21.08141326904297, 'learning_rate': 1.2491329479768788e-05, 'epoch': 3.76}


 38%|███▊      | 2650/6920 [32:36<48:54,  1.46it/s]

{'loss': 1.6149, 'grad_norm': 16.8810977935791, 'learning_rate': 1.2346820809248557e-05, 'epoch': 3.83}


 39%|███▉      | 2700/6920 [33:11<48:29,  1.45it/s]

{'loss': 1.6805, 'grad_norm': 14.865697860717773, 'learning_rate': 1.2202312138728324e-05, 'epoch': 3.9}


 40%|███▉      | 2750/6920 [33:45<47:35,  1.46it/s]

{'loss': 1.6514, 'grad_norm': 16.678699493408203, 'learning_rate': 1.2057803468208093e-05, 'epoch': 3.97}


                                                   
 40%|████      | 2768/6920 [34:32<42:11,  1.64it/s]

{'eval_loss': 2.444145679473877, 'eval_accuracy': 0.36501626310083124, 'eval_runtime': 34.9702, 'eval_samples_per_second': 79.124, 'eval_steps_per_second': 4.947, 'epoch': 4.0}


 40%|████      | 2800/6920 [35:06<48:19,  1.42it/s]   

{'loss': 1.4741, 'grad_norm': 17.386316299438477, 'learning_rate': 1.1916184971098265e-05, 'epoch': 4.05}


 41%|████      | 2850/6920 [35:41<46:42,  1.45it/s]

{'loss': 1.3286, 'grad_norm': 15.003543853759766, 'learning_rate': 1.1771676300578036e-05, 'epoch': 4.12}


 42%|████▏     | 2900/6920 [36:15<45:49,  1.46it/s]

{'loss': 1.3234, 'grad_norm': 16.77688980102539, 'learning_rate': 1.1627167630057805e-05, 'epoch': 4.19}


 43%|████▎     | 2950/6920 [36:49<46:38,  1.42it/s]

{'loss': 1.3881, 'grad_norm': 15.55764389038086, 'learning_rate': 1.1482658959537574e-05, 'epoch': 4.26}


 43%|████▎     | 3000/6920 [37:24<44:36,  1.46it/s]

{'loss': 1.3944, 'grad_norm': 20.297773361206055, 'learning_rate': 1.1338150289017343e-05, 'epoch': 4.34}


 44%|████▍     | 3050/6920 [37:58<44:16,  1.46it/s]

{'loss': 1.3513, 'grad_norm': 16.479354858398438, 'learning_rate': 1.1193641618497112e-05, 'epoch': 4.41}


 45%|████▍     | 3100/6920 [38:32<43:48,  1.45it/s]

{'loss': 1.296, 'grad_norm': 20.61298370361328, 'learning_rate': 1.1049132947976879e-05, 'epoch': 4.48}


 46%|████▌     | 3150/6920 [39:07<42:48,  1.47it/s]

{'loss': 1.3801, 'grad_norm': 21.906747817993164, 'learning_rate': 1.0904624277456648e-05, 'epoch': 4.55}


 46%|████▌     | 3200/6920 [39:41<42:33,  1.46it/s]

{'loss': 1.4375, 'grad_norm': 19.14889144897461, 'learning_rate': 1.0760115606936417e-05, 'epoch': 4.62}


 47%|████▋     | 3250/6920 [40:15<41:48,  1.46it/s]

{'loss': 1.4433, 'grad_norm': 18.921998977661133, 'learning_rate': 1.0615606936416186e-05, 'epoch': 4.7}


 48%|████▊     | 3300/6920 [40:50<41:33,  1.45it/s]

{'loss': 1.3454, 'grad_norm': 16.762168884277344, 'learning_rate': 1.0471098265895953e-05, 'epoch': 4.77}


 48%|████▊     | 3350/6920 [41:24<40:54,  1.45it/s]

{'loss': 1.3587, 'grad_norm': 20.746992111206055, 'learning_rate': 1.0326589595375725e-05, 'epoch': 4.84}


 49%|████▉     | 3400/6920 [41:59<41:34,  1.41it/s]

{'loss': 1.4021, 'grad_norm': 24.452632904052734, 'learning_rate': 1.0182080924855492e-05, 'epoch': 4.91}


 50%|████▉     | 3450/6920 [42:34<39:19,  1.47it/s]

{'loss': 1.4092, 'grad_norm': 20.525039672851562, 'learning_rate': 1.0037572254335261e-05, 'epoch': 4.99}


                                                   
 50%|█████     | 3460/6920 [43:15<35:07,  1.64it/s]

{'eval_loss': 2.551812171936035, 'eval_accuracy': 0.3570654138055656, 'eval_runtime': 34.5671, 'eval_samples_per_second': 80.047, 'eval_steps_per_second': 5.005, 'epoch': 5.0}


 51%|█████     | 3500/6920 [43:47<40:42,  1.40it/s]   

{'loss': 1.2278, 'grad_norm': 16.45635414123535, 'learning_rate': 9.89306358381503e-06, 'epoch': 5.06}


 51%|█████▏    | 3550/6920 [44:21<38:16,  1.47it/s]

{'loss': 1.1433, 'grad_norm': 15.995635986328125, 'learning_rate': 9.7485549132948e-06, 'epoch': 5.13}


 52%|█████▏    | 3600/6920 [44:55<37:20,  1.48it/s]

{'loss': 1.1809, 'grad_norm': 20.14066505432129, 'learning_rate': 9.604046242774568e-06, 'epoch': 5.2}


 53%|█████▎    | 3650/6920 [45:30<38:26,  1.42it/s]

{'loss': 1.1589, 'grad_norm': 18.979835510253906, 'learning_rate': 9.459537572254335e-06, 'epoch': 5.27}


 53%|█████▎    | 3700/6920 [46:04<36:45,  1.46it/s]

{'loss': 1.2091, 'grad_norm': 20.516311645507812, 'learning_rate': 9.315028901734104e-06, 'epoch': 5.35}


 54%|█████▍    | 3750/6920 [46:38<35:47,  1.48it/s]

{'loss': 1.1743, 'grad_norm': 21.215770721435547, 'learning_rate': 9.170520231213875e-06, 'epoch': 5.42}


 55%|█████▍    | 3800/6920 [47:12<35:42,  1.46it/s]

{'loss': 1.1479, 'grad_norm': 18.464242935180664, 'learning_rate': 9.026011560693642e-06, 'epoch': 5.49}


 56%|█████▌    | 3850/6920 [47:47<35:09,  1.46it/s]

{'loss': 1.1284, 'grad_norm': 15.851239204406738, 'learning_rate': 8.881502890173411e-06, 'epoch': 5.56}


 56%|█████▋    | 3900/6920 [48:21<34:25,  1.46it/s]

{'loss': 1.1908, 'grad_norm': 21.315574645996094, 'learning_rate': 8.73699421965318e-06, 'epoch': 5.64}


 57%|█████▋    | 3950/6920 [48:56<33:55,  1.46it/s]

{'loss': 1.1859, 'grad_norm': 24.59602165222168, 'learning_rate': 8.592485549132949e-06, 'epoch': 5.71}


 58%|█████▊    | 4000/6920 [49:30<33:22,  1.46it/s]

{'loss': 1.1385, 'grad_norm': 23.792627334594727, 'learning_rate': 8.447976878612716e-06, 'epoch': 5.78}


 59%|█████▊    | 4050/6920 [50:04<32:48,  1.46it/s]

{'loss': 1.1862, 'grad_norm': 22.34578514099121, 'learning_rate': 8.303468208092487e-06, 'epoch': 5.85}


 59%|█████▉    | 4100/6920 [50:39<32:17,  1.46it/s]

{'loss': 1.1698, 'grad_norm': 19.471426010131836, 'learning_rate': 8.158959537572256e-06, 'epoch': 5.92}


 60%|█████▉    | 4150/6920 [51:13<31:45,  1.45it/s]

{'loss': 1.1512, 'grad_norm': 21.960599899291992, 'learning_rate': 8.014450867052025e-06, 'epoch': 6.0}


                                                   
 60%|██████    | 4152/6920 [51:49<28:23,  1.62it/s]

{'eval_loss': 2.705454111099243, 'eval_accuracy': 0.35561980484279004, 'eval_runtime': 34.946, 'eval_samples_per_second': 79.179, 'eval_steps_per_second': 4.95, 'epoch': 6.0}


 61%|██████    | 4200/6920 [52:28<31:14,  1.45it/s]  

{'loss': 0.9636, 'grad_norm': 15.101083755493164, 'learning_rate': 7.869942196531792e-06, 'epoch': 6.07}


 61%|██████▏   | 4250/6920 [53:03<30:33,  1.46it/s]

{'loss': 0.9273, 'grad_norm': 29.77641487121582, 'learning_rate': 7.72543352601156e-06, 'epoch': 6.14}


 62%|██████▏   | 4300/6920 [53:37<29:49,  1.46it/s]

{'loss': 0.9567, 'grad_norm': 19.710357666015625, 'learning_rate': 7.5809248554913305e-06, 'epoch': 6.21}


 63%|██████▎   | 4350/6920 [54:11<29:26,  1.45it/s]

{'loss': 0.9349, 'grad_norm': 20.56467056274414, 'learning_rate': 7.4364161849710995e-06, 'epoch': 6.29}


 64%|██████▎   | 4400/6920 [54:45<28:36,  1.47it/s]

{'loss': 0.9436, 'grad_norm': 16.596372604370117, 'learning_rate': 7.2919075144508675e-06, 'epoch': 6.36}


 64%|██████▍   | 4450/6920 [55:19<28:07,  1.46it/s]

{'loss': 0.965, 'grad_norm': 18.655872344970703, 'learning_rate': 7.1473988439306365e-06, 'epoch': 6.43}


 65%|██████▌   | 4500/6920 [55:54<26:56,  1.50it/s]

{'loss': 0.9481, 'grad_norm': 14.904879570007324, 'learning_rate': 7.00578034682081e-06, 'epoch': 6.5}


 66%|██████▌   | 4550/6920 [56:28<26:59,  1.46it/s]

{'loss': 1.0817, 'grad_norm': 21.258853912353516, 'learning_rate': 6.861271676300578e-06, 'epoch': 6.58}


 66%|██████▋   | 4600/6920 [57:02<26:27,  1.46it/s]

{'loss': 0.9888, 'grad_norm': 25.060781478881836, 'learning_rate': 6.716763005780348e-06, 'epoch': 6.65}


 67%|██████▋   | 4650/6920 [57:36<25:46,  1.47it/s]

{'loss': 0.9838, 'grad_norm': 22.188556671142578, 'learning_rate': 6.572254335260117e-06, 'epoch': 6.72}


 68%|██████▊   | 4700/6920 [58:10<25:08,  1.47it/s]

{'loss': 0.9986, 'grad_norm': 19.861909866333008, 'learning_rate': 6.427745664739885e-06, 'epoch': 6.79}


 69%|██████▊   | 4750/6920 [58:44<24:42,  1.46it/s]

{'loss': 1.0214, 'grad_norm': 25.569841384887695, 'learning_rate': 6.283236994219654e-06, 'epoch': 6.86}


 69%|██████▉   | 4800/6920 [59:19<24:14,  1.46it/s]

{'loss': 0.996, 'grad_norm': 22.823057174682617, 'learning_rate': 6.138728323699422e-06, 'epoch': 6.94}


                                                   
 70%|███████   | 4844/6920 [1:00:23<21:07,  1.64it/s]

{'eval_loss': 2.838918447494507, 'eval_accuracy': 0.3509215757137694, 'eval_runtime': 34.6308, 'eval_samples_per_second': 79.9, 'eval_steps_per_second': 4.996, 'epoch': 7.0}


 70%|███████   | 4844/6920 [1:00:30<25:55,  1.33it/s]


{'train_runtime': 3630.5141, 'train_samples_per_second': 30.478, 'train_steps_per_second': 1.906, 'train_loss': 1.706913396014742, 'epoch': 7.0}


100%|██████████| 173/173 [00:34<00:00,  5.00it/s]

Test results: {'eval_loss': 2.3015594482421875, 'eval_accuracy': 0.37188290567401516, 'eval_runtime': 34.8221, 'eval_samples_per_second': 79.461, 'eval_steps_per_second': 4.968, 'epoch': 7.0}





In [3]:
# Supponendo di aver già definito:
# test_dataset = EmotionDataset(X_test, y_test, tokenizer, max_len=128)

# Se hai utilizzato Trainer per l’addestramento, puoi facilmente eseguire:
test_results = trainer.evaluate(eval_dataset=test_dataset)
print("Test results:", test_results)


100%|██████████| 173/173 [00:34<00:00,  5.01it/s]

Test results: {'eval_loss': 2.3015594482421875, 'eval_accuracy': 0.37188290567401516, 'eval_runtime': 34.7901, 'eval_samples_per_second': 79.534, 'eval_steps_per_second': 4.973, 'epoch': 7.0}





In [4]:
predictions = trainer.predict(test_dataset)
preds = predictions.predictions
labels = predictions.label_ids
metrics = predictions.metrics

print("Metrics on test set:", metrics)


100%|██████████| 173/173 [00:34<00:00,  4.99it/s]

Metrics on test set: {'test_loss': 2.3015594482421875, 'test_accuracy': 0.37188290567401516, 'test_runtime': 34.8764, 'test_samples_per_second': 79.337, 'test_steps_per_second': 4.96}





In [5]:
predictions = trainer.predict(test_dataset)
print(predictions.metrics)

preds = np.argmax(predictions.predictions, axis=1)
labels = predictions.label_ids
# Ora puoi confrontare preds e labels


100%|██████████| 173/173 [00:34<00:00,  5.00it/s]

{'test_loss': 2.3015594482421875, 'test_accuracy': 0.37188290567401516, 'test_runtime': 34.855, 'test_samples_per_second': 79.386, 'test_steps_per_second': 4.963}





In [6]:
import numpy as np

logits = predictions.predictions
preds = np.argmax(logits, axis=1)  # Indici delle classi predette
labels = predictions.label_ids      # Etichette reali


In [7]:
for i in range(10):  # primi 10 esempi
    print(f"Example {i}: Prediction = {preds[i]}, Label = {labels[i]}")


Example 0: Prediction = 16, Label = 19
Example 1: Prediction = 14, Label = 27
Example 2: Prediction = 19, Label = 27
Example 3: Prediction = 7, Label = 7
Example 4: Prediction = 20, Label = 5
Example 5: Prediction = 5, Label = 1
Example 6: Prediction = 14, Label = 14
Example 7: Prediction = 0, Label = 0
Example 8: Prediction = 21, Label = 21
Example 9: Prediction = 12, Label = 9


In [8]:
from scipy.special import expit  # funzione sigmoid

probs = expit(logits)  # se i logit sono raw
predicted_labels = [np.where(probs[i] > 0.5)[0] for i in range(len(probs))]


In [9]:
import evaluate

f1_metric = evaluate.load("f1")

# Calcolo dell'F1 macro
f1_macro = f1_metric.compute(predictions=preds, references=labels, average="macro")
print("F1 Macro:", f1_macro['f1'])

# Calcolo dell'F1 weighted
f1_weighted = f1_metric.compute(predictions=preds, references=labels, average="weighted")
print("F1 Weighted:", f1_weighted['f1'])


F1 Macro: 0.35506979467585403
F1 Weighted: 0.35521537526526514


In [2]:
import torchaudio
from torchaudio.backend.utils import get_audio_backend

# Mostra il backend audio attualmente in uso
print("Audio Backend:", get_audio_backend())

# Imposta il backend su 'ffmpeg' se disponibile
try:
    torchaudio.backend.utils.set_audio_backend("ffmpeg")
    print("FFmpeg configurato correttamente.")
except Exception as e:
    print("Errore durante la configurazione di FFmpeg:", e)



ModuleNotFoundError: No module named 'torchaudio.backend.utils'

In [11]:
import os
os.environ["PATH"] += os.pathsep + r"D:\Programmi\ffmpeg-master-latest-win64-gpl\bin"


In [12]:
import torchaudio

try:
    torchaudio.set_audio_backend("ffmpeg")
    print("Audio Backend impostato su FFmpeg.")
except Exception as e:
    print("Errore durante l'impostazione del backend FFmpeg:", e)


Audio Backend impostato su FFmpeg.


  torchaudio.set_audio_backend("ffmpeg")


In [13]:

file_path = "merry-christmas-to-all-to-all-a-good-night-santa-voice-david-h-m-lambert-1-1-00-06.mp3"  # Sostituisci con il percorso di un file audio
try:
    waveform, sample_rate = torchaudio.load(file_path)
    print("Caricamento audio riuscito!")
    print("Forma del waveform:", waveform.shape)
    print("Frequenza di campionamento:", sample_rate)
except Exception as e:
    print("Errore durante il caricamento del file audio:", e)


Errore durante il caricamento del file audio: Couldn't find appropriate backend to handle uri merry-christmas-to-all-to-all-a-good-night-santa-voice-david-h-m-lambert-1-1-00-06.mp3 and format None.


In [6]:
import torchaudio

# Mostra il backend audio attualmente configurato
print("Audio Backend:", torchaudio.get_audio_backend())


Audio Backend: None


  print("Audio Backend:", torchaudio.get_audio_backend())


# Prova 3

In [1]:
import pandas as pd
import torch
import re
import random
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import (AutoTokenizer, DebertaForSequenceClassification,
                          Trainer, TrainingArguments, DataCollatorWithPadding, 
                          EarlyStoppingCallback)
from torch.utils.data import Dataset
import evaluate
import wandb

wandb.init(project="il_tuo_progetto", name="deberta_base_simplified")

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

known_emotions_list = [
    'admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
    'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
    'relief', 'remorse', 'sadness', 'surprise', 'neutral'
]

file_paths = [
    '../data/geoemotions/goemotions_1.csv',
    '../data/geoemotions/goemotions_2.csv',
    '../data/geoemotions/goemotions_3.csv'
]

dataframes = [pd.read_csv(file) for file in file_paths]
combined_data = pd.concat(dataframes, ignore_index=True)

emotion_columns = [col for col in combined_data.columns if col in known_emotions_list]
if not emotion_columns:
    raise ValueError("Nessuna colonna di emozione trovata nel dataset.")

combined_data['label'] = combined_data[emotion_columns].idxmax(axis=1)
combined_data = combined_data[combined_data['text'].astype(str).str.strip().astype(bool)]
combined_data = combined_data[combined_data[emotion_columns].sum(axis=1) > 0]

unique_labels = combined_data['label'].unique()
label_mapping = {label: i for i, label in enumerate(unique_labels)}
combined_data['label'] = combined_data['label'].map(label_mapping)

def preprocess_text(text):
    text = str(text).lower()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^a-z0-9\s\.,!?;]', '', text)
    text = text.strip()
    return text

combined_data['cleaned_text'] = combined_data['text'].apply(preprocess_text)

class_counts = combined_data['label'].value_counts()
min_class_count = class_counts.min()
balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(
    balanced_data['cleaned_text'],
    balanced_data['label'],
    test_size=0.2,
    random_state=SEED,
    stratify=balanced_data['label']
)

class EmotionDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts.reset_index(drop=True)
        self.labels = labels.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts.iloc[index]
        label = self.labels.iloc[index]

        encoding = self.tokenizer(
            text,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long)
        }

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base")
train_dataset = EmotionDataset(X_train, y_train, tokenizer, max_len=128)
test_dataset = EmotionDataset(X_test, y_test, tokenizer, max_len=128)

num_labels = len(label_mapping)
class_counts = y_train.value_counts()
total = len(y_train)
class_weights = [total/class_counts[i] for i in range(len(class_counts))]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

model = DebertaForSequenceClassification.from_pretrained(
    "microsoft/deberta-base",
    num_labels=num_labels
).to(device)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    acc = accuracy_metric.compute(predictions=preds, references=labels)
    f1_macro = f1_metric.compute(predictions=preds, references=labels, average="macro")
    f1_weighted = f1_metric.compute(predictions=preds, references=labels, average="weighted")
    return {
        "accuracy": acc["accuracy"],
        "f1_macro": f1_macro["f1"],
        "f1_weighted": f1_weighted["f1"]
    }

from transformers.trainer import Trainer

class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.get("labels")
        outputs = model(**{k: v for k, v in inputs.items() if k != "labels"})
        logits = outputs.get("logits")
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=50,
    seed=SEED,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    greater_is_better=True,
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    report_to="wandb",
    fp16=True
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.0
)

trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)

trainer.train()
eval_results = trainer.evaluate()
print("Test results:", eval_results)


  from .autonotebook import tqdm as notebook_tqdm





[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mnicologiuse2003[0m ([33mnicologiuse2003-ironhack[0m). Use [1m`wandb login --relogin`[0m to force relogin


  balanced_data = combined_data.groupby('label').apply(lambda x: x.sample(n=min_class_count, random_state=SEED)).reset_index(drop=True)
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = WeightedTrainer(
  1%|▏         | 50/3460 [00:35<40:08,  1.42it/s]

{'loss': 3.3387, 'grad_norm': 1.450717568397522, 'learning_rate': 1.971098265895954e-05, 'epoch': 0.07}


  3%|▎         | 100/3460 [01:10<38:10,  1.47it/s]

{'loss': 3.2886, 'grad_norm': 6.55365514755249, 'learning_rate': 1.9433526011560696e-05, 'epoch': 0.14}


  4%|▍         | 150/3460 [01:44<37:05,  1.49it/s]

{'loss': 3.325, 'grad_norm': 18.818981170654297, 'learning_rate': 1.9150289017341043e-05, 'epoch': 0.22}


  6%|▌         | 200/3460 [02:18<37:09,  1.46it/s]

{'loss': 3.3045, 'grad_norm': 17.545177459716797, 'learning_rate': 1.8861271676300578e-05, 'epoch': 0.29}


  7%|▋         | 250/3460 [02:52<36:25,  1.47it/s]

{'loss': 3.1584, 'grad_norm': 18.08842658996582, 'learning_rate': 1.857225433526012e-05, 'epoch': 0.36}


  9%|▊         | 300/3460 [03:26<35:56,  1.47it/s]

{'loss': 3.0049, 'grad_norm': 13.16912841796875, 'learning_rate': 1.8283236994219657e-05, 'epoch': 0.43}


 10%|█         | 350/3460 [04:00<35:23,  1.46it/s]

{'loss': 2.8589, 'grad_norm': 7.619739055633545, 'learning_rate': 1.799421965317919e-05, 'epoch': 0.51}


 12%|█▏        | 400/3460 [04:35<35:04,  1.45it/s]

{'loss': 2.7605, 'grad_norm': 7.953896522521973, 'learning_rate': 1.770520231213873e-05, 'epoch': 0.58}


 13%|█▎        | 450/3460 [05:09<34:14,  1.47it/s]

{'loss': 2.725, 'grad_norm': 13.338898658752441, 'learning_rate': 1.7416184971098267e-05, 'epoch': 0.65}


 14%|█▍        | 500/3460 [05:43<33:49,  1.46it/s]

{'loss': 2.675, 'grad_norm': 59.058292388916016, 'learning_rate': 1.7127167630057805e-05, 'epoch': 0.72}


 16%|█▌        | 550/3460 [06:18<33:14,  1.46it/s]

{'loss': 2.5874, 'grad_norm': 10.148837089538574, 'learning_rate': 1.6838150289017343e-05, 'epoch': 0.79}


 17%|█▋        | 600/3460 [06:52<32:21,  1.47it/s]

{'loss': 2.5861, 'grad_norm': 14.536416053771973, 'learning_rate': 1.654913294797688e-05, 'epoch': 0.87}


 19%|█▉        | 650/3460 [07:26<32:12,  1.45it/s]

{'loss': 2.5139, 'grad_norm': 12.318488121032715, 'learning_rate': 1.6260115606936418e-05, 'epoch': 0.94}


                                                  
 20%|██        | 692/3460 [08:29<27:58,  1.65it/s]

{'eval_loss': 2.4374170303344727, 'eval_accuracy': 0.32453921214311526, 'eval_f1_macro': 0.28910300829922725, 'eval_f1_weighted': 0.28925316168742693, 'eval_runtime': 34.3267, 'eval_samples_per_second': 80.608, 'eval_steps_per_second': 5.04, 'epoch': 1.0}


 20%|██        | 700/3460 [08:40<1:17:03,  1.68s/it]

{'loss': 2.462, 'grad_norm': 9.157601356506348, 'learning_rate': 1.5971098265895956e-05, 'epoch': 1.01}


 22%|██▏       | 750/3460 [09:14<30:40,  1.47it/s]  

{'loss': 2.3188, 'grad_norm': 12.530547142028809, 'learning_rate': 1.568208092485549e-05, 'epoch': 1.08}


 23%|██▎       | 800/3460 [09:48<30:16,  1.46it/s]

{'loss': 2.3456, 'grad_norm': 9.7249755859375, 'learning_rate': 1.5393063583815032e-05, 'epoch': 1.16}


 25%|██▍       | 850/3460 [10:23<29:40,  1.47it/s]

{'loss': 2.2715, 'grad_norm': 12.946310043334961, 'learning_rate': 1.5104046242774568e-05, 'epoch': 1.23}


 26%|██▌       | 900/3460 [10:57<29:03,  1.47it/s]

{'loss': 2.2537, 'grad_norm': 15.853528022766113, 'learning_rate': 1.4815028901734106e-05, 'epoch': 1.3}


 27%|██▋       | 950/3460 [11:31<28:30,  1.47it/s]

{'loss': 2.3322, 'grad_norm': 13.851468086242676, 'learning_rate': 1.4531791907514451e-05, 'epoch': 1.37}


 29%|██▉       | 1000/3460 [12:05<27:52,  1.47it/s]

{'loss': 2.2644, 'grad_norm': 16.244157791137695, 'learning_rate': 1.4242774566473989e-05, 'epoch': 1.45}


 30%|███       | 1050/3460 [12:39<27:24,  1.47it/s]

{'loss': 2.323, 'grad_norm': 12.786933898925781, 'learning_rate': 1.3953757225433527e-05, 'epoch': 1.52}


 32%|███▏      | 1100/3460 [13:13<26:49,  1.47it/s]

{'loss': 2.2342, 'grad_norm': 15.881603240966797, 'learning_rate': 1.3664739884393065e-05, 'epoch': 1.59}


 33%|███▎      | 1150/3460 [13:47<26:15,  1.47it/s]

{'loss': 2.2741, 'grad_norm': 14.45245361328125, 'learning_rate': 1.3375722543352603e-05, 'epoch': 1.66}


 35%|███▍      | 1200/3460 [14:21<25:36,  1.47it/s]

{'loss': 2.1974, 'grad_norm': 11.042255401611328, 'learning_rate': 1.308670520231214e-05, 'epoch': 1.73}


 36%|███▌      | 1250/3460 [14:56<24:55,  1.48it/s]

{'loss': 2.2804, 'grad_norm': 17.229629516601562, 'learning_rate': 1.2797687861271677e-05, 'epoch': 1.81}


 38%|███▊      | 1300/3460 [15:30<24:40,  1.46it/s]

{'loss': 2.3322, 'grad_norm': 31.28887939453125, 'learning_rate': 1.2508670520231215e-05, 'epoch': 1.88}


 39%|███▉      | 1350/3460 [16:04<23:46,  1.48it/s]

{'loss': 2.2686, 'grad_norm': 21.09587287902832, 'learning_rate': 1.221965317919075e-05, 'epoch': 1.95}


                                                   
 40%|████      | 1384/3460 [17:01<21:08,  1.64it/s]

{'eval_loss': 2.3274827003479004, 'eval_accuracy': 0.35887242500903505, 'eval_f1_macro': 0.33709439385291967, 'eval_f1_weighted': 0.3372302122928686, 'eval_runtime': 34.5681, 'eval_samples_per_second': 80.045, 'eval_steps_per_second': 5.005, 'epoch': 2.0}


 40%|████      | 1400/3460 [17:18<25:28,  1.35it/s]  

{'loss': 2.1605, 'grad_norm': 11.396627426147461, 'learning_rate': 1.193063583815029e-05, 'epoch': 2.02}


 42%|████▏     | 1450/3460 [17:52<22:53,  1.46it/s]

{'loss': 2.0632, 'grad_norm': 137.0626983642578, 'learning_rate': 1.1641618497109828e-05, 'epoch': 2.1}


 43%|████▎     | 1500/3460 [18:27<22:20,  1.46it/s]

{'loss': 1.9619, 'grad_norm': 16.434045791625977, 'learning_rate': 1.1352601156069364e-05, 'epoch': 2.17}


 45%|████▍     | 1550/3460 [19:01<21:46,  1.46it/s]

{'loss': 2.109, 'grad_norm': 64.51547241210938, 'learning_rate': 1.1063583815028902e-05, 'epoch': 2.24}


 46%|████▌     | 1600/3460 [19:35<21:07,  1.47it/s]

{'loss': 2.0048, 'grad_norm': 18.765867233276367, 'learning_rate': 1.078034682080925e-05, 'epoch': 2.31}


 48%|████▊     | 1650/3460 [20:09<20:51,  1.45it/s]

{'loss': 1.9761, 'grad_norm': 11.250224113464355, 'learning_rate': 1.0491329479768785e-05, 'epoch': 2.38}


 49%|████▉     | 1700/3460 [20:43<20:07,  1.46it/s]

{'loss': 1.9405, 'grad_norm': 14.939823150634766, 'learning_rate': 1.0202312138728325e-05, 'epoch': 2.46}


 51%|█████     | 1750/3460 [21:18<19:29,  1.46it/s]

{'loss': 1.9917, 'grad_norm': 16.2454776763916, 'learning_rate': 9.913294797687863e-06, 'epoch': 2.53}


 52%|█████▏    | 1800/3460 [21:52<18:53,  1.46it/s]

{'loss': 2.0324, 'grad_norm': 15.493701934814453, 'learning_rate': 9.624277456647399e-06, 'epoch': 2.6}


 53%|█████▎    | 1850/3460 [22:26<18:15,  1.47it/s]

{'loss': 2.0338, 'grad_norm': 16.104549407958984, 'learning_rate': 9.335260115606937e-06, 'epoch': 2.67}


 55%|█████▍    | 1900/3460 [23:00<17:36,  1.48it/s]

{'loss': 2.0301, 'grad_norm': 18.03075408935547, 'learning_rate': 9.052023121387284e-06, 'epoch': 2.75}


 56%|█████▋    | 1950/3460 [23:34<17:11,  1.46it/s]

{'loss': 2.0862, 'grad_norm': 16.614795684814453, 'learning_rate': 8.763005780346822e-06, 'epoch': 2.82}


 58%|█████▊    | 2000/3460 [24:09<16:37,  1.46it/s]

{'loss': 1.9836, 'grad_norm': 14.113754272460938, 'learning_rate': 8.473988439306358e-06, 'epoch': 2.89}


 59%|█████▉    | 2050/3460 [24:43<16:04,  1.46it/s]

{'loss': 1.8961, 'grad_norm': 22.28241729736328, 'learning_rate': 8.184971098265898e-06, 'epoch': 2.96}


                                                   
 60%|██████    | 2076/3460 [25:35<14:04,  1.64it/s]

{'eval_loss': 2.3318047523498535, 'eval_accuracy': 0.3614022406938923, 'eval_f1_macro': 0.3486000326465891, 'eval_f1_weighted': 0.3487169786211432, 'eval_runtime': 34.5683, 'eval_samples_per_second': 80.044, 'eval_steps_per_second': 5.005, 'epoch': 3.0}


 61%|██████    | 2100/3460 [25:57<15:41,  1.44it/s]  

{'loss': 1.8691, 'grad_norm': 16.830041885375977, 'learning_rate': 7.895953757225434e-06, 'epoch': 3.03}


 62%|██████▏   | 2150/3460 [26:32<14:49,  1.47it/s]

{'loss': 1.8227, 'grad_norm': 9.325413703918457, 'learning_rate': 7.606936416184972e-06, 'epoch': 3.11}


 64%|██████▎   | 2200/3460 [27:06<14:19,  1.47it/s]

{'loss': 1.7666, 'grad_norm': 15.703383445739746, 'learning_rate': 7.3179190751445094e-06, 'epoch': 3.18}


 65%|██████▌   | 2250/3460 [27:40<13:44,  1.47it/s]

{'loss': 1.749, 'grad_norm': 16.50896644592285, 'learning_rate': 7.0289017341040464e-06, 'epoch': 3.25}


 66%|██████▋   | 2300/3460 [28:14<13:10,  1.47it/s]

{'loss': 1.7565, 'grad_norm': 15.915733337402344, 'learning_rate': 6.739884393063585e-06, 'epoch': 3.32}


 68%|██████▊   | 2350/3460 [28:48<12:42,  1.46it/s]

{'loss': 1.7974, 'grad_norm': 13.494584083557129, 'learning_rate': 6.450867052023122e-06, 'epoch': 3.4}


 69%|██████▉   | 2400/3460 [29:22<12:04,  1.46it/s]

{'loss': 1.8026, 'grad_norm': 24.79961395263672, 'learning_rate': 6.161849710982659e-06, 'epoch': 3.47}


 71%|███████   | 2450/3460 [29:56<11:25,  1.47it/s]

{'loss': 1.8064, 'grad_norm': 12.594547271728516, 'learning_rate': 5.872832369942197e-06, 'epoch': 3.54}


 72%|███████▏  | 2500/3460 [30:31<10:56,  1.46it/s]

{'loss': 1.9012, 'grad_norm': 13.898002624511719, 'learning_rate': 5.583815028901735e-06, 'epoch': 3.61}


 74%|███████▎  | 2550/3460 [31:05<10:23,  1.46it/s]

{'loss': 1.8073, 'grad_norm': 13.404322624206543, 'learning_rate': 5.294797687861272e-06, 'epoch': 3.68}


 75%|███████▌  | 2600/3460 [31:39<09:55,  1.44it/s]

{'loss': 1.7743, 'grad_norm': 20.75869369506836, 'learning_rate': 5.00578034682081e-06, 'epoch': 3.76}


 77%|███████▋  | 2650/3460 [32:14<09:15,  1.46it/s]

{'loss': 1.7841, 'grad_norm': 19.158510208129883, 'learning_rate': 4.716763005780347e-06, 'epoch': 3.83}


 78%|███████▊  | 2700/3460 [32:48<08:36,  1.47it/s]

{'loss': 1.8387, 'grad_norm': 12.680228233337402, 'learning_rate': 4.427745664739885e-06, 'epoch': 3.9}


 79%|███████▉  | 2750/3460 [33:23<08:07,  1.46it/s]

{'loss': 1.8422, 'grad_norm': 13.687055587768555, 'learning_rate': 4.1387283236994224e-06, 'epoch': 3.97}


                                                   
 80%|████████  | 2768/3460 [34:10<07:03,  1.63it/s]

{'eval_loss': 2.3598270416259766, 'eval_accuracy': 0.36067943621250453, 'eval_f1_macro': 0.34963552693097316, 'eval_f1_weighted': 0.3497298435669147, 'eval_runtime': 34.7032, 'eval_samples_per_second': 79.733, 'eval_steps_per_second': 4.985, 'epoch': 4.0}


 81%|████████  | 2800/3460 [34:35<07:30,  1.47it/s]  

{'loss': 1.7132, 'grad_norm': 11.675568580627441, 'learning_rate': 3.8497109826589594e-06, 'epoch': 4.05}


 82%|████████▏ | 2850/3460 [35:09<06:54,  1.47it/s]

{'loss': 1.6589, 'grad_norm': 13.923113822937012, 'learning_rate': 3.5606936416184973e-06, 'epoch': 4.12}


 84%|████████▍ | 2900/3460 [35:44<06:23,  1.46it/s]

{'loss': 1.6101, 'grad_norm': 17.2692813873291, 'learning_rate': 3.271676300578035e-06, 'epoch': 4.19}


 85%|████████▌ | 2950/3460 [36:18<05:50,  1.46it/s]

{'loss': 1.6713, 'grad_norm': 15.294452667236328, 'learning_rate': 2.9826589595375726e-06, 'epoch': 4.26}


 87%|████████▋ | 3000/3460 [36:52<05:17,  1.45it/s]

{'loss': 1.6776, 'grad_norm': 14.46310806274414, 'learning_rate': 2.69364161849711e-06, 'epoch': 4.34}


 88%|████████▊ | 3050/3460 [37:27<04:41,  1.46it/s]

{'loss': 1.6467, 'grad_norm': 15.49555492401123, 'learning_rate': 2.4046242774566474e-06, 'epoch': 4.41}


 90%|████████▉ | 3100/3460 [38:01<04:09,  1.45it/s]

{'loss': 1.5763, 'grad_norm': 13.8775053024292, 'learning_rate': 2.1156069364161853e-06, 'epoch': 4.48}


 91%|█████████ | 3150/3460 [38:35<03:32,  1.46it/s]

{'loss': 1.6346, 'grad_norm': 16.78697967529297, 'learning_rate': 1.8265895953757227e-06, 'epoch': 4.55}


 92%|█████████▏| 3200/3460 [39:10<02:58,  1.46it/s]

{'loss': 1.6759, 'grad_norm': 19.975406646728516, 'learning_rate': 1.5375722543352603e-06, 'epoch': 4.62}


 94%|█████████▍| 3250/3460 [39:44<02:23,  1.46it/s]

{'loss': 1.6994, 'grad_norm': 18.992202758789062, 'learning_rate': 1.2485549132947978e-06, 'epoch': 4.7}


 95%|█████████▌| 3300/3460 [40:18<01:49,  1.46it/s]

{'loss': 1.5954, 'grad_norm': 15.62440013885498, 'learning_rate': 9.595375722543352e-07, 'epoch': 4.77}


 97%|█████████▋| 3350/3460 [40:52<01:14,  1.47it/s]

{'loss': 1.6473, 'grad_norm': 15.309535026550293, 'learning_rate': 6.705202312138728e-07, 'epoch': 4.84}


 98%|█████████▊| 3400/3460 [41:27<00:41,  1.46it/s]

{'loss': 1.6002, 'grad_norm': 25.686981201171875, 'learning_rate': 3.8150289017341043e-07, 'epoch': 4.91}


100%|█████████▉| 3450/3460 [42:01<00:06,  1.46it/s]

{'loss': 1.6319, 'grad_norm': 19.03200340270996, 'learning_rate': 9.248554913294799e-08, 'epoch': 4.99}


                                                   
100%|██████████| 3460/3460 [42:47<00:00,  1.61it/s]

{'eval_loss': 2.377781391143799, 'eval_accuracy': 0.36393205637874954, 'eval_f1_macro': 0.35319960084597957, 'eval_f1_weighted': 0.3533029138940354, 'eval_runtime': 34.7328, 'eval_samples_per_second': 79.665, 'eval_steps_per_second': 4.981, 'epoch': 5.0}


100%|██████████| 3460/3460 [42:53<00:00,  1.34it/s]


{'train_runtime': 2573.5769, 'train_samples_per_second': 21.497, 'train_steps_per_second': 1.344, 'train_loss': 2.127726753874321, 'epoch': 5.0}


100%|██████████| 173/173 [00:34<00:00,  5.06it/s]

Test results: {'eval_loss': 2.377781391143799, 'eval_accuracy': 0.36393205637874954, 'eval_f1_macro': 0.35319960084597957, 'eval_f1_weighted': 0.3533029138940354, 'eval_runtime': 34.3816, 'eval_samples_per_second': 80.479, 'eval_steps_per_second': 5.032, 'epoch': 5.0}





In [213]:
from collections import Counter

# Conta le occorrenze delle etichette nel dataset di training
train_label_counts = Counter([int(label) for label in train_dataset['labels']])
test_label_counts = Counter([int(label) for label in test_dataset['labels']])

# Stampa la distribuzione in modo leggibile
print("Distribuzione delle classi nel dataset di training:")
for label, count in train_label_counts.items():
    print(f"Classe {label}: {count} esempi")

print("\nDistribuzione delle classi nel dataset di test:")
for label, count in test_label_counts.items():
    print(f"Classe {label}: {count} esempi")



Distribuzione delle classi nel dataset di training:
Classe 0: 25063 esempi
Classe 2: 24937 esempi

Distribuzione delle classi nel dataset di test:
Classe 1: 139 esempi
Classe 0: 177 esempi
Classe 2: 182 esempi


In [None]:
from transformers import Trainer, TrainingArguments

# Configura i parametri di training
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50
)

# Inizializza il Trainer
trainer = Trainer(
    model=model,  # Assicurati di aver caricato il modello DistilBERT
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"]
)

# Avvia il fine-tuning
trainer.train()


In [59]:
# Imposta il formato del dataset
tokenized_datasets = tokenized_datasets.remove_columns(["text", "date", "user", "query"])
tokenized_datasets.set_format("torch")

# Dividi in train e test
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]


ValueError: Column name ['date', 'user', 'query', 'text'] not in the dataset. Current columns in the dataset: ['sentiment', 'input_ids', 'attention_mask', 'labels']

In [106]:
from transformers import DistilBertForSequenceClassification

# Inizializza il modello
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [107]:
# Congela tutti i layer
for param in model.base_model.parameters():
    param.requires_grad = False

for param in model.distilbert.transformer.layer[-8:].parameters():
    param.requires_grad = True





In [108]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, Trainer, TrainingArguments
import torch.nn as nn

# Modifica il classificatore finale (opzionale)
model.classifier = nn.Sequential(
    nn.Linear(model.config.hidden_size, 128),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(128, 3)
)

In [111]:
from collections import Counter

# Mescola i dataset
train_dataset = train_dataset.shuffle(seed=42)
test_dataset = test_dataset.shuffle(seed=42)

# Seleziona fino a 50.000 esempi nel training e 2.000 nel test
small_train_dataset = train_dataset.select(range(min(50000, len(train_dataset))))
small_test_dataset = test_dataset.select(range(min(2000, len(test_dataset))))

# Configura il Trainer
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    warmup_steps=500,  # Una percentuale del totale dei passi di training
    per_device_train_batch_size=32,  # Batch size aumentato
    per_device_eval_batch_size=32,
    num_train_epochs=2,  # Ridotto per test rapidi
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    save_total_limit=2,
    eval_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True
)

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_test_dataset,
    compute_metrics=compute_metrics
)

In [112]:
# Avvia l'addestramento
trainer.train()


  2%|▏         | 50/3126 [00:25<25:41,  2.00it/s]

{'loss': 1.0984, 'grad_norm': 1.2728896141052246, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.03}


  3%|▎         | 100/3126 [00:50<25:13,  2.00it/s]

{'loss': 1.0007, 'grad_norm': 1.5675368309020996, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.06}


  5%|▍         | 150/3126 [01:15<24:40,  2.01it/s]

{'loss': 0.8245, 'grad_norm': 1.500333309173584, 'learning_rate': 6e-06, 'epoch': 0.1}


  6%|▋         | 200/3126 [01:40<24:14,  2.01it/s]

{'loss': 0.7134, 'grad_norm': 1.506410837173462, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.13}


  8%|▊         | 250/3126 [02:05<23:56,  2.00it/s]

{'loss': 0.6121, 'grad_norm': 2.701855182647705, 'learning_rate': 1e-05, 'epoch': 0.16}


 10%|▉         | 300/3126 [02:30<23:26,  2.01it/s]

{'loss': 0.5414, 'grad_norm': 6.165102481842041, 'learning_rate': 1.2e-05, 'epoch': 0.19}


 11%|█         | 350/3126 [02:56<23:12,  1.99it/s]

{'loss': 0.5093, 'grad_norm': 5.476261615753174, 'learning_rate': 1.4e-05, 'epoch': 0.22}


 13%|█▎        | 400/3126 [03:21<22:40,  2.00it/s]

{'loss': 0.501, 'grad_norm': 3.293067455291748, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.26}


 14%|█▍        | 450/3126 [03:46<22:16,  2.00it/s]

{'loss': 0.4743, 'grad_norm': 4.989706993103027, 'learning_rate': 1.8e-05, 'epoch': 0.29}


 16%|█▌        | 500/3126 [04:11<21:39,  2.02it/s]

{'loss': 0.4496, 'grad_norm': 4.248252868652344, 'learning_rate': 2e-05, 'epoch': 0.32}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                  
 16%|█▌        | 500/3126 [04:14<21:39,  2.02it/s]

{'eval_loss': 1.685691237449646, 'eval_accuracy': 0.5963855421686747, 'eval_precision': 0.46138078324860143, 'eval_recall': 0.5963855421686747, 'eval_f1': 0.5089106609588537, 'eval_runtime': 3.045, 'eval_samples_per_second': 163.549, 'eval_steps_per_second': 5.255, 'epoch': 0.32}


 18%|█▊        | 550/3126 [04:40<21:22,  2.01it/s]  

{'loss': 0.462, 'grad_norm': 3.5710206031799316, 'learning_rate': 1.961919268849962e-05, 'epoch': 0.35}


 19%|█▉        | 600/3126 [05:05<21:02,  2.00it/s]

{'loss': 0.4738, 'grad_norm': 2.4982292652130127, 'learning_rate': 1.923838537699924e-05, 'epoch': 0.38}


 21%|██        | 650/3126 [05:30<20:37,  2.00it/s]

{'loss': 0.4565, 'grad_norm': 2.995617151260376, 'learning_rate': 1.885757806549886e-05, 'epoch': 0.42}


 22%|██▏       | 700/3126 [05:55<20:12,  2.00it/s]

{'loss': 0.4407, 'grad_norm': 2.466111421585083, 'learning_rate': 1.8476770753998477e-05, 'epoch': 0.45}


 24%|██▍       | 750/3126 [06:20<19:54,  1.99it/s]

{'loss': 0.4293, 'grad_norm': 4.082337379455566, 'learning_rate': 1.8095963442498095e-05, 'epoch': 0.48}


 26%|██▌       | 800/3126 [06:45<19:12,  2.02it/s]

{'loss': 0.4435, 'grad_norm': 2.255018949508667, 'learning_rate': 1.7715156130997717e-05, 'epoch': 0.51}


 27%|██▋       | 850/3126 [07:10<19:06,  1.99it/s]

{'loss': 0.4418, 'grad_norm': 2.7185280323028564, 'learning_rate': 1.7334348819497335e-05, 'epoch': 0.54}


 29%|██▉       | 900/3126 [07:35<18:36,  1.99it/s]

{'loss': 0.4306, 'grad_norm': 3.0409204959869385, 'learning_rate': 1.6953541507996956e-05, 'epoch': 0.58}


 30%|███       | 950/3126 [08:00<18:21,  1.97it/s]

{'loss': 0.4382, 'grad_norm': 3.594254970550537, 'learning_rate': 1.6572734196496574e-05, 'epoch': 0.61}


 32%|███▏      | 1000/3126 [08:25<18:02,  1.96it/s]

{'loss': 0.4247, 'grad_norm': 2.548297166824341, 'learning_rate': 1.6191926884996193e-05, 'epoch': 0.64}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                   
 32%|███▏      | 1000/3126 [08:29<18:02,  1.96it/s]

{'eval_loss': 2.0311286449432373, 'eval_accuracy': 0.6004016064257028, 'eval_precision': 0.4427398586387993, 'eval_recall': 0.6004016064257028, 'eval_f1': 0.5067951312182474, 'eval_runtime': 3.1147, 'eval_samples_per_second': 159.887, 'eval_steps_per_second': 5.137, 'epoch': 0.64}


 34%|███▎      | 1050/3126 [08:55<17:06,  2.02it/s]  

{'loss': 0.4301, 'grad_norm': 5.946230411529541, 'learning_rate': 1.581111957349581e-05, 'epoch': 0.67}


 35%|███▌      | 1100/3126 [09:20<17:04,  1.98it/s]

{'loss': 0.4135, 'grad_norm': 4.732226848602295, 'learning_rate': 1.5430312261995432e-05, 'epoch': 0.7}


 37%|███▋      | 1150/3126 [09:46<17:09,  1.92it/s]

{'loss': 0.396, 'grad_norm': 3.732815742492676, 'learning_rate': 1.504950495049505e-05, 'epoch': 0.74}


 38%|███▊      | 1200/3126 [10:11<15:52,  2.02it/s]

{'loss': 0.438, 'grad_norm': 3.823451042175293, 'learning_rate': 1.4668697638994668e-05, 'epoch': 0.77}


 40%|███▉      | 1250/3126 [10:37<16:04,  1.95it/s]

{'loss': 0.4147, 'grad_norm': 5.914107799530029, 'learning_rate': 1.428789032749429e-05, 'epoch': 0.8}


 42%|████▏     | 1300/3126 [11:02<15:14,  2.00it/s]

{'loss': 0.4203, 'grad_norm': 4.583414077758789, 'learning_rate': 1.3907083015993908e-05, 'epoch': 0.83}


 43%|████▎     | 1350/3126 [11:27<14:47,  2.00it/s]

{'loss': 0.4175, 'grad_norm': 3.4147822856903076, 'learning_rate': 1.3526275704493528e-05, 'epoch': 0.86}


 45%|████▍     | 1400/3126 [11:52<14:23,  2.00it/s]

{'loss': 0.4244, 'grad_norm': 5.510150909423828, 'learning_rate': 1.3145468392993146e-05, 'epoch': 0.9}


 46%|████▋     | 1450/3126 [12:17<13:57,  2.00it/s]

{'loss': 0.4098, 'grad_norm': 4.245355129241943, 'learning_rate': 1.2764661081492767e-05, 'epoch': 0.93}


 48%|████▊     | 1500/3126 [12:42<13:31,  2.00it/s]

{'loss': 0.4163, 'grad_norm': 3.167078733444214, 'learning_rate': 1.2383853769992385e-05, 'epoch': 0.96}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
                                                   
 48%|████▊     | 1500/3126 [12:45<13:31,  2.00it/s]

{'eval_loss': 2.2044501304626465, 'eval_accuracy': 0.5963855421686747, 'eval_precision': 0.46425460879663233, 'eval_recall': 0.5963855421686747, 'eval_f1': 0.5081957533711664, 'eval_runtime': 3.0498, 'eval_samples_per_second': 163.291, 'eval_steps_per_second': 5.246, 'epoch': 0.96}


 50%|████▉     | 1550/3126 [13:11<13:32,  1.94it/s]

{'loss': 0.3876, 'grad_norm': 5.323203086853027, 'learning_rate': 1.2003046458492005e-05, 'epoch': 0.99}


 50%|████▉     | 1559/3126 [13:16<13:27,  1.94it/s]

KeyboardInterrupt: 

In [101]:
# Valutazione sul dataset di test
metrics = trainer.evaluate(eval_dataset=small_test_dataset)
print(metrics)


100%|██████████| 8/8 [00:03<00:00,  2.36it/s]

{'eval_loss': 2.4108541011810303, 'eval_runtime': 3.3459, 'eval_samples_per_second': 148.84, 'eval_steps_per_second': 2.391, 'epoch': 2.0}





In [102]:
# Previsioni sul dataset di test
predictions = trainer.predict(test_dataset=small_test_dataset)

# Estrai logits e etichette predette
logits = predictions.predictions
predicted_labels = logits.argmax(axis=1)  # Classe con probabilità massima

# Etichette reali
true_labels = predictions.label_ids

# Calcola l'accuratezza manualmente (opzionale)
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuratezza: {accuracy * 100:.2f}%")


100%|██████████| 8/8 [00:02<00:00,  2.74it/s]

Accuratezza: 60.04%



