In [6]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from torch.optim import AdamW
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
    set_seed
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import argparse
from tqdm import tqdm
import random


# Hugging Face model id handed to AutoTokenizer / AutoModelForSequenceClassification in main().
MODEL_NAME = "lordtt13/emo-mobilebert"
# Alternative checkpoint, currently disabled:
#MODEL_NAME = "JuliusAlphonso/distilbert-plutchik"
# CSV location and column names intended for load_data(); in the code shown here
# they are only referenced by the commented-out CSV loading path in main().
DATASET_PATH = "/kaggle/input/dataset-5/dataset.csv"
TEXT_COLUMN = "TESTO"
LABEL_COLUMN = "EMOZIONI"
# Directory where main() writes the best checkpoint, the tokenizer and the exported weights.
OUTPUT_DIR = "/kaggle/working/best_model"
# Batch size used for both the training and the validation DataLoader.
BATCH_SIZE = 32
# Upper bound on the number of epochs; main() stops earlier once validation loss stops improving.
EPOCHS = 100
# Initial learning rate given to AdamW.
LEARNING_RATE = 3e-5
# Token length each sample is padded/truncated to by SentimentDataset.
MAX_LENGTH = 128
SEED = 12

# Reproducibility: seed through transformers.set_seed before any model or data work.
set_seed(SEED)
# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Dispositivo in uso: {device}")

# Custom Dataset
class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes a single text sample on every access.

    Args:
        texts: Indexable collection of raw samples; each one is coerced with ``str()``.
        labels: Indexable collection of integer class ids, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, **options)`` that returns a
            mapping containing ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length the tokenizer is asked to pad/truncate every sample to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts, self.labels = texts, labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Number of samples, taken from ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the raw text, flattened token tensors and label for sample ``idx``."""
        sample_text = str(self.texts[idx])
        sample_label = self.labels[idx]

        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            return_attention_mask=True,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        encoded = self.tokenizer(sample_text, **tokenizer_options)

        item = {'text': sample_text}
        # The tokenizer output is flattened to one dimension before being returned.
        for field in ('input_ids', 'attention_mask'):
            item[field] = encoded[field].flatten()
        item['label'] = torch.tensor(sample_label, dtype=torch.long)
        return item

def load_data(file_path, text_col, label_col):
    """Read a CSV file and return its texts, labels and optional label mapping.

    Args:
        file_path: Path of the CSV file, passed to ``pandas.read_csv``.
        text_col: Name of the column holding the texts.
        label_col: Name of the column holding the labels.

    Returns:
        A ``(texts, labels, label_map)`` tuple of two NumPy arrays plus a dict.
        When the label column is numeric it is returned as-is and ``label_map``
        is ``None``. Otherwise each distinct label is assigned an integer id in
        order of first appearance, and ``label_map`` maps label -> id.

    Raises:
        ValueError: If ``text_col`` or ``label_col`` is not a column of the file.
    """
    frame = pd.read_csv(file_path)

    # Fail early, listing what is available, when a requested column is absent.
    has_required_columns = all(col in frame.columns for col in (text_col, label_col))
    if not has_required_columns:
        available_cols = ", ".join(frame.columns)
        raise ValueError(f"Colonne richieste non trovate. Colonne disponibili: {available_cols}")

    # Numeric labels need no encoding.
    if pd.api.types.is_numeric_dtype(frame[label_col]):
        return frame[text_col].values, frame[label_col].values, None

    # Non-numeric labels: number the distinct values in order of first appearance.
    distinct_labels = frame[label_col].unique()
    label_map = dict(zip(distinct_labels, range(len(distinct_labels))))
    frame['label_id'] = frame[label_col].map(label_map)
    print(f"Mappatura etichette: {label_map}")
    return frame[text_col].values, frame['label_id'].values, label_map

def train_epoch(model, data_loader, optimizer, scheduler, device):
    """Train ``model`` for one full pass over ``data_loader``.

    For every batch the gradients are reset, the loss returned by the model is
    back-propagated, the gradient norm is clipped to 1.0, and both the
    optimizer and the scheduler are stepped (the scheduler advances once per
    batch, not once per epoch).

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``
            keyword arguments; its output must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches with ``'input_ids'``, ``'attention_mask'``
            and ``'label'`` tensors.
        optimizer: Optimizer updating the model parameters.
        scheduler: Learning-rate scheduler stepped after every optimizer step.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy, f1)``: mean batch loss, accuracy and weighted F1
        computed over all predictions made during the epoch.
    """
    model.train()
    running_loss = 0
    predictions, references = [], []

    batches = tqdm(data_loader, desc="Training")
    for batch in batches:
        optimizer.zero_grad()

        model_inputs = {
            'input_ids': batch['input_ids'].to(device),
            'attention_mask': batch['attention_mask'].to(device),
            'labels': batch['label'].to(device),
        }
        outputs = model(**model_inputs)

        batch_loss = outputs.loss
        batch_loss_value = batch_loss.item()
        running_loss += batch_loss_value

        # Backward pass, clip, then advance optimizer and LR schedule together.
        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

        # Predictions come from the logits of the forward pass above,
        # i.e. from the weights before this batch's update.
        predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
        references.extend(model_inputs['labels'].cpu().numpy())

        batches.set_postfix({"loss": batch_loss_value})

    avg_loss = running_loss / len(data_loader)
    accuracy = accuracy_score(references, predictions)
    f1 = f1_score(references, predictions, average='weighted')
    return avg_loss, accuracy, f1

def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` without updating any weights.

    The model is switched to eval mode and every batch is run under
    ``torch.no_grad()``.

    Args:
        model: Model called with ``input_ids``, ``attention_mask`` and ``labels``
            keyword arguments; its output must expose ``.loss`` and ``.logits``.
        data_loader: Iterable of batches with ``'input_ids'``, ``'attention_mask'``
            and ``'label'`` tensors.
        device: Device the batch tensors are moved to.

    Returns:
        ``(avg_loss, accuracy, f1)``: mean batch loss, accuracy and weighted F1
        over the whole loader.
    """
    model.eval()
    running_loss = 0
    predictions, references = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Validation"):
            model_inputs = {
                'input_ids': batch['input_ids'].to(device),
                'attention_mask': batch['attention_mask'].to(device),
                'labels': batch['label'].to(device),
            }
            outputs = model(**model_inputs)

            running_loss += outputs.loss.item()
            predictions.extend(outputs.logits.argmax(dim=1).cpu().numpy())
            references.extend(model_inputs['labels'].cpu().numpy())

    avg_loss = running_loss / len(data_loader)
    accuracy = accuracy_score(references, predictions)
    f1 = f1_score(references, predictions, average='weighted')
    return avg_loss, accuracy, f1

def optimize_for_raspberry_pi(model, tokenizer, output_dir):
    """Dynamically quantize ``model`` to int8 and save it next to its tokenizer.

    Every ``torch.nn.Linear`` layer is replaced by its dynamically quantized
    (``torch.qint8``) counterpart to shrink the model and speed up CPU
    inference. The quantized ``state_dict`` is written to
    ``<output_dir>/mobilebert_sentiment_quantized.pt`` and the tokenizer is
    saved into the same directory.

    Args:
        model: Trained ``torch.nn.Module``. It is not modified: quantization is
            applied to a copy, so its device and train/eval mode are preserved.
        tokenizer: Object exposing ``save_pretrained(directory)``.
        output_dir: Destination directory; created if it does not exist.

    Returns:
        None.
    """
    import copy  # local import: only this export step needs it

    os.makedirs(output_dir, exist_ok=True)

    # Dynamic quantization is a CPU feature, while the caller may hand over a
    # model that still lives on the GPU. Quantize a CPU copy so the export
    # works regardless of where training ran and the caller's model is untouched.
    cpu_model = copy.deepcopy(model).to("cpu")
    cpu_model.eval()

    # inplace=True: cpu_model is already a private copy, no second copy needed.
    quantized_model = torch.quantization.quantize_dynamic(
        cpu_model, {torch.nn.Linear}, dtype=torch.qint8, inplace=True
    )

    # Save the quantized weights.
    torch.save(
        quantized_model.state_dict(),
        os.path.join(output_dir, "mobilebert_sentiment_quantized.pt"),
    )

    # Save the tokenizer alongside the weights.
    tokenizer.save_pretrained(output_dir)

    # ONNX export is intentionally not performed by this function.

def main():
    """Fine-tune ``MODEL_NAME`` for text classification and export the best checkpoint.

    Pipeline:
      1. Load the ``MelmaGrigia/italian-text-sentiment-analysis`` dataset; its
         ``train`` split is used for training and its ``test`` split for validation.
      2. Fine-tune the model with AdamW and a cosine-annealed learning rate that
         is stepped once per batch.
      3. After each epoch, save model and tokenizer to ``OUTPUT_DIR`` whenever the
         validation loss improves; stop after 10 consecutive epochs without
         improvement.
      4. Reload the best saved checkpoint and export it as a plain ``state_dict``
         plus a dynamically quantized copy.

    Note: the ``test`` split drives both checkpoint selection and early stopping,
    so the validation metrics printed here are not an unbiased test estimate.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Load the dataset.
    print("Caricamento del dataset...")
    dataset = load_dataset("MelmaGrigia/italian-text-sentiment-analysis")

    print(f"Struttura del dataset: {dataset}")
    print(f"Colonne: {dataset['train'].column_names}")

    # Extract texts and labels as plain lists so that indexing, len(), set() and
    # np.bincount() do not depend on the container type the column accessor returns.
    train_texts = list(dataset['train']['text'])
    train_labels = list(dataset['train']['label'])

    val_texts = list(dataset['test']['text'])
    val_labels = list(dataset['test']['label'])

    num_labels = len(set(train_labels))
    print(f"Numero di etichette: {num_labels}")

    # Class distribution of the training split.
    class_counts = np.bincount(train_labels)
    print(f"Distribuzione delle classi: {class_counts}")

    print(f"Testi di training: {len(train_texts)}")
    print(f"Testi di validazione: {len(val_texts)}")

    # Load tokenizer and model. ignore_mismatched_sizes lets the classifier head
    # be re-initialised when the checkpoint has a different number of labels.
    print(f"Caricamento del modello {MODEL_NAME}...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=num_labels,
        ignore_mismatched_sizes=True
    )
    # Move the model to its training device before the optimizer is built.
    model.to(device)

    # Datasets and dataloaders.
    train_dataset = SentimentDataset(train_texts, train_labels, tokenizer, MAX_LENGTH)
    val_dataset = SentimentDataset(val_texts, val_labels, tokenizer, MAX_LENGTH)

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True
    )

    val_dataloader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False
    )

    # Optimizer and scheduler. train_epoch() steps the scheduler once per batch,
    # so the cosine period is expressed in batches, not epochs.
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
    total_steps = len(train_dataloader) * EPOCHS
    scheduler = CosineAnnealingLR(optimizer, T_max=total_steps)

    # Training with early stopping on validation loss.
    print("Inizio dell'addestramento...")
    best_val_loss = float('inf')
    max_patience = 10
    patience = max_patience
    best_checkpoint_saved = False

    for epoch in range(EPOCHS):
        print(f"\nEpoca {epoch+1}/{EPOCHS}")

        train_loss, train_acc, train_f1 = train_epoch(
            model, train_dataloader, optimizer, scheduler, device
        )

        print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, F1: {train_f1:.4f}")

        val_loss, val_acc, val_f1 = evaluate(
            model, val_dataloader, device
        )

        print(f"Val Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}, F1: {val_f1:.4f}")

        # Checkpoint whenever the validation LOSS improves (not the F1 score).
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = max_patience

            model.save_pretrained(OUTPUT_DIR)
            tokenizer.save_pretrained(OUTPUT_DIR)
            best_checkpoint_saved = True
            print(f"Modello salvato in {OUTPUT_DIR}")

        else:
            patience -= 1
            print(f"Early stopping patience: {patience}")

            if patience <= 0:
                print("Early stopping attivato.")
                break

    print("\nAddestramento completato!")

    # The in-memory weights belong to the LAST epoch, which is never the best one
    # when early stopping fired. Export the best checkpoint saved above instead.
    if best_checkpoint_saved:
        model = AutoModelForSequenceClassification.from_pretrained(OUTPUT_DIR)
    else:
        # No epoch ever improved on the initial loss (e.g. the loss was NaN):
        # fall back to the current weights, on the CPU for the export steps.
        model.to("cpu")
    model.eval()

    torch.save(model.state_dict(), f"{OUTPUT_DIR}/mobilebert_sentiment.pt")
    optimize_for_raspberry_pi(model, tokenizer, OUTPUT_DIR)

    print("\nOptimization completed!")


if __name__ == "__main__":
    main()

Dispositivo in uso: cuda
Caricamento del dataset...
Struttura del dataset: DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_name'],
        num_rows: 1799
    })
    test: Dataset({
        features: ['text', 'label', 'label_name'],
        num_rows: 326
    })
})
Colonne: ['text', 'label', 'label_name']
Numero di etichette: 6
Distribuzione delle classi: [303 302 301 298 298 297]
Testi di training: 1799
Testi di validazione: 326
Caricamento del modello lordtt13/emo-mobilebert...


Some weights of MobileBertForSequenceClassification were not initialized from the model checkpoint at lordtt13/emo-mobilebert and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([4]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([4, 512]) in the checkpoint and torch.Size([6, 512]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Inizio dell'addestramento...

Epoca 1/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.49it/s, loss=1.52]


Train Loss: 1.7470, Accuracy: 0.2279, F1: 0.2083


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.92it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 1.6529, Accuracy: 0.3405, F1: 0.2384
Modello salvato in /kaggle/working/best_model

Epoca 2/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.45it/s, loss=1.35]


Train Loss: 1.5394, Accuracy: 0.3513, F1: 0.3028


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.39it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 1.4379, Accuracy: 0.3957, F1: 0.2788
Modello salvato in /kaggle/working/best_model

Epoca 3/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.40it/s, loss=1.27]


Train Loss: 1.3434, Accuracy: 0.4591, F1: 0.4336


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.73it/s]


Val Loss: 1.4528, Accuracy: 0.3497, F1: 0.2459
Early stopping patience: 9

Epoca 4/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=1.09] 


Train Loss: 1.1355, Accuracy: 0.5581, F1: 0.5515


Validation: 100%|██████████| 11/11 [00:01<00:00, 10.10it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 1.2646, Accuracy: 0.4540, F1: 0.4044
Modello salvato in /kaggle/working/best_model

Epoca 5/100


Training: 100%|██████████| 57/57 [00:15<00:00,  3.57it/s, loss=1.14] 


Train Loss: 1.0036, Accuracy: 0.6337, F1: 0.6278


Validation: 100%|██████████| 11/11 [00:01<00:00, 10.11it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 1.1073, Accuracy: 0.5552, F1: 0.5282
Modello salvato in /kaggle/working/best_model

Epoca 6/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=1.25] 


Train Loss: 0.9106, Accuracy: 0.6782, F1: 0.6719


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.82it/s]


Val Loss: 1.2935, Accuracy: 0.4816, F1: 0.4313
Early stopping patience: 9

Epoca 7/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.42it/s, loss=1.25] 


Train Loss: 0.8174, Accuracy: 0.7187, F1: 0.7142


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.79it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.8348, Accuracy: 0.6718, F1: 0.6286
Modello salvato in /kaggle/working/best_model

Epoca 8/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.52it/s, loss=0.403]


Train Loss: 0.6726, Accuracy: 0.7727, F1: 0.7681


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.92it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.6539, Accuracy: 0.7423, F1: 0.7007
Modello salvato in /kaggle/working/best_model

Epoca 9/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.472]


Train Loss: 0.6097, Accuracy: 0.7893, F1: 0.7852


Validation: 100%|██████████| 11/11 [00:01<00:00, 10.01it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.6028, Accuracy: 0.7883, F1: 0.7640
Modello salvato in /kaggle/working/best_model

Epoca 10/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.881]


Train Loss: 0.5533, Accuracy: 0.8049, F1: 0.8027


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.91it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.5083, Accuracy: 0.7883, F1: 0.7753
Modello salvato in /kaggle/working/best_model

Epoca 11/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=1.43] 


Train Loss: 0.5135, Accuracy: 0.8277, F1: 0.8260


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.89it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.4375, Accuracy: 0.8712, F1: 0.8677
Modello salvato in /kaggle/working/best_model

Epoca 12/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.51it/s, loss=0.196]


Train Loss: 0.4620, Accuracy: 0.8455, F1: 0.8431


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.96it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.3844, Accuracy: 0.8804, F1: 0.8782
Modello salvato in /kaggle/working/best_model

Epoca 13/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.48it/s, loss=0.594]


Train Loss: 0.4031, Accuracy: 0.8688, F1: 0.8675


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.97it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.3535, Accuracy: 0.8742, F1: 0.8730
Modello salvato in /kaggle/working/best_model

Epoca 14/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.56it/s, loss=0.446]


Train Loss: 0.3435, Accuracy: 0.8949, F1: 0.8940


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.95it/s]


Val Loss: 0.4210, Accuracy: 0.8067, F1: 0.7873
Early stopping patience: 9

Epoca 15/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.56it/s, loss=0.244]


Train Loss: 0.3830, Accuracy: 0.8710, F1: 0.8706


Validation: 100%|██████████| 11/11 [00:01<00:00, 10.04it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.3434, Accuracy: 0.8436, F1: 0.8309
Modello salvato in /kaggle/working/best_model

Epoca 16/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.303]


Train Loss: 0.3403, Accuracy: 0.8866, F1: 0.8862


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.98it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.2568, Accuracy: 0.9172, F1: 0.9168
Modello salvato in /kaggle/working/best_model

Epoca 17/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.786]


Train Loss: 0.2992, Accuracy: 0.9099, F1: 0.9099


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.95it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.2217, Accuracy: 0.9264, F1: 0.9251
Modello salvato in /kaggle/working/best_model

Epoca 18/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.0626]


Train Loss: 0.2957, Accuracy: 0.9044, F1: 0.9042


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.90it/s]


Val Loss: 0.3086, Accuracy: 0.8865, F1: 0.8843
Early stopping patience: 9

Epoca 19/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.47it/s, loss=0.82]  


Train Loss: 0.2577, Accuracy: 0.9300, F1: 0.9299


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.88it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1786, Accuracy: 0.9387, F1: 0.9379
Modello salvato in /kaggle/working/best_model

Epoca 20/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.0445]


Train Loss: 0.2278, Accuracy: 0.9355, F1: 0.9354


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.93it/s]


Val Loss: 0.2148, Accuracy: 0.9479, F1: 0.9475
Early stopping patience: 9

Epoca 21/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.0487]


Train Loss: 0.2453, Accuracy: 0.9205, F1: 0.9203


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.90it/s]


Val Loss: 0.3614, Accuracy: 0.8681, F1: 0.8664
Early stopping patience: 8

Epoca 22/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.0259]


Train Loss: 0.2490, Accuracy: 0.9277, F1: 0.9274


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.93it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1641, Accuracy: 0.9479, F1: 0.9476
Modello salvato in /kaggle/working/best_model

Epoca 23/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.0184]


Train Loss: 0.2125, Accuracy: 0.9339, F1: 0.9337


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.91it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1436, Accuracy: 0.9601, F1: 0.9600
Modello salvato in /kaggle/working/best_model

Epoca 24/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.636] 


Train Loss: 0.1835, Accuracy: 0.9489, F1: 0.9488


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.88it/s]


Val Loss: 0.2194, Accuracy: 0.9202, F1: 0.9172
Early stopping patience: 9

Epoca 25/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.52it/s, loss=0.0247]


Train Loss: 0.1759, Accuracy: 0.9477, F1: 0.9477


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.90it/s]


Val Loss: 0.1594, Accuracy: 0.9448, F1: 0.9447
Early stopping patience: 8

Epoca 26/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.46it/s, loss=0.0135]


Train Loss: 0.1711, Accuracy: 0.9522, F1: 0.9523


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.82it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1146, Accuracy: 0.9632, F1: 0.9627
Modello salvato in /kaggle/working/best_model

Epoca 27/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.0189]


Train Loss: 0.1478, Accuracy: 0.9605, F1: 0.9605


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.87it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1138, Accuracy: 0.9632, F1: 0.9633
Modello salvato in /kaggle/working/best_model

Epoca 28/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.0201]


Train Loss: 0.1554, Accuracy: 0.9539, F1: 0.9539


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.96it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.1024, Accuracy: 0.9693, F1: 0.9692
Modello salvato in /kaggle/working/best_model

Epoca 29/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.0242]


Train Loss: 0.1419, Accuracy: 0.9589, F1: 0.9589


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.97it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.0730, Accuracy: 0.9785, F1: 0.9785
Modello salvato in /kaggle/working/best_model

Epoca 30/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.56it/s, loss=0.216] 


Train Loss: 0.1726, Accuracy: 0.9544, F1: 0.9544


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.96it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.0696, Accuracy: 0.9816, F1: 0.9816
Modello salvato in /kaggle/working/best_model

Epoca 31/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.00891]


Train Loss: 0.1309, Accuracy: 0.9639, F1: 0.9639


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.98it/s]


Val Loss: 0.0857, Accuracy: 0.9785, F1: 0.9784
Early stopping patience: 9

Epoca 32/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=1.17]  


Train Loss: 0.1565, Accuracy: 0.9578, F1: 0.9577


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.87it/s]


Val Loss: 0.0932, Accuracy: 0.9755, F1: 0.9752
Early stopping patience: 8

Epoca 33/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.0137]


Train Loss: 0.1189, Accuracy: 0.9666, F1: 0.9666


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.83it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.0510, Accuracy: 0.9816, F1: 0.9816
Modello salvato in /kaggle/working/best_model

Epoca 34/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.48it/s, loss=0.523]  


Train Loss: 0.1066, Accuracy: 0.9722, F1: 0.9722


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.89it/s]
Non-default generation parameters: {'max_length': 128}


Val Loss: 0.0469, Accuracy: 0.9847, F1: 0.9847
Modello salvato in /kaggle/working/best_model

Epoca 35/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.54it/s, loss=0.0106] 


Train Loss: 0.1153, Accuracy: 0.9694, F1: 0.9695


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.90it/s]


Val Loss: 0.0610, Accuracy: 0.9847, F1: 0.9847
Early stopping patience: 9

Epoca 36/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.233]  


Train Loss: 0.1093, Accuracy: 0.9739, F1: 0.9739


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.91it/s]


Val Loss: 0.0592, Accuracy: 0.9816, F1: 0.9816
Early stopping patience: 8

Epoca 37/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.00841]


Train Loss: 0.0888, Accuracy: 0.9750, F1: 0.9750


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.94it/s]


Val Loss: 0.0617, Accuracy: 0.9847, F1: 0.9848
Early stopping patience: 7

Epoca 38/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.0128] 


Train Loss: 0.0743, Accuracy: 0.9805, F1: 0.9805


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.85it/s]


Val Loss: 0.0665, Accuracy: 0.9847, F1: 0.9846
Early stopping patience: 6

Epoca 39/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.52it/s, loss=1.41]   


Train Loss: 0.1224, Accuracy: 0.9750, F1: 0.9750


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.96it/s]


Val Loss: 0.2674, Accuracy: 0.9233, F1: 0.9222
Early stopping patience: 5

Epoca 40/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.00764]


Train Loss: 0.0789, Accuracy: 0.9767, F1: 0.9766


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.92it/s]


Val Loss: 0.0924, Accuracy: 0.9755, F1: 0.9755
Early stopping patience: 4

Epoca 41/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.00586]


Train Loss: 0.0934, Accuracy: 0.9750, F1: 0.9749


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.84it/s]


Val Loss: 0.1346, Accuracy: 0.9571, F1: 0.9563
Early stopping patience: 3

Epoca 42/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.53it/s, loss=0.01]   


Train Loss: 0.0902, Accuracy: 0.9789, F1: 0.9789


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.94it/s]


Val Loss: 0.1146, Accuracy: 0.9693, F1: 0.9689
Early stopping patience: 2

Epoca 43/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.00599]


Train Loss: 0.0760, Accuracy: 0.9794, F1: 0.9795


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.95it/s]


Val Loss: 0.0775, Accuracy: 0.9785, F1: 0.9786
Early stopping patience: 1

Epoca 44/100


Training: 100%|██████████| 57/57 [00:16<00:00,  3.55it/s, loss=0.00604]


Train Loss: 0.0568, Accuracy: 0.9850, F1: 0.9850


Validation: 100%|██████████| 11/11 [00:01<00:00,  9.97it/s]

Val Loss: 0.0672, Accuracy: 0.9785, F1: 0.9786
Early stopping patience: 0
Early stopping attivato.

Addestramento completato!





NameError: name 'best_val_f1' is not defined

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Directory written by save_pretrained(); it holds the fine-tuned model
# (the regular checkpoint, not the quantized weights) and its tokenizer.
model_path = "/kaggle/working/best_model"
tokenizer_path = "/kaggle/working/best_model"

model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Set the model to evaluation mode
model.eval()

# Use the tokenizer to create a valid dummy input, padded to the same fixed
# length (128) that is used for tracing.
sample_text = "This is a sample input for the model."
inputs = tokenizer(sample_text, return_tensors="pt", padding="max_length", truncation=True, max_length=128)

# Trace with BOTH input_ids and attention_mask. Inputs are padded to max_length,
# and without an attention_mask input the exported graph would have no way to
# tell padding from real tokens.
# NOTE: consumers of the ONNX file must now feed "attention_mask" as well.
dummy_input = (inputs["input_ids"], inputs["attention_mask"])

# Export the model to ONNX format
output_onnx_path = "/kaggle/working/best_model/mobilebert_sentiment.onnx"
with torch.no_grad():  # no autograd bookkeeping is needed while tracing
    torch.onnx.export(
        model,
        dummy_input,
        output_onnx_path,
        export_params=True,  # Store the trained parameter weights inside the model file
        opset_version=11,    # The ONNX opset version to target
        input_names=["input_ids", "attention_mask"],  # Names follow the order of dummy_input
        output_names=["output"],    # The model's output names
        dynamic_axes={
            "input_ids": {0: "batch_size"},       # Only the batch axis is dynamic;
            "attention_mask": {0: "batch_size"},  # sequence length stays fixed at 128
            "output": {0: "batch_size"},
        },
        do_constant_folding=True,  # Optimize the model by folding constants
    )

print(f"Model has been exported to {output_onnx_path}")

In [8]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Path of the model directory saved with save_pretrained.
# NOTE: `model` and MODEL_DIR defined in this cell are reused by the quantization cell below.
MODEL_DIR = "/kaggle/working/best_model"  # same value as OUTPUT_DIR in the training cell
OUTPUT_PATH = "/kaggle/working/best_model/mobilebert_sentiment.pt"  # Path of the .pt file

# Load the saved model
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

# Save only the state_dict in .pt format
torch.save(model.state_dict(), OUTPUT_PATH)

print(f"Modello salvato in formato .pt: {OUTPUT_PATH}")

Modello salvato in formato .pt: /kaggle/working/best_model/mobilebert_sentiment.pt


In [14]:
# Relies on `model`, MODEL_DIR, torch and AutoTokenizer from the previous cell.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Dynamic int8 quantization of every nn.Linear layer of the loaded model.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8,
)

# Save the quantized weights next to the original checkpoint.
torch.save(
    quantized_model.state_dict(),
    f"{MODEL_DIR}/mobilebert_sentiment_quant.pt",
)

# Save the tokenizer; left as the last expression so the written paths are displayed.
tokenizer.save_pretrained(MODEL_DIR)

('/kaggle/working/best_model/tokenizer_config.json',
 '/kaggle/working/best_model/special_tokens_map.json',
 '/kaggle/working/best_model/vocab.txt',
 '/kaggle/working/best_model/added_tokens.json',
 '/kaggle/working/best_model/tokenizer.json')