# CODICE BERT INIZIALE SULL'ITALIANO E CON TEST SPLIT

CODICE BERT INIZIALE SULL'ITALIANO E CON TEST SPLIT

In [None]:
!pip install accelerate -U
!pip install transformers[torch] -U
!pip install scikit-learn


Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.w

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

Mounted at /content/drive


In [None]:
# Tokenizer di BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


In [None]:
# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

In [None]:
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles


In [None]:
# Caricamento e divisione dei dati
texts, labels, titles = load_data(data_path)
train_texts, test_texts, train_labels, test_labels, train_titles, test_titles = train_test_split(texts, labels, titles, test_size=0.2, random_state=42)

# Creazione dei dataset
train_dataset = TextDataset(train_texts, train_labels, tokenizer)
test_dataset = TextDataset(test_texts, test_labels, tokenizer)

In [None]:
# Configurazione dei parametri di training
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)


In [None]:
# Inizializzazione del modello e del trainer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training e valutazione del modello
trainer.train()
trainer.save_model("./best_model")  # Salva il modello con la migliore accuratezza


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7031,0.709749,0.699029
2,0.6755,0.78675,0.708738
3,0.6205,0.590616,0.796117


In [None]:
# Valutazione manuale sui dati di test
predictions = trainer.predict(test_dataset)
predicted_labels = np.argmax(predictions.predictions, axis=1)


In [None]:
# Calcolo del classification report
target_names = ['0_bambini', '1_ragazzi', '2_adulti']
report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

# Stampa del classification report formattato
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    class_report = report[class_name]
    print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

print("\n\t\t\tSupport")
print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

# Funzione per la stampa degli errori usando il nome del file come titolo
def print_errors(titles, test_labels, predicted_labels, target_names, max_errors=10):
    print("\nErrori nei testi di test:")
    error_count = 0
    for i, (title, true_label, predicted_label) in enumerate(zip(titles, test_labels, predicted_labels)):
        if true_label != predicted_label:
            print(f"\nTitolo: {title}")
            print(f"Etichetta reale: {target_names[true_label]}")
            print(f"Etichetta predetta: {target_names[predicted_label]}")
            error_count += 1
            if error_count >= max_errors:
                break
    if error_count == 0:
        print("Nessun errore trovato.")

# Utilizza la funzione per la stampa degli errori
print_errors(test_titles, test_labels, predicted_labels, target_names)


		Precision	Recall	F1-score	Support
0_bambini	0.87		0.87	0.87		46
1_ragazzi	0.62		0.83	0.71		35
2_adulti	0.60		0.27	0.37		22

			Support
Accuracy		0.73	103
Macro avg	0.70		0.66	0.65		103
Weighted avg	0.73		0.73	0.71		103

Errori nei testi di test:

Titolo: ted2020-31.txt
Etichetta reale: 1_ragazzi
Etichetta predetta: 2_adulti

Titolo: YTP_003.txt
Etichetta reale: 2_adulti
Etichetta predetta: 0_bambini

Titolo: filmadulti46.txt
Etichetta reale: 2_adulti
Etichetta predetta: 0_bambini

Titolo: filmadulti42.txt
Etichetta reale: 2_adulti
Etichetta predetta: 1_ragazzi

Titolo: testo_ytkids_0031.txt
Etichetta reale: 0_bambini
Etichetta predetta: 1_ragazzi

Titolo: ted2020-127.txt
Etichetta reale: 1_ragazzi
Etichetta predetta: 2_adulti

Titolo: ted2020-101.txt
Etichetta reale: 1_ragazzi
Etichetta predetta: 2_adulti

Titolo: testo_ytkids_N030.txt
Etichetta reale: 1_ragazzi
Etichetta predetta: 0_bambini

Titolo: ted2020-183.txt
Etichetta reale: 2_adulti
Etichetta predetta: 1_ragazzi

Titolo: ted

# mBERT

# Mbert eng uncased

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Tokenizer di mBERT
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-uncased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0203,0.953747,0.688889
2,0.5429,0.625416,0.7
3,0.3543,0.381404,0.866667


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.67		0.91	0.77		22.0
2_adulti	0.89		0.63	0.74		27.0

			Support
Accuracy		0.87	90
Macro avg	0.85		0.85	0.84		90
Weighted avg	0.89		0.87	0.87		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0065,0.921059,0.674157
2,0.604,0.564635,0.775281
3,0.4565,0.334179,0.842697


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.66		0.96	0.78		26.0
2_adulti	0.86		0.32	0.46		19.0

			Support
Accuracy		0.84	89
Macro avg	0.84		0.76	0.75		89
Weighted avg	0.87		0.84	0.82		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0111,0.935264,0.730337
2,0.6126,0.560142,0.797753
3,0.4213,0.365058,0.853933


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		38.0
1_ragazzi	0.71		1.00	0.83		32.0
2_adulti	1.00		0.32	0.48		19.0

			Support
Accuracy		0.85	89
Macro avg	0.90		0.77	0.77		89
Weighted avg	0.90		0.85	0.83		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0122,0.93595,0.775281
2,0.5798,0.554301,0.820225
3,0.4358,0.328288,0.876404


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.75		1.00	0.86		33.0
2_adulti	1.00		0.31	0.48		16.0

			Support
Accuracy		0.88	89
Macro avg	0.92		0.77	0.78		89
Weighted avg	0.91		0.88	0.85		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0157,0.927953,0.775281
2,0.5865,0.540399,0.786517
3,0.4534,0.320242,0.932584


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.85		1.00	0.92		33.0
2_adulti	1.00		0.68	0.81		19.0

			Support
Accuracy		0.93	89
Macro avg	0.95		0.89	0.91		89
Weighted avg	0.94		0.93	0.93		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.73		0.97	0.83		146.0
2_adulti	0.95		0.45	0.59		100.0

Macro avg	0.89		0.81	0.81
Weighted avg	0.90		0.87	0.86

Accuratezza media su 5 fold: 0.87


# mBERT ita uncased

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer di mBERT
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-uncased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.072,0.999552,0.679612
2,0.829,0.670833,0.747573
3,0.5419,0.579848,0.786408


		Precision	Recall	F1-score	Support
0_bambini	0.87		0.89	0.88		46.0
1_ragazzi	0.74		0.83	0.78		35.0
2_adulti	0.65		0.50	0.56		22.0

			Support
Accuracy		0.79	103
Macro avg	0.75		0.74	0.74		103
Weighted avg	0.78		0.79	0.78		103

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0601,1.016044,0.640777
2,0.7489,0.71679,0.718447
3,0.551,0.760797,0.708738


		Precision	Recall	F1-score	Support
0_bambini	0.74		0.98	0.84		44.0
1_ragazzi	0.66		0.74	0.70		31.0
2_adulti	0.80		0.29	0.42		28.0

			Support
Accuracy		0.72	103
Macro avg	0.73		0.67	0.65		103
Weighted avg	0.73		0.72	0.68		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0614,1.014441,0.621359
2,0.7454,0.826793,0.631068
3,0.5974,0.852416,0.631068


		Precision	Recall	F1-score	Support
0_bambini	0.62		0.89	0.73		37.0
1_ragazzi	0.64		0.72	0.67		39.0
2_adulti	0.67		0.15	0.24		27.0

			Support
Accuracy		0.63	103
Macro avg	0.64		0.59	0.55		103
Weighted avg	0.64		0.63	0.58		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0664,1.015423,0.660194
2,0.806,0.745242,0.737864
3,0.4672,0.657697,0.747573


		Precision	Recall	F1-score	Support
0_bambini	0.85		0.79	0.82		43.0
1_ragazzi	0.63		0.94	0.76		36.0
2_adulti	1.00		0.38	0.55		24.0

			Support
Accuracy		0.75	103
Macro avg	0.83		0.70	0.71		103
Weighted avg	0.81		0.75	0.73		103

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0391,0.991283,0.666667
2,0.7766,0.739408,0.686275
3,0.5678,0.639844,0.735294


		Precision	Recall	F1-score	Support
0_bambini	0.74		0.95	0.83		39.0
1_ragazzi	0.71		0.76	0.73		38.0
2_adulti	0.82		0.36	0.50		25.0

			Support
Accuracy		0.74	102
Macro avg	0.76		0.69	0.69		102
Weighted avg	0.75		0.74	0.71		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.77		0.90	0.82		209.0
1_ragazzi	0.67		0.80	0.73		179.0
2_adulti	0.79		0.33	0.45		126.0

Macro avg	0.74		0.68	0.67
Weighted avg	0.74		0.72	0.70

Accuratezza media su 5 fold: 0.72


# mbert cased ita

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer di mBERT (cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9905,0.962189,0.601942
2,0.8362,0.794913,0.699029
3,0.7067,0.953738,0.572816


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


		Precision	Recall	F1-score	Support
0_bambini	0.68		0.93	0.79		46.0
1_ragazzi	0.72		0.83	0.77		35.0
2_adulti	0.00		0.00	0.00		22.0

			Support
Accuracy		0.70	103
Macro avg	0.47		0.59	0.52		103
Weighted avg	0.55		0.70	0.62		103

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.013,0.990306,0.563107
2,0.7787,0.702587,0.68932
3,0.7252,0.667515,0.669903


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


		Precision	Recall	F1-score	Support
0_bambini	0.91		0.89	0.90		44.0
1_ragazzi	0.50		0.97	0.66		31.0
2_adulti	0.00		0.00	0.00		28.0

			Support
Accuracy		0.67	103
Macro avg	0.47		0.62	0.52		103
Weighted avg	0.54		0.67	0.58		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9862,0.999379,0.427184
2,0.7402,0.813439,0.679612
3,0.6359,0.730358,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.85		0.89	0.87		37.0
1_ragazzi	0.64		0.72	0.67		39.0
2_adulti	0.55		0.41	0.47		27.0

			Support
Accuracy		0.70	103
Macro avg	0.68		0.67	0.67		103
Weighted avg	0.69		0.70	0.69		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0078,1.016464,0.417476
2,0.8097,0.782928,0.699029
3,0.5424,0.728135,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.75		0.95	0.84		43.0
1_ragazzi	0.65		0.61	0.63		36.0
2_adulti	0.64		0.38	0.47		24.0

			Support
Accuracy		0.70	103
Macro avg	0.68		0.65	0.65		103
Weighted avg	0.69		0.70	0.68		103

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0186,0.938617,0.637255
2,0.788,0.701932,0.735294
3,0.6831,0.787005,0.598039


		Precision	Recall	F1-score	Support
0_bambini	0.72		0.87	0.79		39.0
1_ragazzi	0.71		0.89	0.79		38.0
2_adulti	1.00		0.28	0.44		25.0

			Support
Accuracy		0.74	102
Macro avg	0.81		0.68	0.67		102
Weighted avg	0.79		0.74	0.70		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.78		0.91	0.84		209.0
1_ragazzi	0.64		0.80	0.71		179.0
2_adulti	0.44		0.21	0.28		126.0

Macro avg	0.62		0.64	0.61
Weighted avg	0.65		0.70	0.65

Accuratezza media su 5 fold: 0.70


# mbert cased ita badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Caricamento delle bad words in italiano
bad_words_path = '/content/drive/My Drive/bad_words.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e abbassa il testo
    return words.intersection(bad_words)

# Tokenizer di mBERT (cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

# Lista per memorizzare i testi in cui l'etichetta è cambiata
texts_with_label_changes = []

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti"
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))
            # Memorizza i testi modificati
            texts_with_label_changes.append({
                "Title": titles[test_index[i]],
                "Original Text": text,
                "Detected Bad Words": list(detected_bad_words),
                "Original Label": original_label,  # Etichetta originale (0 = bambini)
                "New Label": 2  # Nuova etichetta (2 = adulti)
            })
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

    # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {bad_words}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Stampa dei testi in cui l'etichetta è stata cambiata a causa delle bad words
if texts_with_label_changes:
    print(f"\nTesti con cambiamento di etichetta a causa di bad words ({len(texts_with_label_changes)} testi):")
    for item in texts_with_label_changes:
        print(f"Titolo: {item['Title']}")
        print(f"Testo originale: {item['Original Text']}")
        print(f"Parole inappropriate rilevate: {', '.join(item['Detected Bad Words'])}")
        print(f"Etichetta originale: {item['Original Label']}, Nuova etichetta: {item['New Label']}")
        print("-" * 80)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9905,0.962189,0.601942
2,0.8362,0.794913,0.699029
3,0.7067,0.953738,0.572816


		Precision	Recall	F1-score	Support
0_bambini	0.83		0.93	0.88		46.0
1_ragazzi	0.72		0.83	0.77		35.0
2_adulti	1.00		0.50	0.67		22.0

			Support
Accuracy		0.81	103
Macro avg	0.85		0.75	0.77		103
Weighted avg	0.83		0.81	0.80		103
Bad words rilevate nel fold 1:
Titolo: YTP_003.txt, Bad words: {'cazzata', 'cazzetto', 'troia', 'piscio'}
Titolo: YTP_012.txt, Bad words: {'cazzo', 'cazzi', 'bastardo'}
Titolo: YTP_015.txt, Bad words: {'cazzo', 'minchia', 'bastardo', 'scopata'}
Titolo: YTP_019.txt, Bad words: {'affanculo', 'cazzo', 'culo'}
Titolo: filmadulti12.txt, Bad words: {'tette', 'fanculo'}
Titolo: filmadulti16.txt, Bad words: {'cazzo', 'fottuto'}
Titolo: filmadulti25.txt, Bad words: {'cazzo', 'fottuto', 'fanculo', 'minchia', 'idiota'}
Titolo: filmadulti27.txt, Bad words: {'stronzetta', 'cazzo', 'fottuto'}
Titolo: filmadulti46.txt, Bad words: {'puttana', 'negro'}
Titolo: filmadulti41.txt, Bad words: {'puttana', 'culo'}
Titolo: filmadulti42.txt, Bad words: {'stronzo', 'merda', 'idiota'}

Fol

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.013,0.990306,0.563107
2,0.7787,0.702587,0.68932
3,0.7252,0.667515,0.669903


		Precision	Recall	F1-score	Support
0_bambini	0.95		0.89	0.92		44.0
1_ragazzi	0.50		0.97	0.66		31.0
2_adulti	1.00		0.07	0.13		28.0

			Support
Accuracy		0.69	103
Macro avg	0.82		0.64	0.57		103
Weighted avg	0.83		0.69	0.63		103
Bad words rilevate nel fold 2:
Titolo: YTP_013.txt, Bad words: {'merda'}
Titolo: YTP_017.txt, Bad words: {'merda'}

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9862,0.999379,0.427184
2,0.7402,0.813439,0.679612
3,0.6359,0.730358,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.87		0.89	0.88		37.0
1_ragazzi	0.64		0.72	0.67		39.0
2_adulti	0.57		0.44	0.50		27.0

			Support
Accuracy		0.71	103
Macro avg	0.69		0.68	0.68		103
Weighted avg	0.70		0.71	0.70		103
Bad words rilevate nel fold 3:
Titolo: YTP_004.txt, Bad words: {'merda'}

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0078,1.016464,0.417476
2,0.8097,0.782928,0.699029
3,0.5424,0.728135,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.79		0.95	0.86		43.0
1_ragazzi	0.65		0.61	0.63		36.0
2_adulti	0.71		0.50	0.59		24.0

			Support
Accuracy		0.73	103
Macro avg	0.71		0.69	0.69		103
Weighted avg	0.72		0.73	0.72		103
Bad words rilevate nel fold 4:
Titolo: YTP_001.txt, Bad words: {'merda'}
Titolo: YTP_002.txt, Bad words: {'merda'}
Titolo: YTP_010.txt, Bad words: {'merda'}

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0186,0.938617,0.637255
2,0.788,0.701932,0.735294
3,0.6831,0.787005,0.598039


		Precision	Recall	F1-score	Support
0_bambini	0.81		0.87	0.84		39.0
1_ragazzi	0.71		0.89	0.79		38.0
2_adulti	1.00		0.48	0.65		25.0

			Support
Accuracy		0.78	102
Macro avg	0.84		0.75	0.76		102
Weighted avg	0.82		0.78	0.77		102
Bad words rilevate nel fold 5:
Titolo: YTP_006.txt, Bad words: {'merda'}
Titolo: YTP_021.txt, Bad words: {'merda'}
Titolo: YTP_022.txt, Bad words: {'merda'}
Titolo: filmadulti48.txt, Bad words: {'merda'}
Titolo: filmadulti44.txt, Bad words: {'merda'}

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.85		0.91	0.88		209.0
1_ragazzi	0.64		0.80	0.71		179.0
2_adulti	0.86		0.40	0.51		126.0

Macro avg	0.78		0.70	0.70
Weighted avg	0.78		0.74	0.72

Accuratezza media su 5 fold: 0.74

Testi con cambiamento di etichetta a causa di bad words (22 testi):
Titolo: YTP_003.txt
Testo originale: Gianni Morandi prego maestro
Vorrei che
cantassimo tutti insieme
fratelli d'Italia
vi piscio in testa
[Musica]
dio bestia
Dov'è la mia droga e porca ma Madonna
che 

# mbert cased eng

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Tokenizer di mBERT (cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0545,0.866264,0.577778
2,0.4753,0.49627,0.7
3,0.3058,0.363715,0.833333


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.64		0.73	0.68		22.0
2_adulti	0.75		0.67	0.71		27.0

			Support
Accuracy		0.83	90
Macro avg	0.80		0.80	0.80		90
Weighted avg	0.84		0.83	0.83		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.007,0.882665,0.775281
2,0.4786,0.452766,0.797753
3,0.5331,0.305367,0.876404


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.70		1.00	0.83		26.0
2_adulti	1.00		0.42	0.59		19.0

			Support
Accuracy		0.88	89
Macro avg	0.90		0.81	0.81		89
Weighted avg	0.91		0.88	0.86		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0187,0.905619,0.752809
2,0.5029,0.538341,0.786517
3,0.378,0.383129,0.820225


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		38.0
1_ragazzi	0.67		1.00	0.80		32.0
2_adulti	1.00		0.16	0.27		19.0

			Support
Accuracy		0.82	89
Macro avg	0.89		0.72	0.69		89
Weighted avg	0.88		0.82	0.77		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0177,0.935777,0.460674
2,0.3782,0.367005,0.865169
3,0.3962,0.294872,0.88764


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.77		1.00	0.87		33.0
2_adulti	1.00		0.38	0.55		16.0

			Support
Accuracy		0.89	89
Macro avg	0.92		0.79	0.80		89
Weighted avg	0.91		0.89	0.87		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0271,0.901314,0.595506
2,0.5158,0.487314,0.820225
3,0.3694,0.274374,0.910112


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.82		0.97	0.89		33.0
2_adulti	0.92		0.63	0.75		19.0

			Support
Accuracy		0.91	89
Macro avg	0.91		0.87	0.88		89
Weighted avg	0.92		0.91	0.91		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.72		0.94	0.81		146.0
2_adulti	0.93		0.45	0.57		100.0

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.72		0.94	0.81		146.0
2_adulti	0.93		0.45	0.57		100.0

Macro avg	0.88		0.80	0.80
Weighted avg	0.89		0.87	0.85

Accuratezza media su 5 fold: 0.87


# mbert cased eng badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Caricamento delle bad words in italiano
bad_words_path = '/content/drive/My Drive/badwords_eng_new.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")
    bad_words = set()  # Definisci un set vuoto in caso di errore per evitare crash

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e abbassa il testo
    return words.intersection(bad_words)

# Tokenizer di mBERT (cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

# Lista per memorizzare i testi in cui l'etichetta è cambiata
texts_with_label_changes = []

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti" se ci sono bad words
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))

            # Memorizza i testi modificati
            texts_with_label_changes.append({
                "Title": titles[test_index[i]],
                "Original Text": text,
                "Detected Bad Words": list(detected_bad_words),
                "Original Label": original_label,  # Etichetta originale (0 = bambini)
                "New Label": 2  # Nuova etichetta (2 = adulti)
            })
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

        # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

        # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {bad_words}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Stampa dei testi in cui l'etichetta è stata cambiata a causa delle bad words
if texts_with_label_changes:
    print(f"\nTesti con cambiamento di etichetta a causa di bad words ({len(texts_with_label_changes)} testi):")
    for item in texts_with_label_changes:
        print(f"Titolo: {item['Title']}")
        print(f"Testo originale: {item['Original Text']}")
        print(f"Parole inappropriate rilevate: {', '.join(item['Detected Bad Words'])}")
        print(f"Etichetta originale: {item['Original Label']}, Nuova etichetta: {item['New Label']}")
        print("-" * 80)


Mounted at /content/drive


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0389,1.00467,0.622222
2,0.4205,0.491652,0.711111
3,0.2565,0.401443,0.833333


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.65		0.68	0.67		22.0
2_adulti	0.73		0.70	0.72		27.0

			Support
Accuracy		0.83	90
Macro avg	0.79		0.80	0.79		90
Weighted avg	0.83		0.83	0.83		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.007,0.882665,0.775281
2,0.4786,0.452766,0.797753
3,0.5331,0.305367,0.876404


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.70		1.00	0.83		26.0
2_adulti	1.00		0.42	0.59		19.0

			Support
Accuracy		0.88	89
Macro avg	0.90		0.81	0.81		89
Weighted avg	0.91		0.88	0.86		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0187,0.905619,0.752809
2,0.5029,0.538341,0.786517
3,0.378,0.383129,0.820225


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		38.0
1_ragazzi	0.67		1.00	0.80		32.0
2_adulti	1.00		0.16	0.27		19.0

			Support
Accuracy		0.82	89
Macro avg	0.89		0.72	0.69		89
Weighted avg	0.88		0.82	0.77		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0177,0.935777,0.460674
2,0.3782,0.367005,0.865169
3,0.3962,0.294872,0.88764


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.77		1.00	0.87		33.0
2_adulti	1.00		0.38	0.55		16.0

			Support
Accuracy		0.89	89
Macro avg	0.92		0.79	0.80		89
Weighted avg	0.91		0.89	0.87		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0271,0.901314,0.595506
2,0.5158,0.487314,0.820225
3,0.3694,0.274374,0.910112


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.82		0.97	0.89		33.0
2_adulti	0.92		0.63	0.75		19.0

			Support
Accuracy		0.91	89
Macro avg	0.91		0.87	0.88		89
Weighted avg	0.92		0.91	0.91		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.72		0.93	0.81		146.0
2_adulti	0.93		0.46	0.58		100.0

Macro avg	0.88		0.80	0.80
Weighted avg	0.89		0.87	0.85

Accuratezza media su 5 fold: 0.87


# BERT

# bert uncased ita

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer di BERT (inglese)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8999,0.878468,0.563107
2,0.7965,0.694538,0.747573
3,0.7651,0.728509,0.708738


		Precision	Recall	F1-score	Support
0_bambini	0.87		0.89	0.88		46.0
1_ragazzi	0.62		0.86	0.72		35.0
2_adulti	0.75		0.27	0.40		22.0

			Support
Accuracy		0.75	103
Macro avg	0.75		0.67	0.67		103
Weighted avg	0.76		0.75	0.72		103

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9519,0.863848,0.631068
2,0.782,0.792349,0.640777
3,0.6325,0.651726,0.747573


		Precision	Recall	F1-score	Support
0_bambini	0.93		0.93	0.93		44.0
1_ragazzi	0.55		0.87	0.68		31.0
2_adulti	0.90		0.32	0.47		28.0

			Support
Accuracy		0.75	103
Macro avg	0.79		0.71	0.69		103
Weighted avg	0.81		0.75	0.73		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9078,0.857335,0.601942
2,0.8294,0.859559,0.582524
3,0.5765,0.710907,0.68932


		Precision	Recall	F1-score	Support
0_bambini	0.85		0.89	0.87		37.0
1_ragazzi	0.61		0.59	0.60		39.0
2_adulti	0.58		0.56	0.57		27.0

			Support
Accuracy		0.69	103
Macro avg	0.68		0.68	0.68		103
Weighted avg	0.68		0.69	0.69		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8921,0.865728,0.563107
2,0.7571,0.738874,0.737864
3,0.5949,0.645248,0.737864


		Precision	Recall	F1-score	Support
0_bambini	0.83		0.91	0.87		43.0
1_ragazzi	0.65		0.86	0.74		36.0
2_adulti	0.75		0.25	0.38		24.0

			Support
Accuracy		0.74	103
Macro avg	0.74		0.67	0.66		103
Weighted avg	0.75		0.74	0.71		103

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.929,0.86462,0.647059
2,0.8337,0.810057,0.637255
3,0.6691,0.720672,0.627451


		Precision	Recall	F1-score	Support
0_bambini	0.89		0.87	0.88		39.0
1_ragazzi	0.52		0.39	0.45		38.0
2_adulti	0.43		0.60	0.50		25.0

			Support
Accuracy		0.63	102
Macro avg	0.61		0.62	0.61		102
Weighted avg	0.64		0.63	0.63		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.87		0.90	0.89		209.0
1_ragazzi	0.59		0.71	0.64		179.0
2_adulti	0.68		0.40	0.46		126.0

Macro avg	0.71		0.67	0.66
Weighted avg	0.73		0.71	0.70

Accuratezza media su 5 fold: 0.71


# Bert uncased eng

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Tokenizer di BERT (inglese)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9862,0.827604,0.711111
2,0.4894,0.497172,0.811111
3,0.2798,0.420436,0.866667


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.67		0.91	0.77		22.0
2_adulti	0.89		0.63	0.74		27.0

			Support
Accuracy		0.87	90
Macro avg	0.85		0.85	0.84		90
Weighted avg	0.89		0.87	0.87		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9309,0.776423,0.752809
2,0.5392,0.466378,0.820225
3,0.3955,0.392092,0.831461


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.64		0.96	0.77		26.0
2_adulti	0.83		0.26	0.40		19.0

			Support
Accuracy		0.83	89
Macro avg	0.82		0.74	0.72		89
Weighted avg	0.86		0.83	0.80		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9388,0.807679,0.707865
2,0.5094,0.479157,0.876404
3,0.3851,0.381991,0.853933


		Precision	Recall	F1-score	Support
0_bambini	0.97		1.00	0.99		38.0
1_ragazzi	0.73		0.94	0.82		32.0
2_adulti	0.89		0.42	0.57		19.0

			Support
Accuracy		0.85	89
Macro avg	0.86		0.79	0.79		89
Weighted avg	0.87		0.85	0.84		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9344,0.760693,0.764045
2,0.5089,0.468639,0.842697
3,0.4252,0.368519,0.88764


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.77		1.00	0.87		33.0
2_adulti	1.00		0.38	0.55		16.0

			Support
Accuracy		0.89	89
Macro avg	0.92		0.79	0.80		89
Weighted avg	0.91		0.89	0.87		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9323,0.80734,0.764045
2,0.5186,0.483763,0.876404
3,0.3826,0.296617,0.910112


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.84		0.94	0.89		33.0
2_adulti	0.87		0.68	0.76		19.0

			Support
Accuracy		0.91	89
Macro avg	0.90		0.87	0.88		89
Weighted avg	0.91		0.91	0.91		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.99		1.00	1.00		200.0
1_ragazzi	0.73		0.95	0.82		146.0
2_adulti	0.90		0.47	0.60		100.0

Macro avg	0.87		0.81	0.81
Weighted avg	0.89		0.87	0.86

Accuratezza media su 5 fold: 0.87


# Bert cased eng

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Tokenizer di BERT (versione cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer (versione cased)
    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9787,0.901758,0.7
2,0.4359,0.464189,0.777778
3,0.2636,0.369538,0.855556


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.67		0.82	0.73		22.0
2_adulti	0.82		0.67	0.73		27.0

			Support
Accuracy		0.86	90
Macro avg	0.83		0.83	0.82		90
Weighted avg	0.86		0.86	0.86		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9903,0.833634,0.786517
2,0.4146,0.384708,0.808989
3,0.3883,0.331041,0.853933


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.67		1.00	0.80		26.0
2_adulti	1.00		0.32	0.48		19.0

			Support
Accuracy		0.85	89
Macro avg	0.89		0.77	0.76		89
Weighted avg	0.90		0.85	0.83		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9871,0.827489,0.764045
2,0.4005,0.404945,0.842697
3,0.3262,0.33417,0.853933


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		38.0
1_ragazzi	0.71		1.00	0.83		32.0
2_adulti	1.00		0.32	0.48		19.0

			Support
Accuracy		0.85	89
Macro avg	0.90		0.77	0.77		89
Weighted avg	0.90		0.85	0.83		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9921,0.817409,0.808989
2,0.3876,0.388015,0.853933
3,0.4226,0.299795,0.876404


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.76		0.97	0.85		33.0
2_adulti	0.86		0.38	0.52		16.0

			Support
Accuracy		0.88	89
Macro avg	0.87		0.78	0.79		89
Weighted avg	0.89		0.88	0.86		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9913,0.821279,0.786517
2,0.4041,0.356052,0.910112
3,0.358,0.27788,0.88764


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.87		0.82	0.84		33.0
2_adulti	0.71		0.79	0.75		19.0

			Support
Accuracy		0.89	89
Macro avg	0.86		0.87	0.86		89
Weighted avg	0.89		0.89	0.89		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.74		0.92	0.81		146.0
2_adulti	0.88		0.49	0.59		100.0

Macro avg	0.87		0.80	0.80
Weighted avg	0.89		0.87	0.85

Accuratezza media su 5 fold: 0.87


# Bert cased eng badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/Esperimento_eng'

# Caricamento delle bad words in inglese
bad_words_path = '/content/drive/My Drive/badwords_eng_new.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e abbassa il testo
    return words.intersection(bad_words)

# Tokenizer di BERT (versione cased)
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer (versione cased)
    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti"
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

    # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {bad_words}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9787,0.901758,0.7
2,0.4359,0.464189,0.777778
3,0.2636,0.369538,0.855556


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		41.0
1_ragazzi	0.67		0.82	0.73		22.0
2_adulti	0.82		0.67	0.73		27.0

			Support
Accuracy		0.86	90
Macro avg	0.83		0.83	0.82		90
Weighted avg	0.86		0.86	0.86		90

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9903,0.833634,0.786517
2,0.4146,0.384708,0.808989
3,0.3883,0.331041,0.853933


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		44.0
1_ragazzi	0.67		1.00	0.80		26.0
2_adulti	1.00		0.32	0.48		19.0

			Support
Accuracy		0.85	89
Macro avg	0.89		0.77	0.76		89
Weighted avg	0.90		0.85	0.83		89

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9871,0.827489,0.764045
2,0.4005,0.404945,0.842697
3,0.3262,0.33417,0.853933


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		38.0
1_ragazzi	0.71		1.00	0.83		32.0
2_adulti	1.00		0.32	0.48		19.0

			Support
Accuracy		0.85	89
Macro avg	0.90		0.77	0.77		89
Weighted avg	0.90		0.85	0.83		89

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9921,0.817409,0.808989
2,0.3876,0.388015,0.853933
3,0.4226,0.299795,0.876404


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		40.0
1_ragazzi	0.76		0.97	0.85		33.0
2_adulti	0.86		0.38	0.52		16.0

			Support
Accuracy		0.88	89
Macro avg	0.87		0.78	0.79		89
Weighted avg	0.89		0.88	0.86		89

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.9913,0.821279,0.786517
2,0.4041,0.356052,0.910112
3,0.358,0.27788,0.88764


		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		37.0
1_ragazzi	0.87		0.82	0.84		33.0
2_adulti	0.71		0.79	0.75		19.0

			Support
Accuracy		0.89	89
Macro avg	0.86		0.87	0.86		89
Weighted avg	0.89		0.89	0.89		89

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	1.00		1.00	1.00		200.0
1_ragazzi	0.74		0.92	0.81		146.0
2_adulti	0.88		0.49	0.59		100.0

Macro avg	0.87		0.80	0.80
Weighted avg	0.89		0.87	0.85

Accuratezza media su 5 fold: 0.87


# Bert cased ita

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer di BERT (versione cased per inglese)
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1462,1.052214,0.466019
2,0.8026,0.733716,0.757282
3,0.6597,1.024455,0.592233


		Precision	Recall	F1-score	Support
0_bambini	0.77		0.96	0.85		46.0
1_ragazzi	0.81		0.71	0.76		35.0
2_adulti	0.60		0.41	0.49		22.0

			Support
Accuracy		0.76	103
Macro avg	0.73		0.69	0.70		103
Weighted avg	0.75		0.76	0.74		103

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1194,1.08331,0.38835
2,0.7586,0.715793,0.728155
3,0.6261,0.717078,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.82		0.95	0.88		44.0
1_ragazzi	0.59		0.87	0.70		31.0
2_adulti	1.00		0.21	0.35		28.0

			Support
Accuracy		0.73	103
Macro avg	0.80		0.68	0.65		103
Weighted avg	0.80		0.73	0.68		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1373,1.054693,0.378641
2,0.8246,0.85988,0.660194
3,0.6322,0.773743,0.679612


		Precision	Recall	F1-score	Support
0_bambini	0.81		0.70	0.75		37.0
1_ragazzi	0.63		0.82	0.71		39.0
2_adulti	0.60		0.44	0.51		27.0

			Support
Accuracy		0.68	103
Macro avg	0.68		0.66	0.66		103
Weighted avg	0.69		0.68	0.67		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1367,1.064904,0.359223
2,0.7578,0.804839,0.718447
3,0.5845,0.704514,0.718447


		Precision	Recall	F1-score	Support
0_bambini	0.81		0.79	0.80		43.0
1_ragazzi	0.64		0.89	0.74		36.0
2_adulti	0.73		0.33	0.46		24.0

			Support
Accuracy		0.72	103
Macro avg	0.73		0.67	0.67		103
Weighted avg	0.73		0.72	0.70		103

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.094,1.014906,0.578431
2,0.7708,1.011697,0.509804
3,0.6752,0.804774,0.598039


		Precision	Recall	F1-score	Support
0_bambini	0.74		0.95	0.83		39.0
1_ragazzi	0.62		0.13	0.22		38.0
2_adulti	0.43		0.76	0.55		25.0

			Support
Accuracy		0.60	102
Macro avg	0.60		0.61	0.53		102
Weighted avg	0.62		0.60	0.53		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.79		0.87	0.82		209.0
1_ragazzi	0.66		0.69	0.63		179.0
2_adulti	0.67		0.43	0.47		126.0

Macro avg	0.71		0.66	0.64
Weighted avg	0.72		0.70	0.67

Accuratezza media su 5 fold: 0.70


# Bert cased ita badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Caricamento delle bad words in italiano
bad_words_path = '/content/drive/My Drive/bad_words.txt'  # Percorso corretto
try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")
    bad_words = set()  # Set vuoto in caso di errore

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e abbassa il testo
    return words.intersection(bad_words)

# Tokenizer di BERT (versione cased per inglese)
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Lista per memorizzare i testi in cui l'etichetta è cambiata
texts_with_label_changes = []

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti"
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))

            # Memorizza i testi modificati
            texts_with_label_changes.append({
                "Title": titles[test_index[i]],
                "Original Text": text,
                "Detected Bad Words": list(detected_bad_words),
                "Original Label": original_label,  # Etichetta originale (0 = bambini)
                "New Label": 2  # Nuova etichetta (2 = adulti)
            })
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

    # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {', '.join(bad_words)}")
# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Stampa dei testi in cui l'etichetta è stata cambiata a causa delle bad words
if texts_with_label_changes:
    print(f"\nTesti con cambiamento di etichetta a causa di bad words ({len(texts_with_label_changes)} testi):")
    for item in texts_with_label_changes:
        print(f"Titolo: {item['Title']}")
        print(f"Testo originale: {item['Original Text']}")
        print(f"Parole inappropriate rilevate: {', '.join(item['Detected Bad Words'])}")
        print(f"Etichetta originale: {item['Original Label']}, Nuova etichetta: {item['New Label']}")
        print("-" * 80)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1462,1.052214,0.466019
2,0.8026,0.733716,0.757282
3,0.6597,1.024455,0.592233


		Precision	Recall	F1-score	Support
0_bambini	0.86		0.96	0.91		46.0
1_ragazzi	0.81		0.71	0.76		35.0
2_adulti	0.71		0.68	0.70		22.0

			Support
Accuracy		0.82	103
Macro avg	0.79		0.78	0.79		103
Weighted avg	0.81		0.82	0.81		103
Bad words rilevate nel fold 1:
Titolo: YTP_003.txt, Bad words: troia, cazzetto, piscio, cazzata
Titolo: YTP_012.txt, Bad words: bastardo, cazzi, cazzo
Titolo: YTP_015.txt, Bad words: bastardo, scopata, minchia, cazzo
Titolo: YTP_019.txt, Bad words: culo, affanculo, cazzo
Titolo: filmadulti41.txt, Bad words: culo, puttana
Titolo: filmadulti42.txt, Bad words: idiota, stronzo, merda

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1194,1.08331,0.38835
2,0.7586,0.715793,0.728155
3,0.6261,0.717078,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.88		0.95	0.91		44.0
1_ragazzi	0.59		0.87	0.70		31.0
2_adulti	1.00		0.32	0.49		28.0

			Support
Accuracy		0.76	103
Macro avg	0.82		0.72	0.70		103
Weighted avg	0.82		0.76	0.73		103
Bad words rilevate nel fold 2:
Titolo: YTP_013.txt, Bad words: merda
Titolo: YTP_017.txt, Bad words: merda
Titolo: YTP_018.txt, Bad words: merda

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1373,1.054693,0.378641
2,0.8246,0.85988,0.660194
3,0.6322,0.773743,0.679612


		Precision	Recall	F1-score	Support
0_bambini	0.81		0.70	0.75		37.0
1_ragazzi	0.63		0.82	0.71		39.0
2_adulti	0.60		0.44	0.51		27.0

			Support
Accuracy		0.68	103
Macro avg	0.68		0.66	0.66		103
Weighted avg	0.69		0.68	0.67		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1367,1.064904,0.359223
2,0.7578,0.804839,0.718447
3,0.5845,0.704514,0.718447


		Precision	Recall	F1-score	Support
0_bambini	0.85		0.79	0.82		43.0
1_ragazzi	0.64		0.89	0.74		36.0
2_adulti	0.77		0.42	0.54		24.0

			Support
Accuracy		0.74	103
Macro avg	0.75		0.70	0.70		103
Weighted avg	0.76		0.74	0.73		103
Bad words rilevate nel fold 4:
Titolo: YTP_002.txt, Bad words: merda
Titolo: YTP_010.txt, Bad words: merda

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.094,1.014906,0.578431
2,0.7708,1.011697,0.509804
3,0.6752,0.804774,0.598039


		Precision	Recall	F1-score	Support
0_bambini	0.79		0.95	0.86		39.0
1_ragazzi	0.62		0.13	0.22		38.0
2_adulti	0.47		0.88	0.61		25.0

			Support
Accuracy		0.63	102
Macro avg	0.63		0.65	0.56		102
Weighted avg	0.65		0.63	0.56		102
Bad words rilevate nel fold 5:
Titolo: YTP_006.txt, Bad words: merda
Titolo: YTP_021.txt, Bad words: merda
Titolo: YTP_022.txt, Bad words: merda

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.84		0.87	0.85		209.0
1_ragazzi	0.66		0.69	0.63		179.0
2_adulti	0.71		0.55	0.57		126.0

Macro avg	0.73		0.70	0.68
Weighted avg	0.75		0.72	0.70

Accuratezza media su 5 fold: 0.72

Testi con cambiamento di etichetta a causa di bad words (14 testi):
Titolo: YTP_003.txt
Testo originale: Gianni Morandi prego maestro
Vorrei che
cantassimo tutti insieme
fratelli d'Italia
vi piscio in testa
[Musica]
dio bestia
Dov'è la mia droga e porca ma Madonna
che schiava chiava
Dio porco
[Musica]
Ciao stronza ho deciso di scriverti una
lettera voglio farti una premess

# dbmdz

# dbmdz ita cased

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer di BERT specifico per l'italiano
tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-base-italian-cased')  # Puoi usare anche la versione uncased

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('dbmdz/bert-base-italian-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


tokenizer_config.json:   0%|          | 0.00/59.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/235k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/433 [00:00<?, ?B/s]




Fold 1




model.safetensors:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0159,0.977863,0.679612
2,0.7746,0.703113,0.718447
3,0.6582,0.859408,0.621359


		Precision	Recall	F1-score	Support
0_bambini	0.77		0.96	0.85		46.0
1_ragazzi	0.64		0.80	0.71		35.0
2_adulti	1.00		0.09	0.17		22.0

			Support
Accuracy		0.72	103
Macro avg	0.80		0.62	0.58		103
Weighted avg	0.77		0.72	0.66		103

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0151,0.96094,0.650485
2,0.7401,0.685977,0.718447
3,0.6147,0.679428,0.728155


		Precision	Recall	F1-score	Support
0_bambini	0.95		0.86	0.90		44.0
1_ragazzi	0.55		0.97	0.70		31.0
2_adulti	0.88		0.25	0.39		28.0

			Support
Accuracy		0.73	103
Macro avg	0.79		0.69	0.66		103
Weighted avg	0.81		0.73	0.70		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.014,0.99407,0.640777
2,0.718,0.771715,0.669903
3,0.5813,0.750979,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.79		0.84	0.82		37.0
1_ragazzi	0.62		0.79	0.70		39.0
2_adulti	0.71		0.37	0.49		27.0

			Support
Accuracy		0.70	103
Macro avg	0.71		0.67	0.67		103
Weighted avg	0.71		0.70	0.68		103

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0099,0.981162,0.601942
2,0.6861,0.811373,0.68932
3,0.6023,0.756578,0.68932


		Precision	Recall	F1-score	Support
0_bambini	0.78		0.84	0.81		43.0
1_ragazzi	0.61		0.83	0.71		36.0
2_adulti	0.62		0.21	0.31		24.0

			Support
Accuracy		0.69	103
Macro avg	0.67		0.63	0.61		103
Weighted avg	0.69		0.69	0.66		103

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0402,0.965988,0.666667
2,0.7309,0.742987,0.676471
3,0.6678,0.684118,0.754902


		Precision	Recall	F1-score	Support
0_bambini	0.79		0.95	0.86		39.0
1_ragazzi	0.78		0.66	0.71		38.0
2_adulti	0.65		0.60	0.62		25.0

			Support
Accuracy		0.75	102
Macro avg	0.74		0.74	0.73		102
Weighted avg	0.75		0.75	0.75		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.82		0.89	0.85		209.0
1_ragazzi	0.64		0.81	0.70		179.0
2_adulti	0.77		0.30	0.40		126.0

Macro avg	0.74		0.67	0.65
Weighted avg	0.75		0.72	0.69

Accuratezza media su 5 fold: 0.72


# dbmdz ita cased badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Caricamento delle bad words in italiano
bad_words_path = '/content/drive/My Drive/bad_words.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")
    bad_words = set()  # Definisci un set vuoto in caso di errore per evitare crash

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e abbassa il testo
    return words.intersection(bad_words)

# Tokenizer di BERT specifico per l'italiano
tokenizer = BertTokenizer.from_pretrained('dbmdz/bert-base-italian-cased')  # Puoi usare anche la versione uncased

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Lista per memorizzare i testi in cui l'etichetta è cambiata
texts_with_label_changes = []

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True
    )

    # Inizializzazione del modello e del trainer
    model = BertForSequenceClassification.from_pretrained('dbmdz/bert-base-italian-cased', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti"
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))

            # Memorizza i testi modificati
            texts_with_label_changes.append({
                "Title": titles[test_index[i]],
                "Original Text": text,
                "Detected Bad Words": list(detected_bad_words),
                "Original Label": original_label,  # Etichetta originale (0 = bambini)
                "New Label": 2  # Nuova etichetta (2 = adulti)
            })
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

    # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {bad_words}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Stampa dei testi in cui l'etichetta è stata cambiata a causa delle bad words
if texts_with_label_changes:
    print(f"\nTesti con cambiamento di etichetta a causa di bad words ({len(texts_with_label_changes)} testi):")
    for item in texts_with_label_changes:
        print(f"Titolo: {item['Title']}")
        print(f"Testo originale: {item['Original Text']}")
        print(f"Parole inappropriate rilevate: {', '.join(item['Detected Bad Words'])}")
        print(f"Etichetta originale: {item['Original Label']}, Nuova etichetta: {item['New Label']}")
        print("-" * 80)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Fold 1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0159,0.977863,0.679612
2,0.7746,0.703113,0.718447
3,0.6582,0.859408,0.621359


		Precision	Recall	F1-score	Support
0_bambini	0.83		0.96	0.89		46.0
1_ragazzi	0.64		0.80	0.71		35.0
2_adulti	1.00		0.27	0.43		22.0

			Support
Accuracy		0.76	103
Macro avg	0.82		0.68	0.68		103
Weighted avg	0.80		0.76	0.73		103
Bad words rilevate nel fold 1:
Titolo: YTP_003.txt, Bad words: {'troia', 'cazzetto', 'piscio', 'cazzata'}
Titolo: YTP_012.txt, Bad words: {'bastardo', 'cazzi', 'cazzo'}
Titolo: YTP_015.txt, Bad words: {'bastardo', 'scopata', 'minchia', 'cazzo'}
Titolo: YTP_019.txt, Bad words: {'culo', 'affanculo', 'cazzo'}

Fold 2


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0151,0.96094,0.650485
2,0.7401,0.685977,0.718447
3,0.6147,0.679428,0.728155


		Precision	Recall	F1-score	Support
0_bambini	0.95		0.86	0.90		44.0
1_ragazzi	0.55		0.97	0.70		31.0
2_adulti	0.88		0.25	0.39		28.0

			Support
Accuracy		0.73	103
Macro avg	0.79		0.69	0.66		103
Weighted avg	0.81		0.73	0.70		103

Fold 3


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.014,0.99407,0.640777
2,0.718,0.771715,0.669903
3,0.5813,0.750979,0.699029


		Precision	Recall	F1-score	Support
0_bambini	0.86		0.84	0.85		37.0
1_ragazzi	0.62		0.79	0.70		39.0
2_adulti	0.76		0.48	0.59		27.0

			Support
Accuracy		0.73	103
Macro avg	0.75		0.70	0.71		103
Weighted avg	0.74		0.73	0.72		103
Bad words rilevate nel fold 3:
Titolo: YTP_004.txt, Bad words: {'culo', 'cazzo'}
Titolo: YTP_011.txt, Bad words: {'culo', 'cazzo'}
Titolo: YTP_014.txt, Bad words: {'culo'}

Fold 4


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0099,0.981162,0.601942
2,0.6861,0.811373,0.68932
3,0.6023,0.756578,0.68932


		Precision	Recall	F1-score	Support
0_bambini	0.82		0.84	0.83		43.0
1_ragazzi	0.61		0.83	0.71		36.0
2_adulti	0.70		0.29	0.41		24.0

			Support
Accuracy		0.71	103
Macro avg	0.71		0.65	0.65		103
Weighted avg	0.72		0.71	0.69		103
Bad words rilevate nel fold 4:
Titolo: YTP_002.txt, Bad words: {'culo'}
Titolo: YTP_009.txt, Bad words: {'culo'}

Fold 5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-italian-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0402,0.965988,0.666667
2,0.7309,0.742987,0.676471
3,0.6678,0.684118,0.754902


		Precision	Recall	F1-score	Support
0_bambini	0.80		0.95	0.87		39.0
1_ragazzi	0.78		0.66	0.71		38.0
2_adulti	0.67		0.64	0.65		25.0

			Support
Accuracy		0.76	102
Macro avg	0.75		0.75	0.75		102
Weighted avg	0.76		0.76	0.76		102
Bad words rilevate nel fold 5:
Titolo: YTP_022.txt, Bad words: {'culo'}

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.85		0.89	0.87		209.0
1_ragazzi	0.64		0.81	0.70		179.0
2_adulti	0.80		0.39	0.49		126.0

Macro avg	0.76		0.70	0.69
Weighted avg	0.77		0.74	0.72

Accuratezza media su 5 fold: 0.74

Testi con cambiamento di etichetta a causa di bad words (10 testi):
Titolo: YTP_003.txt
Testo originale: Gianni Morandi prego maestro
Vorrei che
cantassimo tutti insieme
fratelli d'Italia
vi piscio in testa
[Musica]
dio bestia
Dov'è la mia droga e porca ma Madonna
che schiava chiava
Dio porco
[Musica]
Ciao stronza ho deciso di scriverti una
lettera voglio farti una premessa
ho sesso
Ho deciso che mi viene da piangere
e non sai la gente mi conos

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.82		0.89	0.85		209.0
1_ragazzi	0.64		0.81	0.70		179.0
2_adulti	0.77		0.30	0.40		126.0

Macro avg	0.74		0.67	0.65
Weighted avg	0.75		0.72	0.69

Accuratezza media su 5 fold: 0.72

# ELECTRA

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Tokenizer specifico per ELECTRA in italiano
tokenizer = ElectraTokenizer.from_pretrained('dbmdz/electra-base-italian-xxl-cased-discriminator')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",  # Valutazione a ogni epoca
        save_strategy="no",  # Disabilita il salvataggio dei checkpoint intermedi
        load_best_model_at_end=False  # Non caricare il miglior modello alla fine
    )

    # Inizializzazione del modello e del trainer
    model = ElectraForSequenceClassification.from_pretrained('dbmdz/electra-base-italian-xxl-cased-discriminator', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, predicted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Salva il modello finale
trainer.save_model(output_dir="./final_model")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).





Fold 1


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0525,1.044491,0.592233
2,0.8742,0.766153,0.728155
3,0.6393,0.802128,0.631068


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


		Precision	Recall	F1-score	Support
0_bambini	0.97		0.67	0.79		46.0
1_ragazzi	0.48		0.97	0.64		35.0
2_adulti	0.00		0.00	0.00		22.0

			Support
Accuracy		0.63	103
Macro avg	0.48		0.55	0.48		103
Weighted avg	0.60		0.63	0.57		103

Fold 2


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0637,1.057522,0.61165
2,0.8347,0.791359,0.650485
3,0.5862,0.638863,0.747573


		Precision	Recall	F1-score	Support
0_bambini	0.93		0.86	0.89		44.0
1_ragazzi	0.57		0.94	0.71		31.0
2_adulti	0.91		0.36	0.51		28.0

			Support
Accuracy		0.75	103
Macro avg	0.80		0.72	0.70		103
Weighted avg	0.81		0.75	0.73		103

Fold 3


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0593,1.068272,0.475728
2,0.8537,0.832007,0.650485
3,0.595,0.666384,0.737864


		Precision	Recall	F1-score	Support
0_bambini	0.80		0.97	0.88		37.0
1_ragazzi	0.65		0.85	0.73		39.0
2_adulti	1.00		0.26	0.41		27.0

			Support
Accuracy		0.74	103
Macro avg	0.82		0.69	0.67		103
Weighted avg	0.79		0.74	0.70		103

Fold 4


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0671,1.054812,0.621359
2,0.8569,0.812326,0.669903
3,0.5563,0.605449,0.757282


		Precision	Recall	F1-score	Support
0_bambini	0.81		0.98	0.88		43.0
1_ragazzi	0.68		0.83	0.75		36.0
2_adulti	0.86		0.25	0.39		24.0

			Support
Accuracy		0.76	103
Macro avg	0.78		0.69	0.67		103
Weighted avg	0.78		0.76	0.72		103

Fold 5


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0709,1.054137,0.480392
2,0.8729,0.791923,0.705882
3,0.707,0.71074,0.794118


		Precision	Recall	F1-score	Support
0_bambini	0.75		1.00	0.86		39.0
1_ragazzi	0.80		0.84	0.82		38.0
2_adulti	1.00		0.40	0.57		25.0

			Support
Accuracy		0.79	102
Macro avg	0.85		0.75	0.75		102
Weighted avg	0.83		0.79	0.77		102

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.85		0.90	0.86		209.0
1_ragazzi	0.64		0.89	0.73		179.0
2_adulti	0.75		0.25	0.38		126.0

Macro avg	0.75		0.68	0.66
Weighted avg	0.76		0.73	0.70

Accuratezza media su 5 fold: 0.73


ValueError: You are trying to save a non contiguous tensor: `electra.encoder.layer.0.attention.self.query.weight` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

# electra con badwords

In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, classification_report
from nltk.tokenize import word_tokenize
import nltk

# Montaggio di Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Scarica risorse di NLTK per la tokenizzazione
nltk.download('punkt')

# Percorso della cartella dei dati su Google Drive
data_path = '/content/drive/My Drive/esperimento'

# Caricamento delle bad words in italiano
bad_words_path = '/content/drive/My Drive/bad_words.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
except Exception as e:
    print(f"Error loading bad words: {e}")
    bad_words = set()  # Definisci un set vuoto in caso di errore per evitare crash

# Funzione per rilevare bad words
def detect_bad_words(text, bad_words):
    words = set(word_tokenize(text.lower()))  # Tokenizza e converte tutto in minuscolo
    return words.intersection(bad_words)

# Tokenizer specifico per ELECTRA in italiano
tokenizer = ElectraTokenizer.from_pretrained('dbmdz/electra-base-italian-xxl-cased-discriminator')

# Classe Dataset
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Funzione per calcolare le metriche
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# Funzione per caricare i dati
def load_data(data_path):
    texts, labels, titles = [], [], []
    label_dict = {'0_bambini': 0, '1_ragazzi': 1, '2_adulti': 2}
    for label, index in label_dict.items():
        folder_path = os.path.join(data_path, label)
        for filename in os.listdir(folder_path):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                texts.append(file.read())
                labels.append(index)
                titles.append(filename)  # Salva il nome del file come titolo
    return texts, labels, titles

# Caricamento dei dati
texts, labels, titles = load_data(data_path)

# Inizializza dizionari per raccogliere i risultati medi
precision_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
recall_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
f1_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}
support_sum = {class_name: 0 for class_name in ['0_bambini', '1_ragazzi', '2_adulti']}

accuracy_per_fold = []
macro_avg_precision_sum, macro_avg_recall_sum, macro_avg_f1_sum = 0, 0, 0
weighted_avg_precision_sum, weighted_avg_recall_sum, weighted_avg_f1_sum = 0, 0, 0

fold = 1

# K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Lista per memorizzare i testi in cui l'etichetta è cambiata
texts_with_label_changes = []

for train_index, test_index in kfold.split(texts):
    print(f'\nFold {fold}')
    fold += 1

    # Divisione in train e test per il fold corrente
    train_texts = [texts[i] for i in train_index]
    test_texts = [texts[i] for i in test_index]
    train_labels = [labels[i] for i in train_index]
    test_labels = [labels[i] for i in test_index]

    # Creazione dei dataset
    train_dataset = TextDataset(train_texts, train_labels, tokenizer)
    test_dataset = TextDataset(test_texts, test_labels, tokenizer)

    # Configurazione dei parametri di training
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",  # Valutazione a ogni epoca
        save_strategy="no",  # Disabilita il salvataggio dei checkpoint intermedi
        load_best_model_at_end=False  # Non caricare il miglior modello alla fine
    )

    # Inizializzazione del modello e del trainer
    model = ElectraForSequenceClassification.from_pretrained('dbmdz/electra-base-italian-xxl-cased-discriminator', num_labels=3)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics
    )

    # Training del modello
    trainer.train()

    # Valutazione sul test del fold corrente
    predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(predictions.predictions, axis=1)

    # Aggiungi il controllo delle bad words
    adjusted_labels = []
    detected_bad_words_list = []

    for i, text in enumerate(test_texts):
        detected_bad_words = detect_bad_words(text, bad_words)
        original_label = predicted_labels[i]
        if original_label == 0 and detected_bad_words:
            adjusted_labels.append(2)  # Cambia l'etichetta in "adulti" se ci sono bad words
            detected_bad_words_list.append((titles[test_index[i]], detected_bad_words))

            # Memorizza i testi modificati
            texts_with_label_changes.append({
                "Title": titles[test_index[i]],
                "Original Text": text,
                "Detected Bad Words": list(detected_bad_words),
                "Original Label": original_label,  # Etichetta originale (0 = bambini)
                "New Label": 2  # Nuova etichetta (2 = adulti)
            })
        else:
            adjusted_labels.append(original_label)

    # Calcolo del classification report
    target_names = ['0_bambini', '1_ragazzi', '2_adulti']
    report = classification_report(test_labels, adjusted_labels, target_names=target_names, output_dict=True)

    # Stampa del classification report formattato
    print("\t\tPrecision\tRecall\tF1-score\tSupport")
    for class_name in target_names:
        class_report = report[class_name]
        print(f"{class_name}\t{class_report['precision']:.2f}\t\t{class_report['recall']:.2f}\t{class_report['f1-score']:.2f}\t\t{class_report['support']}")

    print("\n\t\t\tSupport")
    print(f"Accuracy\t\t{report['accuracy']:.2f}\t{len(test_labels)}")
    print(f"Macro avg\t{report['macro avg']['precision']:.2f}\t\t{report['macro avg']['recall']:.2f}\t{report['macro avg']['f1-score']:.2f}\t\t{len(test_labels)}")
    print(f"Weighted avg\t{report['weighted avg']['precision']:.2f}\t\t{report['weighted avg']['recall']:.2f}\t{report['weighted avg']['f1-score']:.2f}\t\t{len(test_labels)}")

    # Somma i valori di precision, recall, f1 e support per ogni classe
    for class_name in target_names:
        precision_sum[class_name] += report[class_name]['precision']
        recall_sum[class_name] += report[class_name]['recall']
        f1_sum[class_name] += report[class_name]['f1-score']
        support_sum[class_name] += report[class_name]['support']

    # Somma per macro avg e weighted avg
    macro_avg_precision_sum += report['macro avg']['precision']
    macro_avg_recall_sum += report['macro avg']['recall']
    macro_avg_f1_sum += report['macro avg']['f1-score']

    weighted_avg_precision_sum += report['weighted avg']['precision']
    weighted_avg_recall_sum += report['weighted avg']['recall']
    weighted_avg_f1_sum += report['weighted avg']['f1-score']

    # Aggiungi l'accuratezza alla lista per calcolare la media finale
    accuracy_per_fold.append(report['accuracy'])

    # Stampa i testi in cui sono state rilevate bad words
    if detected_bad_words_list:
        print(f"Bad words rilevate nel fold {fold - 1}:")
        for title, bad_words in detected_bad_words_list:
            print(f"Titolo: {title}, Bad words: {bad_words}")

# Media dei risultati per ogni classe
print("\nRisultati medi su 5 fold:")
print("\t\tPrecision\tRecall\tF1-score\tSupport")
for class_name in target_names:
    print(f"{class_name}\t{(precision_sum[class_name]/5):.2f}\t\t{(recall_sum[class_name]/5):.2f}\t{(f1_sum[class_name]/5):.2f}\t\t{support_sum[class_name]}")

# Media per Macro avg e Weighted avg
print(f"\nMacro avg\t{(macro_avg_precision_sum/5):.2f}\t\t{(macro_avg_recall_sum/5):.2f}\t{(macro_avg_f1_sum/5):.2f}")
print(f"Weighted avg\t{(weighted_avg_precision_sum/5):.2f}\t\t{(weighted_avg_recall_sum/5):.2f}\t{(weighted_avg_f1_sum/5):.2f}")

# Stampa dell'accuratezza media finale
print(f"\nAccuratezza media su 5 fold: {np.mean(accuracy_per_fold):.2f}")

# Stampa dei testi in cui l'etichetta è stata cambiata a causa delle bad words
if texts_with_label_changes:
    print(f"\nTesti con cambiamento di etichetta a causa di bad words ({len(texts_with_label_changes)} testi):")
    for item in texts_with_label_changes:
        print(f"Titolo: {item['Title']}")
        print(f"Testo originale: {item['Original Text']}")
        print(f"Parole inappropriate rilevate: {', '.join(item['Detected Bad Words'])}")
        print(f"Etichetta originale: {item['Original Label']}, Nuova etichetta: {item['New Label']}")
        print("-" * 80)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Fold 1


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0696,1.046074,0.669903
2,0.8803,0.761209,0.737864
3,0.6297,0.764629,0.718447


		Precision	Recall	F1-score	Support
0_bambini	0.97		0.72	0.82		46.0
1_ragazzi	0.56		0.97	0.71		35.0
2_adulti	1.00		0.36	0.53		22.0

			Support
Accuracy		0.73	103
Macro avg	0.84		0.68	0.69		103
Weighted avg	0.84		0.73	0.72		103
Bad words rilevate nel fold 1:
Titolo: YTP_012.txt, Bad words: {'bastardo', 'cazzi', 'cazzo'}

Fold 2


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0637,1.057522,0.61165
2,0.8347,0.791359,0.650485
3,0.5862,0.638863,0.747573


		Precision	Recall	F1-score	Support
0_bambini	0.95		0.86	0.90		44.0
1_ragazzi	0.57		0.94	0.71		31.0
2_adulti	0.92		0.39	0.55		28.0

			Support
Accuracy		0.76	103
Macro avg	0.81		0.73	0.72		103
Weighted avg	0.83		0.76	0.75		103
Bad words rilevate nel fold 2:
Titolo: YTP_007.txt, Bad words: {'cazzi'}

Fold 3


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0593,1.068272,0.475728
2,0.8537,0.832007,0.650485
3,0.595,0.666384,0.737864


		Precision	Recall	F1-score	Support
0_bambini	0.82		0.97	0.89		37.0
1_ragazzi	0.65		0.85	0.73		39.0
2_adulti	1.00		0.30	0.46		27.0

			Support
Accuracy		0.75	103
Macro avg	0.82		0.71	0.69		103
Weighted avg	0.80		0.75	0.72		103
Bad words rilevate nel fold 3:
Titolo: YTP_011.txt, Bad words: {'cazzi'}

Fold 4


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0671,1.054812,0.621359
2,0.8569,0.812326,0.669903
3,0.5563,0.605449,0.757282


		Precision	Recall	F1-score	Support
0_bambini	0.82		0.98	0.89		43.0
1_ragazzi	0.68		0.83	0.75		36.0
2_adulti	0.88		0.29	0.44		24.0

			Support
Accuracy		0.77	103
Macro avg	0.79		0.70	0.69		103
Weighted avg	0.79		0.77	0.74		103
Bad words rilevate nel fold 4:
Titolo: YTP_001.txt, Bad words: {'cazzi'}

Fold 5


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at dbmdz/electra-base-italian-xxl-cased-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0709,1.054137,0.480392
2,0.8729,0.791923,0.705882
3,0.707,0.71074,0.794118


		Precision	Recall	F1-score	Support
0_bambini	0.76		1.00	0.87		39.0
1_ragazzi	0.80		0.84	0.82		38.0
2_adulti	1.00		0.44	0.61		25.0

			Support
Accuracy		0.80	102
Macro avg	0.85		0.76	0.77		102
Weighted avg	0.84		0.80	0.79		102
Bad words rilevate nel fold 5:
Titolo: YTP_016.txt, Bad words: {'cazzi'}

Risultati medi su 5 fold:
		Precision	Recall	F1-score	Support
0_bambini	0.87		0.91	0.88		209.0
1_ragazzi	0.65		0.89	0.74		179.0
2_adulti	0.96		0.36	0.52		126.0

Macro avg	0.82		0.72	0.71
Weighted avg	0.82		0.76	0.74

Accuratezza media su 5 fold: 0.76

Testi con cambiamento di etichetta a causa di bad words (5 testi):
Titolo: YTP_012.txt
Testo originale: [Musica]
[Musica]
non è possibile non è matematicamente
Basta basta basta
Ecco gli amici di Peppa
[rumore di esplosione] 
Ognuno di loro ha una letterina per Babbo Natale
[Musica]
che cazzo avete chiesto a Babbo Bastardo?
una navicella aziale
una chitarra a giocare
Totò
una trombetta
una racchetta con la racchetta
George ha chiesto un
coltel

In [None]:
# Ricarica il file delle bad words
bad_words_path = '/content/drive/My Drive/badwords_ita.txt'

try:
    with open(bad_words_path, 'r', encoding='utf-8') as file:
        bad_words = set(line.strip() for line in file)
        print(f"Bad words caricate correttamente: {len(bad_words)} parole.")
except Exception as e:
    print(f"Error loading bad words: {e}")
    bad_words = set()  # Definisci un set vuoto in caso di errore per evitare crash


Error loading bad words: [Errno 2] No such file or directory: '/content/drive/My Drive/badwords_ita.txt'
