In [None]:
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import os
import torch
import torch.nn as nn
from torchvision.models import resnet18
import torch.optim as optim
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# problema librerie che usano OpenMP, due versioni di OpenMP sono caricate nello stesso processo, ad esempio da PyTorch e da NumPy

In [2]:
dataset_path = os.path.join(os.getcwd(), "DAML_project", 'data_images')
print(dataset_path)

/home/user/DAML/DAML_project/data_images


In [3]:
# Mean e std per immagini RGB normalizzate su [-1, 1]
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# per fare data augmentation utiliziammo transforms.Compose di torchvision, facciamo data augmentation solo ai set di train

In [4]:
train_dataset = datasets.ImageFolder(root=os.path.join(dataset_path, "train"), transform=train_transform)
val_dataset = datasets.ImageFolder(root=os.path.join(dataset_path, "valid"), transform=val_test_transform)
test_dataset = datasets.ImageFolder(root=os.path.join(dataset_path, "test"), transform=val_test_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Classi leggibili (opzionale)
pretty_classes = ['Adenocarcinoma', 'Adgelcarcinoma', 'Squamosgelcarcinoma', 'Noncancer']

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(pretrained=True) # trasnfer learning

# congela i layer convoluzionali per fare fine-tuning solo sull'ultimo layer
for param in model.parameters():
    param.requires_grad = False

# Sblocca solo l'ultimo FC Layer
num_features = model.fc.in_features # ultimo layer che mappa le classi
model.fc = nn.Linear(num_features, 4)  # 4 classi

model = model.to(device)




In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

In [7]:
def train_model(model, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        scheduler.step()

        # Valutazione
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
        val_acc = correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f}")


In [8]:
train_model(model, train_loader, val_loader, epochs=50)

KeyboardInterrupt: 

In [None]:
def evaluate_metrics(model, loader, class_names):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Accuracy
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')

    print(f"Accuracy : {acc:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall   : {recall:.4f}")
    print("\nDetailed per-class metrics:")
    print(classification_report(all_labels, all_preds, target_names=class_names))


In [None]:
pretty_classes = ['Adenocarcinoma', 'Adgelcarcinoma', 'Squamosgelcarcinoma', 'Noncancer']
evaluate_metrics(model, test_loader, pretty_classes)


Accuracy : 0.6159
F1 Score : 0.6025
Precision: 0.6801
Recall   : 0.5977

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.52      0.78      0.62       120
     Adgelcarcinoma       0.65      0.22      0.32        51
Squamosgelcarcinoma       1.00      1.00      1.00        54
          Noncancer       0.55      0.40      0.46        90

           accuracy                           0.62       315
          macro avg       0.68      0.60      0.60       315
       weighted avg       0.63      0.62      0.59       315



 A me è uscito questo:

Accuracy : 0.6095
F1 Score : 0.5746
Precision: 0.6418
Recall   : 0.5815

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.53      0.78      0.63       120
     Adgelcarcinoma       0.50      0.12      0.19        51
Squamosgelcarcinoma       1.00      1.00      1.00        54
          Noncancer       0.54      0.43      0.48        90

           accuracy                           0.61       315
          macro avg       0.64      0.58      0.57       315
       weighted avg       0.61      0.61      0.58       315

Leggendo questi risultati sembra che vada tutto un po' così così perchè nonostante con 10 epoche l'accuracy sia al 61%, la recall è molto bassa e questo vuol dire che il modello fa difficoltà a trovare molti casi, specialmente per alcune classi in particolare.
Analizzando i risulati per ogni classe si nota infatti come nessuna classe venga performi in maniera ottimale, sono tutte più o meno mediocri ad eccezione della seconda dove proprio non ci siamo e la terza che con tutto al massimo è alquanto sospetta (overfitting?).
La seconda classe ha una recall molto bassa, vuol dire che non viene quasi mai riconosciuta.



Provo ad applicare data augmentation solo alle classi più piccole:

In [None]:
#modifica ImageFolder per poter applicare DA diverse a seconda delle classi
from torchvision.datasets import ImageFolder

class BalancedAugmentDataset(ImageFolder):
    def __init__(self, root, transform_common=None, transform_augmented=None, classes_to_augment=None):
        super().__init__(root, transform=None)
        self.transform_common = transform_common
        self.transform_augmented = transform_augmented
        self.classes_to_augment = classes_to_augment or []

    def __getitem__(self, index):
        path, label = self.samples[index]
        image = self.loader(path)

        # Applica augmentation solo se la classe è tra quelle specificate
        if self.classes[label] in self.classes_to_augment:
            if self.transform_augmented:
                image = self.transform_augmented(image)
        else:
            if self.transform_common:
                image = self.transform_common(image)

        return image, label

In [None]:
# Trasformazione base (no augmentation)
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Augmentation solo per le classi minoritarie
augmented_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


In [None]:
# provo ad aumentare le classi 1 e 3
classes_to_augment = ["Adgelcarcinoma", "Noncancer"]

In [None]:
# carica il training set con le nuove classi
train_dataset = BalancedAugmentDataset(
    root=os.path.join(dataset_path, "train"),
    transform_common=base_transform,
    transform_augmented=augmented_transform,
    classes_to_augment=classes_to_augment
)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
# riproviamo a trainare e valutare
train_model(model, train_loader, val_loader, epochs=50)

Epoch 1/50 | Train Loss: 11.631 | Train Acc: 0.821 | Val Acc: 0.625
Epoch 2/50 | Train Loss: 11.666 | Train Acc: 0.816 | Val Acc: 0.625
Epoch 3/50 | Train Loss: 12.504 | Train Acc: 0.814 | Val Acc: 0.625
Epoch 4/50 | Train Loss: 11.972 | Train Acc: 0.806 | Val Acc: 0.639
Epoch 5/50 | Train Loss: 11.384 | Train Acc: 0.825 | Val Acc: 0.611
Epoch 6/50 | Train Loss: 11.882 | Train Acc: 0.827 | Val Acc: 0.611
Epoch 7/50 | Train Loss: 12.494 | Train Acc: 0.814 | Val Acc: 0.625
Epoch 8/50 | Train Loss: 11.719 | Train Acc: 0.829 | Val Acc: 0.611
Epoch 9/50 | Train Loss: 11.851 | Train Acc: 0.816 | Val Acc: 0.625
Epoch 10/50 | Train Loss: 11.616 | Train Acc: 0.822 | Val Acc: 0.597
Epoch 11/50 | Train Loss: 12.086 | Train Acc: 0.816 | Val Acc: 0.653
Epoch 12/50 | Train Loss: 11.475 | Train Acc: 0.806 | Val Acc: 0.625
Epoch 13/50 | Train Loss: 11.316 | Train Acc: 0.812 | Val Acc: 0.611
Epoch 14/50 | Train Loss: 11.483 | Train Acc: 0.827 | Val Acc: 0.625
Epoch 15/50 | Train Loss: 11.645 | Train Ac

In [None]:
evaluate_metrics(model, test_loader, pretty_classes)

Accuracy : 0.6444
F1 Score : 0.6414
Precision: 0.6947
Recall   : 0.6307

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.56      0.73      0.63       120
     Adgelcarcinoma       0.64      0.27      0.38        51
Squamosgelcarcinoma       1.00      0.98      0.99        54
          Noncancer       0.59      0.53      0.56        90

           accuracy                           0.64       315
          macro avg       0.69      0.63      0.64       315
       weighted avg       0.65      0.64      0.63       315



Ecco i risultati:
Accuracy : 0.6667
F1 Score : 0.6739
Precision: 0.7011
Recall   : 0.6647

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.58      0.76      0.66       120
     Adgelcarcinoma       0.58      0.41      0.48        51
Squamosgelcarcinoma       1.00      1.00      1.00        54
          Noncancer       0.64      0.49      0.55        90

           accuracy                           0.67       315
          macro avg       0.70      0.66      0.67       315
       weighted avg       0.67      0.67      0.66       315

Sostanzialmente un incremento dell'accuracy del 6% (adesso a 67%) e dell'F1 score di 10 punti (adesso a 0.67)


Provo ad usare una loss pesata sulle diverse classi in maniera tale da dare più peso agli errori delle classi più piccole.

In [None]:
# Estrai le etichette (interi) dal dataset di training
labels = [label for _, label in train_dataset.samples]

from sklearn.utils.class_weight import compute_class_weight
import numpy as np # numpy is also needed for np.unique

# Calcola i pesi bilanciati per ciascuna classe
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
print("Class weights:", class_weights)

# Converti in tensor e porta su device (CPU o GPU)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

Class weights: [0.78589744 1.3326087  1.03547297 0.98870968]


In [None]:
# passo i pesi come parametro in maniera tale che durante il training gli errori sulle classi meno frequenti abbiano un peso maggiore e quindi il modello sarà incentivato a non "ignorare" quelle classi.

criterion = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(pretrained=True) # trasnfer learning

# congela i layer convoluzionali per fare fine-tuning solo sull'ultimo layer
for param in model.parameters():
    param.requires_grad = False

# Sblocca solo l'ultimo FC Layer
num_features = model.fc.in_features # ultimo layer che mappa le classi
model.fc = nn.Linear(num_features, 4)  # 4 classi

model = model.to(device)



In [None]:
def train_model(model, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        scheduler.step()

        # Valutazione
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
        val_acc = correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f}")

In [None]:
train_model(model, train_loader, val_loader, epochs=50)

Epoch 1/50 | Train Loss: 29.023 | Train Acc: 0.264 | Val Acc: 0.181
Epoch 2/50 | Train Loss: 28.900 | Train Acc: 0.259 | Val Acc: 0.153
Epoch 3/50 | Train Loss: 28.897 | Train Acc: 0.277 | Val Acc: 0.208
Epoch 4/50 | Train Loss: 28.793 | Train Acc: 0.272 | Val Acc: 0.222
Epoch 5/50 | Train Loss: 28.788 | Train Acc: 0.277 | Val Acc: 0.222
Epoch 6/50 | Train Loss: 28.939 | Train Acc: 0.269 | Val Acc: 0.194
Epoch 7/50 | Train Loss: 28.570 | Train Acc: 0.266 | Val Acc: 0.208
Epoch 8/50 | Train Loss: 29.080 | Train Acc: 0.279 | Val Acc: 0.222
Epoch 9/50 | Train Loss: 29.397 | Train Acc: 0.269 | Val Acc: 0.208
Epoch 10/50 | Train Loss: 29.183 | Train Acc: 0.253 | Val Acc: 0.194
Epoch 11/50 | Train Loss: 28.945 | Train Acc: 0.259 | Val Acc: 0.194
Epoch 12/50 | Train Loss: 28.774 | Train Acc: 0.272 | Val Acc: 0.208
Epoch 13/50 | Train Loss: 29.186 | Train Acc: 0.268 | Val Acc: 0.208
Epoch 14/50 | Train Loss: 28.864 | Train Acc: 0.272 | Val Acc: 0.208
Epoch 15/50 | Train Loss: 28.771 | Train Ac

In [None]:
pretty_classes = ['Adenocarcinoma', 'Adgelcarcinoma', 'Squamosgelcarcinoma', 'Noncancer']
evaluate_metrics(model, test_loader, pretty_classes)

Accuracy : 0.2444
F1 Score : 0.1958
Precision: 0.3170
Recall   : 0.3570

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.00      0.00      0.00       120
     Adgelcarcinoma       0.20      0.57      0.30        51
Squamosgelcarcinoma       0.27      0.81      0.40        54
          Noncancer       0.80      0.04      0.08        90

           accuracy                           0.24       315
          macro avg       0.32      0.36      0.20       315
       weighted avg       0.31      0.24      0.14       315



ho ottenuto:

Accuracy : 0.6889
F1 Score : 0.7054
Precision: 0.7114
Recall   : 0.7017

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.64      0.65      0.64       120
     Adgelcarcinoma       0.58      0.49      0.53        51
Squamosgelcarcinoma       1.00      1.00      1.00        54
          Noncancer       0.62      0.67      0.65        90

           accuracy                           0.69       315
          macro avg       0.71      0.70      0.71       315
       weighted avg       0.69      0.69      0.69       315

Rispetto al primissiomo training la sitauzione è migliorata di molto in quanto:
Accuracy da 60% a 69%, F1 score da 0.57 a 0.71, Precision da 0.64 a 0.71,Recall da 0.58 a 0.70, la classe più piccola (Adgelcarcinoma) è passata da un F1 score di 0.19 a uno di 0.53!
Dajee!

Provo a fare dropout sull'ultimo layer e aggiungo l'early stopping durante il training:

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(pretrained=True) # trasnfer learning

# congela i layer convoluzionali per fare fine-tuning solo sull'ultimo layer
for param in model.parameters():
    param.requires_grad = False

# dropout
import torch.nn as nn

model.fc = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Linear(num_features, 4)
)



In [None]:
def train_model(model, train_loader, val_loader, epochs=50, patience=5):
    best_val_loss = float('inf') # inizializzo la loss migliore a infinito
    epochs_without_improvement = 0 # conta quante epoche senza migliorare
    best_model_state = None #per salvare i pesi del modello migliore

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0

        for inputs, labels in train_loader: #training
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        scheduler.step() #aggiorna il learning rate

        # Validazione
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()

        val_acc = correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f} | Val Loss: {val_loss:.3f} | Val Acc: {val_acc:.3f}")

        # Early stopping logic
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            best_model_state = model.state_dict()  # salva il miglior modello
        else:
            epochs_without_improvement += 1
            print(f"  [EarlyStopping] No improvement for {epochs_without_improvement} epoch(s)")

        if epochs_without_improvement >= patience:
            print("Early stopping triggered. Restoring best model weights.")
            model.load_state_dict(best_model_state)
            break


In [None]:
model = model.to(device) # per farlo lavorare su GPU o CPU

In [None]:
train_model(model, train_loader, val_loader, epochs=40, patience=5)

Epoch 1/40 | Train Loss: 32.496 | Train Acc: 0.285 | Val Loss: 4.346 | Val Acc: 0.278
Epoch 2/40 | Train Loss: 32.185 | Train Acc: 0.290 | Val Loss: 4.113 | Val Acc: 0.319
Epoch 3/40 | Train Loss: 32.031 | Train Acc: 0.284 | Val Loss: 4.053 | Val Acc: 0.319
Epoch 4/40 | Train Loss: 32.022 | Train Acc: 0.263 | Val Loss: 4.207 | Val Acc: 0.333
  [EarlyStopping] No improvement for 1 epoch(s)
Epoch 5/40 | Train Loss: 31.314 | Train Acc: 0.284 | Val Loss: 4.382 | Val Acc: 0.333
  [EarlyStopping] No improvement for 2 epoch(s)
Epoch 6/40 | Train Loss: 32.786 | Train Acc: 0.232 | Val Loss: 4.387 | Val Acc: 0.347
  [EarlyStopping] No improvement for 3 epoch(s)
Epoch 7/40 | Train Loss: 32.051 | Train Acc: 0.279 | Val Loss: 4.402 | Val Acc: 0.306
  [EarlyStopping] No improvement for 4 epoch(s)
Epoch 8/40 | Train Loss: 32.590 | Train Acc: 0.276 | Val Loss: 4.384 | Val Acc: 0.361
  [EarlyStopping] No improvement for 5 epoch(s)
Early stopping triggered. Restoring best model weights.


In [None]:
pretty_classes = ['Adenocarcinoma', 'Adgelcarcinoma', 'Squamosgelcarcinoma', 'Noncancer']
evaluate_metrics(model, test_loader, pretty_classes)

Accuracy : 0.4032
F1 Score : 0.2435
Precision: 0.3377
Recall   : 0.2919

Detailed per-class metrics:
                     precision    recall  f1-score   support

     Adenocarcinoma       0.42      0.91      0.58       120
     Adgelcarcinoma       0.00      0.00      0.00        51
Squamosgelcarcinoma       0.67      0.15      0.24        54
          Noncancer       0.26      0.11      0.16        90

           accuracy                           0.40       315
          macro avg       0.34      0.29      0.24       315
       weighted avg       0.35      0.40      0.31       315

