In [19]:
from torchvision import transforms
from torchvision import datasets
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import os
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights
import torch.optim as optim
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# problema librerie che usano OpenMP, due versioni di OpenMP sono caricate nello stesso processo, ad esempio da PyTorch e da NumPy

In [20]:
dataset_path = os.path.join(os.getcwd(), "DAML_project", 'data_images')
print(dataset_path)

/home/user/DAML/DAML_project/data_images


In [21]:
class BinaryCTDataset(ImageFolder):
    def __init__(self, root, transform=None):
        super().__init__(root, transform=transform)

        # Classi considerate Maligne
        self.malignant_classes = ['adenocarcinoma', 'adgelcarcinoma', 'squamosgelcarcinoma']
        self.benign_classes = ['noncancer']

        # Mappa classe originale → binaria
        self.binary_targets = []
        for _, label in self.samples:
            class_name = self.classes[label]
            if class_name in self.benign_classes:
                self.binary_targets.append(0)
            else:
                self.binary_targets.append(1)

    def __getitem__(self, index):
        path, _ = self.samples[index]
        image = self.loader(path)
        if self.transform:
            image = self.transform(image)
        label = self.binary_targets[index]
        return image, label


In [22]:
# Mean e std per immagini RGB normalizzate su [-1, 1]
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

val_test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# per fare data augmentation utiliziammo transforms.Compose di torchvision, facciamo data augmentation solo ai set di train

In [23]:
train_dataset = BinaryCTDataset(root=os.path.join(dataset_path, "train"), transform=train_transform)
val_dataset = BinaryCTDataset(root=os.path.join(dataset_path, "valid"), transform=val_test_transform)
test_dataset = BinaryCTDataset(root=os.path.join(dataset_path, "test"), transform=val_test_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [24]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = resnet18(weights=ResNet18_Weights.DEFAULT) # trasnfer learning

# congela i layer convoluzionali per fare fine-tuning solo sull'ultimo layer
for param in model.parameters():
    param.requires_grad = False

# Sblocca solo l'ultimo FC Layer
num_features = model.fc.in_features # ultimo layer che mappa le classi
model.fc = nn.Linear(num_features, 2)  # 4 classi

model = model.to(device)

In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

In [26]:
def train_model(model, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        correct = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        scheduler.step()

        # Valutazione
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                preds = outputs.argmax(dim=1)
                correct += (preds == labels).sum().item()
        val_acc = correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f}")


In [27]:
train_model(model, train_loader, val_loader, epochs=50)

Epoch 1/50 | Train Loss: 2.445 | Train Acc: 0.935 | Val Acc: 1.000
Epoch 2/50 | Train Loss: 0.005 | Train Acc: 1.000 | Val Acc: 1.000


KeyboardInterrupt: 

In [None]:
def evaluate_metrics(model, loader, class_names):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Accuracy
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')

    print(f"Accuracy : {acc:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall   : {recall:.4f}")
    print("\nDetailed per-class metrics:")
    #print(classification_report(all_labels, all_preds, target_names=class_names))
    print(classification_report(
    all_labels,
    all_preds,
    labels=[0, 1],
    target_names=class_names,
    zero_division=0  # evita errori in caso di divisioni per 0
))


In [None]:
pretty_classes = ['Benigno', 'Maligno']
evaluate_metrics(model, test_loader, pretty_classes)

Accuracy : 1.0000
F1 Score : 1.0000
Precision: 1.0000
Recall   : 1.0000

Detailed per-class metrics:
              precision    recall  f1-score   support

     Benigno       0.00      0.00      0.00         0
     Maligno       1.00      1.00      1.00       315

    accuracy                           1.00       315
   macro avg       0.50      0.50      0.50       315
weighted avg       1.00      1.00      1.00       315



In [None]:
Benigno = 0
Maligno = 0
for _, label in test_dataset:
    if label == 0:
        Benigno += 1
    else:
        Maligno += 1
print(f"Benigno: {Benigno}, Maligno: {Maligno}")

Benigno: 0, Maligno: 315
