In [1]:
# librerie
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.models import resnet18
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score

import sys

# Running inside Google Colab? The `google.colab` package is only loaded there.
IS_COLAB = "google.colab" in sys.modules

if IS_COLAB:
    from google.colab import drive

    # Mount Google Drive, then make the project folder the working directory
    # so that every later relative path resolves inside it.
    drive.mount('/content/drive/', force_remount=True)

    # Adapt the folder to your specific one
    project_path = '/content/drive/My Drive/DAML_project'

    print(os.getcwd())      # directory before the switch
    os.chdir(project_path)
    print(project_path)

    current_directory = os.getcwd()
    print(f"Current working directory: {current_directory}")
# Outside Colab nothing needs to be done: the current directory is used as-is.

Mounted at /content/drive/
/content
/content/drive/My Drive/DAML_project
Current working directory: /content/drive/My Drive/DAML_project


In [2]:
# The dataset folder is expected directly under the current working directory.
base_dir = os.getcwd()
dataset_path = os.path.join(base_dir, "data_histo")

In [3]:
# transforms
# The classifier is built on an ImageNet-pretrained ResNet-18 whose backbone is
# kept frozen, so inputs are normalised with the ImageNet channel statistics
# those weights were trained with (instead of a generic 0.5 / 0.5 scaling).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation, tensor conversion, normalisation.
train_transform = transforms.Compose([
    transforms.Resize((768, 768)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Validation / test pipeline: deterministic, no augmentation.
val_test_transform = transforms.Compose([
    transforms.Resize((768, 768)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

In [4]:
# load the full dataset
# `full_dataset` is the evaluation view (deterministic transform); a second
# ImageFolder over the same directory is the training view (augmentation).
full_dataset = datasets.ImageFolder(root=dataset_path, transform=val_test_transform)
train_view = datasets.ImageFolder(root=dataset_path, transform=train_transform)

# Both views must list the files in the same order, otherwise the indices drawn
# below would point at different images in each of them.
assert train_view.samples == full_dataset.samples, "dataset directory changed between scans"

# split: 70% train, 15% validation, 15% test
total_size = len(full_dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

train_split, val_set, test_set = random_split(full_dataset, [train_size, val_size, test_size],
                                              generator=torch.Generator().manual_seed(42))

# apply the appropriate transforms
# The subsets returned by random_split all wrap the SAME underlying dataset, so
# setting `subset.dataset.transform` on each of them just overwrites one shared
# attribute (the last assignment wins and training augmentation is lost).
# Instead, re-index the training view with the training indices.
train_set = torch.utils.data.Subset(train_view, train_split.indices)

In [5]:
# data loaders: batches of 32; only the training stream is reshuffled
train_loader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=32, shuffle=False)

In [6]:
# class names
# Display names used in the final report, listed in the same order as the
# folder-derived class names printed below.
pretty_classes = ['Adenocarcinoma', 'Benigno', 'Squamoso']
print("Classi:", full_dataset.classes)

Classi: ['adenocarcinoma', 'benign', 'squamous_cell_carcinoma']


In [7]:
# device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

Using GPU: Tesla T4


In [8]:
# model: pretrained ResNet-18 with all of its pretrained weights frozen
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm the installed version before switching.
model = resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False  # freeze everything

# Replace the final layer with a fresh 3-class head. It is created after the
# freeze loop, so its parameters remain trainable.
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=3)
model = model.to(device)

# loss, optimizer and scheduler
# Only the new head is handed to the optimizer; the learning rate is halved
# every 5 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.fc.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 165MB/s]


In [9]:
# training
def train_model(model, train_loader, val_loader, epochs=10):
    """Train `model` for `epochs` epochs and print per-epoch metrics.

    Relies on the notebook-level globals `device`, `criterion`, `optimizer`
    and `scheduler`.

    Args:
        model: network to optimise; expected to already live on `device`.
        train_loader: iterable of `(inputs, labels)` training batches.
        val_loader: iterable of `(inputs, labels)` validation batches.
        epochs: number of passes over `train_loader`.

    Returns:
        None. One summary line per epoch is printed with the per-sample
        average training loss, training accuracy, validation loss and
        validation accuracy.
    """
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_loss_sum = 0.0
        train_correct = 0
        train_seen = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # `criterion` is created with its default (batch-mean) reduction, so
            # weight by the batch size to get a true per-sample average even
            # when the last batch is smaller.
            batch_size = labels.size(0)
            train_loss_sum += loss.item() * batch_size
            train_correct += (outputs.argmax(1) == labels).sum().item()
            train_seen += batch_size
        scheduler.step()

        # Divide by the samples actually processed (robust to drop_last or
        # custom samplers); max(..., 1) avoids a ZeroDivisionError on an
        # empty loader.
        train_loss = train_loss_sum / max(train_seen, 1)
        train_acc = train_correct / max(train_seen, 1)

        # ---- validation pass ----
        model.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                batch_size = labels.size(0)
                val_loss_sum += criterion(outputs, labels).item() * batch_size
                val_correct += (outputs.argmax(1) == labels).sum().item()
                val_seen += batch_size
        val_loss = val_loss_sum / max(val_seen, 1)
        val_acc = val_correct / max(val_seen, 1)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc:.3f} | "
              f"Val Loss: {val_loss:.3f} | Val Acc: {val_acc:.3f}")

In [None]:
# run the training loop for 10 epochs
train_model(model=model, train_loader=train_loader, val_loader=val_loader, epochs=10)

In [None]:
# final evaluation
def evaluate_metrics(model, loader, class_names):
    """Run `model` over `loader` and print aggregate and per-class metrics.

    Relies on the notebook-level global `device`.

    Args:
        model: trained classifier; switched to eval mode here.
        loader: iterable of `(inputs, labels)` batches to score.
        class_names: display names, one per class, ordered by class index.

    Returns:
        None. Accuracy, macro F1 / precision / recall and a per-class
        classification report are printed.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs.to(device))
            # Labels are only compared on the host, so they need no device transfer.
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    # Derive the label ids from `class_names` instead of hard-coding [0, 1, 2],
    # and pass the same set to every metric so the macro averages here agree
    # with the ones in the classification report, even if a class never occurs
    # in this split.
    label_ids = list(range(len(class_names)))

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, labels=label_ids, average='macro')
    precision = precision_score(all_labels, all_preds, labels=label_ids, average='macro')
    recall = recall_score(all_labels, all_preds, labels=label_ids, average='macro')

    print("\nTest Evaluation:")
    print(f"Accuracy : {acc:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall   : {recall:.4f}")
    print("\nDetailed per-class metrics:")
    print(classification_report(all_labels, all_preds, target_names=class_names, labels=label_ids))

In [None]:
# run the evaluation on the held-out test split
evaluate_metrics(model=model, loader=test_loader, class_names=pretty_classes)