# Imports

In [None]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive
# become reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Disabled alternative (IPython shell escapes): copy the dataset archive from
# Drive into the current working directory and extract it there.
# !cp '/content/drive/MyDrive/dermoscopy_classification.tar.gz' .
# !tar -xvzf 'dermoscopy_classification.tar.gz'
# Root folder of the dataset on the mounted Drive.
data_dir = '/content/drive/MyDrive/dermoscopy_classification'

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
import pandas as pd
from glob import glob
import os
from torch.utils.data import random_split
from torch import nn, optim
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# 2.1-2.2 Μέθοδοι `__init__()`, `__len__()`, `__getitem__()`

In [7]:
class MLProject2Dataset(torch.utils.data.Dataset):
    """Image dataset that pairs ``*.jpg`` files with labels from a metadata CSV.

    Images are looked up in the ``part_1`` and ``part_2`` sub-directories of
    ``data_dir``. The CSV must provide an ``image_id`` column (file name
    without extension) and a ``dx`` column (class name). Class names are
    encoded as integer codes in the sorted order of the distinct ``dx``
    values; the names are kept in ``self.classes`` so that a code ``c`` can be
    decoded as ``self.classes[c]``.

    Attributes:
        data_dir: Root directory passed to the constructor.
        transform: Optional callable applied to every image tensor.
        df: DataFrame with the columns ``path``, ``image_id`` and ``dx``
            (integer code), one row per image that has a metadata entry.
        classes: List of ``dx`` class names, indexed by integer code.
    """

    def __init__(self, data_dir, metadata_fname='/content/drive/MyDrive/dermoscopy_classification/metadata.csv', transform=None):
        """Index the images under ``data_dir`` and attach their labels.

        Args:
            data_dir: Directory containing the ``part_1`` / ``part_2`` folders.
            metadata_fname: CSV path, joined onto ``data_dir`` with
                ``os.path.join``. NOTE: the default is an absolute path, so
                the join returns it unchanged and the default always points
                at that fixed location regardless of ``data_dir``; pass a
                relative name such as ``'metadata.csv'`` to read the file
                from inside ``data_dir``.
            transform: Optional callable applied to each image in
                ``__getitem__``.
        """
        self.data_dir = data_dir
        self.transform = transform

        # glob() returns files in filesystem order, which is not guaranteed to
        # be stable. Sorting makes the row order deterministic, so a seeded
        # random_split() over this dataset yields the same split on every run.
        image_paths = []
        for part in ("part_1", "part_2"):
            image_paths.extend(sorted(glob(os.path.join(data_dir, part, "*.jpg"))))

        # The image id is the file name without directory and extension.
        image_ids = [os.path.splitext(os.path.basename(p))[0] for p in image_paths]
        df = pd.DataFrame({"path": image_paths, "image_id": image_ids})

        metadata = pd.read_csv(os.path.join(data_dir, metadata_fname))

        # Encode the class names as integer codes and remember the mapping.
        dx = pd.Categorical(metadata['dx'])
        self.classes = list(dx.categories)
        metadata['dx'] = dx.codes

        # Inner join: images without a metadata row are dropped.
        self.df = pd.merge(df, metadata[['image_id', 'dx']], on='image_id')

    def __len__(self):
        """Return the number of labelled images."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is a float tensor scaled to [0, 1] (before ``transform``),
        the label a 0-dim integer tensor holding the ``dx`` code.
        """
        # Address the columns by name instead of by position so the lookup
        # does not depend on the column order produced by the merge.
        img_path = self.df['path'].iloc[idx]
        label = torch.tensor(int(self.df['dx'].iloc[idx]))

        # Force a 3-channel decode so that greyscale files still match the
        # 3-channel normalisation / convolution applied downstream.
        image = torchvision.io.read_image(
            img_path, mode=torchvision.io.ImageReadMode.RGB
        ).float() / 255.0

        if self.transform:
            image = self.transform(image)

        return image, label

# 2.3 Train / validation / test split και Data loaders - 2.4 Μετασχηματισμοί

In [8]:
# Preprocessing pipeline: resize every image to (m, n) and then apply
# (x - 0.5) / 0.5 to each of the three channels.
m, n = 50, 62
data_transforms = transforms.Compose(
    [
        transforms.Resize((m, n)),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

dataset = MLProject2Dataset(data_dir, transform=data_transforms)

# 60 % of the samples go to training, 10 % to validation and whatever is left
# (so the three sizes always add up to the full dataset) to testing.
n_samples = len(dataset)
train_size = int(0.6 * n_samples)
val_size = int(0.1 * n_samples)
test_size = n_samples - train_size - val_size

# A generator seeded with a fixed value is handed to random_split.
generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=generator,
)

# Only the training loader shuffles; validation and test keep dataset order.
trainloader = DataLoader(train_set, batch_size=32, shuffle=True)
valloader, testloader = (
    DataLoader(subset, batch_size=32) for subset in (val_set, test_set)
)

# 3.1 train_net

In [10]:
def train_net(model: nn.Module, trainloader: DataLoader, valloader: DataLoader = None,
              epochs: int = 10, optimizer: optim.Optimizer = None, loss: nn.Module = None,
              device: str = 'cpu', print_period: int = 10):
    """Train ``model`` and record per-epoch loss / accuracy.

    Args:
        model: Network to train; it is moved to ``device``.
        trainloader: Yields ``(data, target)`` batches used for optimisation.
        valloader: Optional loader evaluated after every epoch. When ``None``
            the validation pass is skipped and the returned validation lists
            stay empty.
        epochs: Number of passes over ``trainloader``.
        optimizer: Optimiser that updates ``model``'s parameters (required).
        loss: Callable ``loss(output, target)`` returning a scalar tensor
            (required). The reported averages assume the loss is the mean
            over the batch (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the model and the batches are moved to.
        print_period: Currently unused; accepted for interface compatibility.
            A summary line is printed after every epoch.

    Returns:
        ``(train_losses, train_accuracies, val_losses, val_accuracies)``:
        four lists with one entry per epoch. Losses are per-sample averages,
        accuracies are percentages.

    Raises:
        ValueError: If ``optimizer`` or ``loss`` is ``None``.
    """
    if optimizer is None or loss is None:
        raise ValueError("train_net requires both an optimizer and a loss function")

    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    print(device)
    model.to(device)

    for epoch in range(epochs):
        # Re-enter training mode on every epoch: the validation pass below
        # switches the model to eval mode, which would otherwise persist for
        # all following epochs (affecting dropout / batch-norm layers).
        model.train()
        loss_sum = 0.0
        train_correct = 0
        total = 0

        for data, target in trainloader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            tr_loss = loss(output, target)
            tr_loss.backward()
            optimizer.step()

            # The batch loss is a mean, so weight it by the batch size before
            # dividing by the sample count to get a true per-sample average.
            batch_size = target.size(0)
            loss_sum += tr_loss.item() * batch_size
            train_correct += (output.argmax(dim=1) == target).sum().item()
            total += batch_size

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        train_loss = loss_sum / max(total, 1)
        train_acc = 100.0 * train_correct / max(total, 1)

        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        if valloader is None:
            print(f"Epoch [{epoch+1}/{epochs}] - "
                  f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
            continue

        val_loss, val_acc = _run_validation(model, valloader, loss, device)

        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}] - "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}% - "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.2f}%")

    return train_losses, train_accuracies, val_losses, val_accuracies


def _run_validation(model, loader, loss, device):
    """Return ``(mean loss per sample, accuracy in %)`` of ``model`` on ``loader``."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            batch_size = target.size(0)
            loss_sum += loss(output, target).item() * batch_size
            correct += (output.argmax(dim=1) == target).sum().item()
            total += batch_size

    return loss_sum / max(total, 1), 100.0 * correct / max(total, 1)

# 3.2 test_net

In [11]:
def test_net(model: nn.Module, testloader: torch.utils.data.DataLoader,
             loss_fn: nn.Module = None, device: str = 'cpu'):
    """Evaluate ``model`` on ``testloader`` and collect its predictions.

    Args:
        model: Network to evaluate; it is put in eval mode and moved to
            ``device``.
        testloader: Yields ``(data, target)`` batches.
        loss_fn: Optional callable ``loss_fn(output, target)`` returning a
            scalar tensor. When given, the per-sample average loss is printed
            (assuming the loss is the mean over the batch); when ``None``
            only the accuracy is reported.
        device: Device the model and the batches are moved to.

    Returns:
        ``(pred_labels, true_labels)``: two lists with one entry per test
        sample, in loader order.
    """
    model.eval()
    pred_labels = []
    true_labels = []
    loss_sum = 0.0
    correct = 0
    total = 0

    print(device)
    model.to(device)

    with torch.no_grad():
        for data, target in testloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            batch_size = target.size(0)

            if loss_fn is not None:
                # Use the loss passed in as an argument (not a global), and
                # weight the batch mean by the batch size so that the final
                # division yields a per-sample average.
                loss_sum += loss_fn(output, target).item() * batch_size

            predicted = output.argmax(dim=1)
            correct += (predicted == target).sum().item()
            total += batch_size

            true_labels.extend(target.cpu().numpy())
            pred_labels.extend(predicted.cpu().numpy())

    # max(..., 1) avoids a ZeroDivisionError on an empty loader.
    test_acc = 100.0 * correct / max(total, 1)

    if loss_fn is not None:
        test_loss = loss_sum / max(total, 1)
        print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%')
    else:
        print(f'Test Accuracy: {test_acc:.2f}%')

    return pred_labels, true_labels


# The name starts with "test_", so tell pytest this is not a test function.
test_net.__test__ = False

# 4 Απλό ΣΝΔ

In [12]:
class MyCNN(nn.Module):
    """Small convolutional classifier: three conv/ReLU/max-pool stages + linear head.

    Every stage uses a 3x3 convolution with padding 1 (spatial size preserved)
    followed by a 2x2 max-pool with stride 2 (each spatial dimension halved,
    rounding down). The channel widths are 3 -> 32 -> 64 -> 64.

    Args:
        num_classes: Width of the output layer. The default of 64 is kept for
            backward compatibility; pass the number of distinct labels to get
            exactly one logit per class.
        input_size: ``(height, width)`` of the input images. It is used to
            size the linear layer; the default ``(50, 62)`` gives
            64 * 6 * 7 = 2688 input features.

    Raises:
        ValueError: If ``input_size`` is too small to survive three poolings.
    """

    def __init__(self, num_classes=64, input_size=(50, 62)):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Derive the flattened feature count from the input size instead of
        # hard-coding it: each of the three poolings floor-halves both sides.
        height, width = input_size
        for _ in range(3):
            height //= 2
            width //= 2
        if height < 1 or width < 1:
            raise ValueError(f"input_size {input_size} is too small for three 2x2 poolings")

        self.flatten = nn.Flatten()
        self.fc = nn.Linear(64 * height * width, num_classes)

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to ``(batch, num_classes)`` scores."""
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        x = self.flatten(x)
        x = self.fc(x)
        return x

# Simple model

In [13]:
# Build the network, its cross-entropy objective and a plain SGD optimiser
# with learning rate 0.1 over all of the network's parameters.
model = MyCNN()
loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
# Train on the GPU when one is available and fall back to the CPU otherwise,
# so the cell also runs on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

train_losses, train_accuracies, val_losses, val_accuracies = train_net(
    model, trainloader, valloader, epochs=20, optimizer=optimizer, loss=loss,
    device=device, print_period=1
)

plt.figure(figsize=(12, 4))

# Derive the x axis from the recorded history so it always matches the number
# of epochs that were actually run.
epochs = range(1, len(train_losses) + 1)

# Left panel: loss curves.
plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, label='Training Loss')
plt.plot(epochs, val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training - Validation Loss')
plt.legend()

# Right panel: accuracy curves (percent).
plt.subplot(1, 2, 2)
plt.plot(epochs, train_accuracies, label='Training Accuracy')
plt.plot(epochs, val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training - Validation Accuracy')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Evaluate the model on the test set. Use the GPU when one is available and
# fall back to the CPU otherwise, so the cell also runs without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
pred_labels, true_labels = test_net(model, testloader, loss, device=device)

# Rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(true_labels, pred_labels)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()
print(conf_matrix)