In [1]:
import torch
from torch import nn

import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import torch.nn.functional as F

import torch.optim as optim
import matplotlib.pyplot as plt

print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

PyTorch version: 2.4.1+cu121
torchvision version: 0.19.1+cu121


In [2]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


transform = transforms.Compose([
    transforms.Resize((224, 224)),     
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),      # Randomly rotate the image by up to 10 degrees
    transforms.ToTensor(), 
    transforms.Normalize(mean,std)  # Normalize to [-1, 1] range
])

In [3]:
dataset = datasets.ImageFolder('/kaggle/input/melanoma/data/train')
class_names = dataset.classes
print(class_names)
dataset = datasets.ImageFolder('/kaggle/input/melanoma/data/train', transform=transform)

['Benign', 'Malignant']


In [4]:
from torch.utils.data import random_split

total_size = len(dataset)
print(f"Total dataset size: {total_size}")

train_size = int(0.8 * total_size)
test_size = total_size - train_size

train_data, test_data = random_split(dataset, [train_size, test_size])
print(f"Number of training samples: {len(train_data)}")
print(f"Number of testing samples: {len(test_data)}")

Total dataset size: 11879
Number of training samples: 9503
Number of testing samples: 2376


In [5]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

train_dataloader = DataLoader(train_data, 
    batch_size=BATCH_SIZE, 
    shuffle=True,
    num_workers=4
)

test_dataloader = DataLoader(test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4                         
)

print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

Length of train dataloader: 297 batches of 32
Length of test dataloader: 75 batches of 32


In [6]:
image,label = dataset[0]
image.size()

torch.Size([3, 224, 224])

In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

from torchvision import models

class ResNet(nn.Module):
    def __init__(self, num_classes=2, pretrained=True):
        super(ResNet, self).__init__()
        # Load pre-trained ResNet50
        self.model = models.resnet50(pretrained=pretrained)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

        # Freeze all layers except the final FC layer
        for param in self.model.parameters():
            param.requires_grad = False

        for param in self.model.fc.parameters():
            param.requires_grad = True

    def forward(self, x):
        return self.model(x)

In [13]:
net = ResNet()
net.to(device)
net

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 223MB/s]


ResNet(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0)

In [15]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)

In [None]:
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt

num_epochs = 30
best_val_accuracy = 0.0

# Learning rate scheduler (optional)
scheduler = StepLR(optimizer, step_size=7, gamma=0.1)  # Reduce LR by factor of 0.1 every 7 epochs

# Lists to store loss and accuracy for each epoch
train_loss_list = []
train_acc_list = []
val_loss_list = []
val_acc_list = []

# Lists to store true labels and predictions for performance metrics
all_labels = []
all_preds = []

for epoch in range(num_epochs):
    # Set model to training mode
    net.train()
    
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    # Training loop
    for inputs, labels in train_dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track training loss
        running_loss += loss.item()
        
        # Track accuracy
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
    
    train_accuracy = 100 * correct_train / total_train
    train_loss = running_loss / len(train_dataloader)

    train_loss_list.append(train_loss)
    train_acc_list.append(train_accuracy)

    # Validation loop
    net.eval()
    correct_val = 0
    total_val = 0
    running_val_loss = 0.0

    # Clear previous epoch's labels and predictions
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            loss = loss_function(outputs, labels)

            running_val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

            # Collect labels and predictions
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    val_accuracy = 100 * correct_val / total_val
    val_loss = running_val_loss / len(test_dataloader)

    val_loss_list.append(val_loss)
    val_acc_list.append(val_accuracy)

    # Step learning rate scheduler
    scheduler.step()

    # Print training and validation results per epoch
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}% | Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")

    # Save the best model based on validation accuracy
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(net.state_dict(), '../app/best_model.pth')
        print(f"Saved best model with accuracy: {best_val_accuracy:.2f}%")

# After training, compute and print classification report  # Replace with your actual class names

print("\nMetrics per class::")
print(classification_report(all_labels, all_preds, target_names=class_names))

# Plot Loss
plt.figure(figsize=(10,5))
plt.plot(range(1, num_epochs+1), train_loss_list, label='Training Loss')
plt.plot(range(1, num_epochs+1), val_loss_list, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot Accuracy
plt.figure(figsize=(10,5))
plt.plot(range(1, num_epochs+1), train_acc_list, label='Training Accuracy')
plt.plot(range(1, num_epochs+1), val_acc_list, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.show()


Epoch [1/30] - Train Loss: 0.3906, Train Acc: 82.28% | Val Loss: 0.3066, Val Acc: 86.24%
Saved best model with accuracy: 86.24%
Epoch [2/30] - Train Loss: 0.3563, Train Acc: 84.37% | Val Loss: 0.3074, Val Acc: 86.57%
Saved best model with accuracy: 86.57%
Epoch [3/30] - Train Loss: 0.3317, Train Acc: 85.37% | Val Loss: 0.3218, Val Acc: 85.40%
Epoch [4/30] - Train Loss: 0.3316, Train Acc: 85.92% | Val Loss: 0.2936, Val Acc: 86.95%
Saved best model with accuracy: 86.95%
Epoch [5/30] - Train Loss: 0.3248, Train Acc: 86.04% | Val Loss: 0.2971, Val Acc: 87.12%
Saved best model with accuracy: 87.12%
Epoch [6/30] - Train Loss: 0.3156, Train Acc: 86.50% | Val Loss: 0.3275, Val Acc: 85.86%
Epoch [7/30] - Train Loss: 0.3182, Train Acc: 86.50% | Val Loss: 0.3697, Val Acc: 83.25%
Epoch [8/30] - Train Loss: 0.2909, Train Acc: 87.39% | Val Loss: 0.2772, Val Acc: 87.67%
Saved best model with accuracy: 87.67%
Epoch [9/30] - Train Loss: 0.2817, Train Acc: 87.49% | Val Loss: 0.2803, Val Acc: 88.17%
Save