In [173]:
import os
import sys
import random
import math
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import PIL
from PIL import Image

import torch 
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets, models

import warnings
warnings.filterwarnings('ignore')

In [174]:
print(torch.__version__)
print(torch.cuda)
print(torch.cuda.is_available())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

2.6.0+cu124
<module 'torch.cuda' from '/usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py'>
True


In [175]:
root_train = '/kaggle/input/skin-cancer-malignant-vs-benign/train'
root_test = '/kaggle/input/skin-cancer-malignant-vs-benign/test'

def get_path(path, ty = 'train'):
    paths = glob.glob(path + '/*')
    return paths[0], paths[1]

In [176]:
def merge_results(benign, malignant):
    files = []

    benign = glob.glob(benign + '/*')
    malignant = glob.glob(malignant + '/*')
    
    for image in benign:
        files.append({
            'path': image,
            'malignant': 0,
        })
    
    for image in malignant:
        files.append({
            'path': image,
            'malignant': 1,
        })
    
    random.shuffle(files)
    return files

In [177]:
img_transform = {
    'valid': transforms.Compose([
        transforms.Resize(size = 256),
        transforms.CenterCrop(size = 224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size = 256),
        transforms.RandomRotation(degrees = 30),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.CenterCrop(size = 224),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  
    ]),
}

In [178]:
class Configure(torch.utils.data.Dataset):
    def __init__(self, images, transform = None):
        super(Configure, self).__init__()
        self.images = images
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]['path']
        image = Image.open(image)
        if self.transform is not None:
            image = self.transform(image)

        return image.to(device), torch.tensor(self.images[idx]['malignant'], dtype = torch.long).to(device)

In [179]:
benign_train, malignant_test = get_path(root_train)
benign_test, malignant_test = get_path(root_test)

train_set = merge_results(benign_train, malignant_train)
test_set = merge_results(benign_test, malignant_test)

trainset = Configure(images = train_set, transform = img_transform['train'])
validset = Configure(images = test_set, transform = img_transform['valid'])

In [180]:
BATCH_SIZE = 4
NUM_WORKERS = 0

trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size = BATCH_SIZE,
    num_workers = NUM_WORKERS,
    shuffle = True,
)

testloader = torch.utils.data.DataLoader(
    validset,
    batch_size = BATCH_SIZE,
    num_workers = NUM_WORKERS,
    shuffle = False,
)

loaders = {
    'train': trainloader,
    'valid': testloader,
}

In [181]:
X, y = next(iter(trainloader))
X2, y2 = next(iter(testloader))
print(X.shape, y.shape)
print(X2.shape, y2.shape)

assert(X.shape == X2.shape)
assert(y[0].dim() == y2[0].dim() == 0)

torch.Size([4, 3, 224, 224]) torch.Size([4])
torch.Size([4, 3, 224, 224]) torch.Size([4])


In [182]:
use_cuda = torch.cuda.is_available()

model_resnet = models.resnet50(pretrained = True)
for param in model_resnet.parameters():
    param.requires_grad = False
    
in_features = model_resnet.fc.in_features
model_resnet.fc = nn.Linear(in_features, 64)
model_resnet.bn_fc = nn.BatchNorm2d(128)
model_resnet.drop = nn.Dropout(0.6)
model_resnet.fc2 = nn.Linear(128, 2)

if use_cuda:
    model_resnet = model_resnet.cuda()

print(model_resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [183]:
criterion_resnet = nn.CrossEntropyLoss()
grad_parameters_resnet = filter(lambda p: p.requires_grad, model_resnet.parameters())
optimizer_resnet = torch.optim.SGD(grad_parameters_resnet, lr = 1e-4)

In [184]:
n_epochs = 20

scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer = optimizer_resnet,
    max_lr = 1e-2,
    steps_per_epoch = len(trainset) // BATCH_SIZE,
    epochs = n_epochs
)

In [185]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, early_stopping_rounds, save_path):
    valid_loss_min = np.Inf 
    early_stopping_counter = 0
    
    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        valid_loss = 0.0
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            
            loss.backward()
            optimizer.step()
            # scheduler.step()
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            if batch_idx % 100 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f \tLearning Rate: %.6f' %(epoch, batch_idx + 1, train_loss, optimizer.param_groups[0]['lr']))

        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        print('Epoch: {} \tTraining Loss: {:.4f} \tValidation Loss: {:.4f}'.format(epoch, train_loss, valid_loss))
        if valid_loss < valid_loss_min:
            early_stopping_counter = 0
            torch.save(model.state_dict(), save_path)
            print('\nBOOM! Validation loss decreased ({:.4f} --> {:.4f}).  Saving model...\n'.format(valid_loss_min,valid_loss))
            valid_loss_min = valid_loss    
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= early_stopping_rounds:
                break

    return model

In [186]:
model_resnet = train(
    n_epochs,
    loaders = loaders,
    model = model_resnet,
    optimizer = optimizer_resnet,
    criterion = criterion_resnet,
    use_cuda = use_cuda,
    early_stopping_rounds = 10,
    save_path = 'cancer_resnet50'
)

Epoch: 1 	Batch: 1 	Training Loss: 4.551740 	Learning Rate: 0.000400
Epoch: 1 	Batch: 101 	Training Loss: 0.993620 	Learning Rate: 0.000400
Epoch: 1 	Batch: 201 	Training Loss: 0.744818 	Learning Rate: 0.000400
Epoch: 1 	Batch: 301 	Training Loss: 0.630239 	Learning Rate: 0.000400
Epoch: 1 	Batch: 401 	Training Loss: 0.562988 	Learning Rate: 0.000400
Epoch: 1 	Training Loss: 0.5484 	Validation Loss: 0.9141

BOOM! Validation loss decreased (inf --> 0.9141).  Saving model...

Epoch: 2 	Batch: 1 	Training Loss: 0.143032 	Learning Rate: 0.000400
Epoch: 2 	Batch: 101 	Training Loss: 0.367012 	Learning Rate: 0.000400
Epoch: 2 	Batch: 201 	Training Loss: 0.365430 	Learning Rate: 0.000400
Epoch: 2 	Batch: 301 	Training Loss: 0.376709 	Learning Rate: 0.000400
Epoch: 2 	Batch: 401 	Training Loss: 0.365494 	Learning Rate: 0.000400
Epoch: 2 	Training Loss: 0.3610 	Validation Loss: 0.8859

BOOM! Validation loss decreased (0.9141 --> 0.8859).  Saving model...

Epoch: 3 	Batch: 1 	Training Loss: 0.14