Skin Cancer Classification Final Project

In [None]:
# importing dataset
import kagglehub
import os 
import pandas as pd


# Fetch the most recent published version of the Kaggle dataset and keep the
# local directory it was stored in.
dataset_handle = "jaiahuja/skin-cancer-detection"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/jaiahuja/skin-cancer-detection?dataset_version_number=1...


100%|██████████| 786M/786M [02:54<00:00, 4.71MB/s] 

Extracting files...





Path to dataset files: C:\Users\cliff\.cache\kagglehub\datasets\jaiahuja\skin-cancer-detection\versions\1


In [13]:
# Notebook-wide configuration shared by the data-loading cells.
BATCH_SIZE = 32                   # samples per DataLoader batch
HEIGHT_IMG, WIDTH_IMG = 180, 180  # every image is resized to this (height, width)

In [14]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch
import os
import matplotlib.pyplot as plt
# import glob

def resolve_split_dir(split_name, local_dir, download_root):
    """Return the directory that holds the class sub-folders for one data split.

    The hand-placed relative directory is preferred so existing setups behave
    exactly as before. If it does not exist, the downloaded dataset tree is
    searched for a folder named ``split_name`` so the notebook still works
    after a fresh download.

    Args:
        split_name: Folder name of the split to look for, e.g. ``"Train"``.
        local_dir: Relative directory to use when it exists, e.g. ``"../Train"``.
        download_root: Root of the downloaded dataset to search as a fallback.

    Returns:
        Path (str) of the directory to hand to ``datasets.ImageFolder``.

    Raises:
        FileNotFoundError: If the split cannot be found in either location.
    """
    if os.path.isdir(local_dir):
        return local_dir
    for current_dir, subdirs, _files in os.walk(download_root):
        if split_name in subdirs:
            return os.path.join(current_dir, split_name)
    raise FileNotFoundError(
        f"Could not find a '{split_name}' directory at '{local_dir}' "
        f"or anywhere under '{download_root}'."
    )


# Resize every image to a fixed size, then convert it to a float tensor.
# ToTensor already scales pixel values from [0, 255] to [0.0, 1.0].
transform = transforms.Compose([
    transforms.Resize((HEIGHT_IMG, WIDTH_IMG)),
    transforms.ToTensor(),
])

# ImageFolder derives the class labels from the sub-directory names.
# `path` is the download location returned by kagglehub in the first cell.
train_data = datasets.ImageFolder(
    resolve_split_dir("Train", "../Train", path), transform=transform
)
test_data = datasets.ImageFolder(
    resolve_split_dir("Test", "../Test", path), transform=transform
)





In [None]:

# Hold out a fixed fraction of the training images for validation.
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

validation_size = int(VALIDATION_FRACTION * len(train_data))
training_size = len(train_data) - validation_size

# A seeded generator makes the train/validation membership identical on every
# run, so validation metrics are comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    train_data,
    [training_size, validation_size],
    generator=split_generator,
)

# Only the training loader shuffles; validation order does not affect metrics.
# NOTE(review): num_workers=4 starts worker processes per loader iteration; if
# startup is slow in this environment, try num_workers=0 - confirm by timing.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


In [26]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class CNNModel(nn.Module):
    """Small three-stage convolutional classifier for RGB images.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2)``, so the
    spatial size is halved (rounded down) three times while the channel count
    grows 3 -> 16 -> 32 -> 64. The flattened features feed a 128-unit hidden
    layer followed by a ``num_classes``-way output layer.

    The model returns raw logits (no softmax), which is what
    ``nn.CrossEntropyLoss`` expects.

    Inputs are used as given: no rescaling is done inside the model, and the
    input tensor is never modified in place. Pixel scaling belongs in the data
    transform (``transforms.ToTensor()`` already produces values in [0, 1]).

    Args:
        num_classes: Number of output logits. Defaults to 9.
        image_size: ``(height, width)`` of the input images, used to size the
            first fully connected layer. Defaults to ``(180, 180)``, which
            gives 64 * 22 * 22 = 30976 flattened features.
    """

    def __init__(self, num_classes=9, image_size=(180, 180)):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # The padded 3x3 convolutions keep height/width unchanged; only the
        # three pooling steps shrink them, each by floor division by 2.
        height, width = image_size
        for _ in range(3):
            height //= 2
            width //= 2
        flattened_features = 64 * height * width

        # fully connected layers
        self.fc1 = nn.Linear(flattened_features, 128)
        self.fc2 = nn.Linear(128, num_classes)  # one logit per class

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Float tensor of shape ``(batch, 3, height, width)`` matching the
                ``image_size`` given at construction.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalized logits.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [27]:
from tqdm import tqdm

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

epochs = 20


def run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and report its loss and accuracy.

    When ``optimizer`` is given, the model is put in training mode and its
    weights are updated after every batch. When it is ``None``, the model is
    put in evaluation mode and gradients are disabled.

    Args:
        model: Network to train or evaluate.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``; it
            is expected to return the mean loss over the batch.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer used for weight updates, or ``None`` to evaluate.

    Returns:
        Tuple ``(mean_loss, accuracy_percent)`` computed over every sample seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if is_training:
                # Clear gradients left over from the previous batch.
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by the batch size so a smaller final
            # batch does not count as much as a full one.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_count

    if seen == 0:
        raise ValueError("DataLoader produced no samples; cannot compute metrics.")
    return loss_sum / seen, 100 * correct / seen


for epoch in tqdm(range(epochs)):
    train_loss, train_accuracy = run_epoch(model, train_loader, criterion, device, optimizer)
    # tqdm.write prints above the progress bar instead of breaking it.
    tqdm.write(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%')

    # Validation phase: no optimizer, so no weight updates and no gradients.
    val_loss, val_accuracy = run_epoch(model, val_loader, criterion, device)
    tqdm.write(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')
    

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:42<?, ?it/s]


KeyboardInterrupt: 