Mount Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Upload Dataset to Google Drive

In [None]:
import zipfile
import os

# Define paths
drive_path = "/content/drive/MyDrive/skin_cancer_data"
os.makedirs(drive_path, exist_ok=True)

zip_files = ["/content/drive/MyDrive/HAM10000_images_part_1.zip",
             "/content/drive/MyDrive/HAM10000_images_part_2.zip"]

# Extract ZIP files
for zip_path in zip_files:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(drive_path)

print(" Dataset extracted successfully in Google Drive!")


 Dataset extracted successfully in Google Drive!


Organize Dataset (Train & Validation Splits)

In [None]:
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split

# Define metadata & image paths
metadata_path = "/content/drive/MyDrive/HAM10000_metadata.csv"
image_path = "/content/drive/MyDrive/skin_cancer_data"
output_path = "/content/drive/MyDrive/processed_data"

# Create Train & Validation Directories
train_dir = os.path.join(output_path, "train")
val_dir = os.path.join(output_path, "val")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Load metadata
df = pd.read_csv(metadata_path)

# Select only 5 disease classes
selected_classes = ["akiec", "bcc", "bkl", "df", "mel"]
df = df[df["dx"].isin(selected_classes)]

# Reduce dataset size (100 images per class for fast training)
df = df.groupby("dx").head(100)

# Split dataset (80% Train, 20% Validation)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["dx"], random_state=42)

# Move images into respective folders
for dataset, folder in [(train_df, train_dir), (val_df, val_dir)]:
    for _, row in dataset.iterrows():
        src = os.path.join(image_path, row["image_id"] + ".jpg")
        dst_folder = os.path.join(folder, row["dx"])
        os.makedirs(dst_folder, exist_ok=True)
        shutil.copy(src, os.path.join(dst_folder, row["image_id"] + ".jpg"))

print(" Train/Validation dataset organized successfully!")


 Train/Validation dataset organized successfully!


In [None]:
!pip install torch torchvision numpy pandas matplotlib


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

Train the CNN Model


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Set device (Use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define Image Transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Load Dataset
train_data = ImageFolder("/content/drive/MyDrive/processed_data/train", transform=transform)
val_data = ImageFolder("/content/drive/MyDrive/processed_data/val", transform=transform)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)

# Define CNN Model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=5):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 32 * 32, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize Model
cnn_model = SimpleCNN(num_classes=5).to(device)

# Define Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Train the Model
num_epochs = 10
for epoch in range(num_epochs):
    cnn_model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = cnn_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}, Train Accuracy: {train_acc:.2f}%")

# Save the CNN Model to Google Drive
torch.save(cnn_model.state_dict(), "/content/drive/MyDrive/cnn_skin_model.pth")
print(" CNN Model Trained & Saved in Google Drive!")


Epoch 1, Loss: 41.3100, Train Accuracy: 34.50%
Epoch 2, Loss: 30.7419, Train Accuracy: 50.50%
Epoch 3, Loss: 25.8547, Train Accuracy: 61.50%
Epoch 4, Loss: 20.7918, Train Accuracy: 67.25%
Epoch 5, Loss: 15.3319, Train Accuracy: 77.75%
Epoch 6, Loss: 10.7251, Train Accuracy: 85.00%
Epoch 7, Loss: 6.7422, Train Accuracy: 92.00%
Epoch 8, Loss: 4.5332, Train Accuracy: 95.50%
Epoch 9, Loss: 4.9892, Train Accuracy: 93.25%
Epoch 10, Loss: 4.3902, Train Accuracy: 94.50%
 CNN Model Trained & Saved in Google Drive!


In [None]:
!pip install torch torchvision numpy pandas matplotlib




Load Dataset from Google Drive

In [None]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import os

# Set device (Use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define Image Transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalization for ResNeXt101
])

# Load Dataset
train_data = ImageFolder("/content/drive/MyDrive/processed_data/train", transform=transform)
val_data = ImageFolder("/content/drive/MyDrive/processed_data/val", transform=transform)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)

print(f" Dataset Loaded: {len(train_data)} training images, {len(val_data)} validation images.")


 Dataset Loaded: 400 training images, 100 validation images.


Load ResNeXt101 & Fine-Tune

In [None]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Load Pretrained ResNeXt101 Model
resnext_model = models.resnext101_32x8d(pretrained=True)

# Modify the final classification layer for 5 classes
resnext_model.fc = nn.Linear(2048, 5)

# Move model to GPU (if available)
resnext_model = resnext_model.to(device)

# Define Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnext_model.parameters(), lr=0.0001)


Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/hub/checkpoints/resnext101_32x8d-8ba56ff5.pth
100%|██████████| 340M/340M [00:10<00:00, 33.3MB/s]


Train ResNeXt101

In [None]:
# Train the Model
num_epochs = 10
for epoch in range(num_epochs):
    resnext_model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnext_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch {epoch+1}, Loss: {running_loss:.4f}, Train Accuracy: {train_acc:.2f}%")

# Save the ResNeXt101 Model to Google Drive
torch.save(resnext_model.state_dict(), "/content/drive/MyDrive/resnext_skin_model.pth")
print(" ResNeXt101 Model Trained & Saved in Google Drive!")


Epoch 1, Loss: 14.3752, Train Accuracy: 77.75%
Epoch 2, Loss: 13.2672, Train Accuracy: 80.00%
Epoch 3, Loss: 13.4133, Train Accuracy: 82.75%
Epoch 4, Loss: 14.6420, Train Accuracy: 78.75%
Epoch 5, Loss: 15.7297, Train Accuracy: 79.50%
Epoch 6, Loss: 15.4088, Train Accuracy: 80.50%
Epoch 7, Loss: 12.8109, Train Accuracy: 82.00%
Epoch 8, Loss: 14.0580, Train Accuracy: 79.25%
Epoch 9, Loss: 16.4166, Train Accuracy: 77.00%
Epoch 10, Loss: 16.3433, Train Accuracy: 78.00%
 ResNeXt101 Model Trained & Saved in Google Drive!


Verify ResNeXt101 Model Training

In [None]:
# Load the trained model
resnext_model_path = "/content/drive/MyDrive/resnext_skin_model.pth"
resnext_model = models.resnext101_32x8d(pretrained=False)
resnext_model.fc = nn.Linear(2048, 5)

# Load the saved weights
resnext_model.load_state_dict(torch.load(resnext_model_path, map_location=torch.device('cpu')))
resnext_model.eval()

print(" ResNeXt101 Model Loaded Successfully!")


  resnext_model.load_state_dict(torch.load(resnext_model_path, map_location=torch.device('cpu')))


 ResNeXt101 Model Loaded Successfully!
