In [None]:
import kagglehub
import os

# Download latest version of the dataset; `path` is the local directory
# that kagglehub reports for the downloaded files.
path = kagglehub.dataset_download("bhaveshmittal/melanoma-cancer-dataset")

print("Path to dataset files:", path)

# Inspect the directory that was actually returned instead of a hard-coded
# Kaggle mount point, so this cell also works outside Kaggle notebooks.
print(os.listdir(path))
print(os.listdir(os.path.join(path, "train")))


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Setup device: use CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device in use:", device)

# Parameters
imgSize = 224     # images are resized to imgSize x imgSize
batchSize = 32    # samples per DataLoader batch
epochs = 20       # maximum number of training epochs
patience = 5      # epochs without improvement tolerated before early stopping
minDelta = 0.01   # minimum drop in validation loss that counts as an improvement
seed = 42         # fixed seed for the random number generators

# Seed PyTorch and NumPy so random draws made through these libraries start
# from the same state after a kernel restart.
# NOTE(review): GPU kernels and DataLoader workers may still introduce small
# run-to-run differences; this does not guarantee bitwise reproducibility.
torch.manual_seed(seed)
np.random.seed(seed)

# Transforms
# Per-channel mean / std passed to Normalize in both pipelines
# (presumably the ImageNet statistics that go with the pretrained weights).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]
target_size = (imgSize, imgSize)

# Training pipeline: random rotation and flips first, then resize,
# tensor conversion and normalization.
train_steps = [
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(0.3),
    transforms.RandomVerticalFlip(0.3),
    transforms.Resize(target_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: same preprocessing without any random augmentation.
val_steps = [
    transforms.Resize(target_size),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
]
val_transform = transforms.Compose(val_steps)

# Paths from KaggleHub: built from the `path` returned by
# kagglehub.dataset_download(...) rather than a hard-coded Kaggle mount point,
# so the notebook reads the files from wherever they were actually placed.
train_path = os.path.join(path, "train")
# NOTE(review): the "test" split is used as the validation set, so the
# validation metrics reported later also drive early stopping and LR scheduling.
val_path   = os.path.join(path, "test")

# Load dataset: ImageFolder is given one root directory per split
train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
val_dataset   = datasets.ImageFolder(val_path, transform=val_transform)

# Only the training loader shuffles; validation keeps a fixed order
trainLoader = DataLoader(train_dataset, batch_size=batchSize, shuffle=True, num_workers=2)
valLoader   = DataLoader(val_dataset, batch_size=batchSize, shuffle=False, num_workers=2)

print("Classes:", train_dataset.classes)

# === ResNet50 ===
# Start from the "IMAGENET1K_V1" weights and swap the final fully connected
# layer for a single-output layer (one logit for binary classification).
model = models.resnet50(weights="IMAGENET1K_V1")
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=1)
model.to(device)  # nn.Module.to moves the parameters in place

# Loss & Optimizer
# BCEWithLogitsLoss works on raw logits, so the model output is not passed
# through a sigmoid before the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# threshold_mode="abs" makes the scheduler judge improvement as an absolute
# drop of minDelta in the monitored loss, the same rule the early-stopping
# check uses; the default "rel" mode would read 0.01 as a 1% relative change.
scheduler = ReduceLROnPlateau(optimizer, threshold=minDelta, threshold_mode="abs",
                              factor=0.1, patience=3, min_lr=1e-5)
# Gradient scaling is only needed for CUDA mixed precision; when disabled the
# scaler passes calls straight through to the optimizer.
scaler = GradScaler(enabled=torch.cuda.is_available())

# Tracking: per-epoch history plus early-stopping state
trainLosses, valLosses, valAccs = [], [], []
bestLoss = float('inf')
currentPatience = 0

# Training Loop
# Each epoch: one optimisation pass over trainLoader, one evaluation pass over
# valLoader, then early-stopping bookkeeping and an LR-scheduler step.
useAmp = device.type == "cuda"   # mixed precision is only requested on CUDA
bestState = None                 # copy of the weights at the best validation loss

for epoch in range(epochs):
    model.train()
    runningLoss = 0.0

    for inputs, labels in trainLoader:
        # Labels get a trailing dimension and a float dtype so they can be
        # compared against the model output by the loss.
        inputs, labels = inputs.to(device), labels.to(device).unsqueeze(1).float()
        optimizer.zero_grad()

        with autocast(enabled=useAmp):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Scaled backward pass / optimizer step through the GradScaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        runningLoss += loss.item()

    # Mean of the per-batch losses for this epoch
    trainLoss = runningLoss / len(trainLoader)
    trainLosses.append(trainLoss)

    # Validation
    model.eval()
    valLoss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in valLoader:
            inputs, labels = inputs.to(device), labels.to(device).unsqueeze(1).float()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            valLoss += loss.item()

            # Threshold the sigmoid probability at 0.5 to get a 0/1 prediction
            preds = (torch.sigmoid(outputs) > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avgValLoss = valLoss / len(valLoader)
    accuracy = correct / total * 100
    valLosses.append(avgValLoss)
    valAccs.append(accuracy)

    print(f"[{epoch+1}/{epochs}] Train Loss: {trainLoss:.3f} | Val Loss: {avgValLoss:.3f} | Val Acc: {accuracy:.2f}%")

    # Early stopping
    if avgValLoss < bestLoss - minDelta:
        bestLoss = avgValLoss
        currentPatience = 0
        # Keep an independent copy of the weights that produced the best
        # validation loss so they can be restored after training.
        bestState = {name: tensor.detach().clone()
                     for name, tensor in model.state_dict().items()}
    else:
        currentPatience += 1
        if currentPatience >= patience:
            print("Early stopping triggered.")
            break

    scheduler.step(avgValLoss)

# Without this, everything evaluated after the loop would use the weights of
# the last epoch, which by construction is not the best one when early
# stopping fired.
if bestState is not None:
    model.load_state_dict(bestState)
    print(f"Restored weights from the best epoch (val loss {bestLoss:.3f}).")

# Plot loss & accuracy
# Two side-by-side panels: loss history on the left, validation accuracy on
# the right; the x axis is the epoch index of each recorded value.
fig, (lossAx, accAx) = plt.subplots(1, 2, figsize=(12, 5))

lossAx.plot(trainLosses, label="Train Loss")
lossAx.plot(valLosses, label="Val Loss")
lossAx.legend()
lossAx.set_title("Loss Curve")

accAx.plot(valAccs, label="Val Accuracy")
accAx.legend()
accAx.set_title("Validation Accuracy")

plt.show()

# Confusion Matrix
# One more pass over the validation loader in eval mode, collecting the true
# label and the thresholded prediction for every sample.
model.eval()
allLabels, allPreds = [], []
with torch.no_grad():
    for images, targets in valLoader:
        images = images.to(device)
        targets = targets.unsqueeze(1).float().to(device)
        logits = model(images)
        predicted = (torch.sigmoid(logits) > 0.5).float()

        # Iterating a 2-D array yields its rows, so each sample is stored as
        # a length-1 array.
        allLabels += list(targets.cpu().numpy())
        allPreds += list(predicted.cpu().numpy())

matrix = confusion_matrix(allLabels, allPreds)

# Rows are the actual classes, columns the predicted ones
tickLabels = train_dataset.classes
ax = sns.heatmap(
    matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=tickLabels,
    yticklabels=tickLabels,
)
ax.set_title("Confusion Matrix (Validation)")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()


In [None]:
from sklearn.metrics import classification_report
import pandas as pd

# Every stored entry is indexed with [0] and cast to int to obtain a flat
# list of integer class ids.
allLabels_flat = [int(x[0]) for x in allLabels]
allPreds_flat = [int(x[0]) for x in allPreds]

class_names = train_dataset.classes  # e.g. ['Benign', 'Malignant']

# Classification report
# `labels` is passed explicitly so the report always has one row per class
# name, even if a class is missing from both the labels and the predictions.
report_dict = classification_report(
    allLabels_flat,
    allPreds_flat,
    labels=list(range(len(class_names))),
    target_names=class_names,
    output_dict=True
)

report_df = pd.DataFrame(report_dict).transpose()
# "accuracy" is a single scalar in the report dict, so the DataFrame
# constructor repeats it in every column; its "support" cell would then show
# the accuracy fraction. Replace it with the number of evaluated samples.
if "accuracy" in report_df.index:
    report_df.loc["accuracy", "support"] = len(allLabels_flat)
# Express the three score columns as percentages; "support" stays a count
report_df[['precision', 'recall', 'f1-score']] = report_df[['precision', 'recall', 'f1-score']] * 100
report_df = report_df.round(2)

print("=== Classification Report (%) ===")
print(report_df)
