In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
from pathlib import Path

In [2]:
df = pd.read_csv('../data/train-metadata.csv')
df.head()

  df = pd.read_csv('../data/train-metadata.csv')


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


In [3]:
import torch
import os
from torch.utils.data import Dataset
from PIL import Image

class SkinLesionDataset(Dataset):
    def __init__(self, dataframe, root_dir, transforms=None):
        """
        Args:
            dataframe (pd.DataFrame): The dataframe object (train_df, val_df, etc.)
            root_dir (string): Directory with all the images (the flat folder).
            transform (callable, optional): Optional transform to be applied.
        """
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transforms = transforms

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        # Use iloc to access by integer position, regardless of the DataFrame index
        row = self.dataframe.iloc[index]
        
        # Column 0 is the filename, Column 1 is the label
        # Adjust these keys if your CSV columns have specific names like 'image_id'
        img_name = os.path.join(self.root_dir, f'{row.iloc[0]}.jpg') 
        label = int(row.iloc[1])
        
        image = Image.open(img_name).convert('RGB')
        image = np.array(image)
        
        if self.transforms:
            image = self.transforms(image)
            
        return image, label

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('../data/train-metadata.csv')

df_malignant = df[df['target'] == 1]
df_benign = df[df['target'] == 0]

print(f"Total Malignant: {len(df_malignant)}")
print(f"Total Benign: {len(df_benign)}")

test_mal, train_val_mal = train_test_split(df_malignant, test_size=None, train_size=50)
test_ben, train_val_ben = train_test_split(df_benign, test_size=None, train_size=1000)

val_mal, train_mal = train_test_split(train_val_mal, test_size=None, train_size=50)
val_ben, train_ben = train_test_split(train_val_ben, test_size=None, train_size=1000)

# 5. Create Training Set (The Balancing Act)
# We now have ~300 Malignant images left.
# We have ~398,000 Benign images left.
# WE CANNOT USE ALL BENIGN IMAGES. It will drown out the signal.

# Downsample Benign to a 1:5 ratio (300 Malignant : 1500 Benign)
# This gives the model a chance to actually see the cancer.
train_ben_downsampled = train_ben.sample(n=1500)

# Concatenate back together
train_df = pd.concat([train_mal, train_ben_downsampled])
val_df = pd.concat([val_mal, val_ben])
test_df = pd.concat([test_mal, test_ben])

# Shuffle them
train_df = train_df.sample(frac=1).reset_index(drop=True)
val_df = val_df.sample(frac=1).reset_index(drop=True)
test_df = test_df.sample(frac=1).reset_index(drop=True)

  df = pd.read_csv('../data/train-metadata.csv')


Total Malignant: 393
Total Benign: 400666


In [5]:
print(f"Training Set: {len(train_df)} images ({train_df['target'].sum()} Malignant)")
print(f"Val Set: {len(val_df)} images ({val_df['target'].sum()} Malignant)")
print(f"Test Set: {len(test_df)} images ({test_df['target'].sum()} Malignant)")

Training Set: 1793 images (293 Malignant)
Val Set: 1050 images (50 Malignant)
Test Set: 1050 images (50 Malignant)


In [6]:
train_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    # other augmentations for train dataset
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], # ImageNet mean and std
                         std=[0.229, 0.224, 0.225])
])
val_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
test_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [7]:
train_ds = SkinLesionDataset(dataframe=train_df,
                             root_dir=Path('../data/train-image/image'),
                             transforms=train_transforms)
val_ds = SkinLesionDataset(dataframe=val_df,
                           root_dir=Path('../data/train-image/image'),
                           transforms=val_transforms)
test_ds = SkinLesionDataset(dataframe=test_df,
                            root_dir=Path('../data/train-image/image'),
                            transforms=test_transforms)

In [8]:
train_loader = DataLoader(train_ds, batch_size=32)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [9]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, 
                               out_channels=32, 
                               kernel_size=3, 
                               padding=1) # 32, 128, 128
        self.batchNorm1 = nn.BatchNorm2d(num_features=32)
        self.relu1 = nn.ReLU(inplace=True) # inplace saves gpu memory (vram), modifies input tensor directly in memory rather than creating a new tensor for the output
        self.pool1 = nn.MaxPool2d(kernel_size=2) # 32, 64, 64

        self.flatten = nn.Flatten()
        self.fc = nn.Sequential(
            nn.Linear(32 * 64 * 64, 256),
            nn.ReLU(),
            nn.Linear(256, 1)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.batchNorm1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.flatten(x)
        return self.fc(x)

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleCNN().to(device)
# maybe add pos_weight to tell model to pay more attention to malignant cases
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
torch.cuda.is_available()

False

In [12]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train() # Set model to training mode (enables Dropout/BatchNorm)
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        # 1. Forward Pass
        outputs = model(images) 
        
        # Important: labels need to be float and shape [batch, 1] to match outputs
        loss = criterion(outputs, labels.view(-1, 1).float())
        
        # 2. Backward Pass
        optimizer.zero_grad() # Clear old gradients
        loss.backward()       # Calculate new gradients
        optimizer.step()      # Update weights
        
        # 3. Metrics
        running_loss += loss.item()
        
        # Convert raw logits to probabilities (Sigmoid) -> then round to 0 or 1
        predicted = torch.sigmoid(outputs) > 0.5
        total += labels.size(0)
        correct += (predicted.view(-1) == labels).sum().item()
        
    avg_loss = running_loss / len(loader)
    acc = 100 * correct / total
    return avg_loss, acc

def validate(model, loader, criterion, device):
    model.eval() # Set model to evaluation mode (freezes BatchNorm/Dropout)
    running_loss = 0.0
    correct = 0
    total = 0
    
    # No gradients needed for validation (saves memory)
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels.view(-1, 1).float())
            
            running_loss += loss.item()
            predicted = torch.sigmoid(outputs) > 0.5
            total += labels.size(0)
            correct += (predicted.view(-1) == labels).sum().item()
            
    avg_loss = running_loss / len(loader)
    acc = 100 * correct / total
    return avg_loss, acc

In [13]:
EPOCHS = 10

print("Starting Training...")

for epoch in range(EPOCHS):
    # Train
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    
    # Validate
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}]")
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")
    print("-" * 30)

Starting Training...


TypeError: Unexpected type <class 'numpy.ndarray'>