<a href="https://colab.research.google.com/github/chuanlintneoh/OvarianCystClassification/blob/main/PyTorch_Ovarian_Cyst_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so files stored there (the dataset archive
# used below) are reachable through the local filesystem. force_remount=True is
# passed so the mount is re-established even when a previous mount is present.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import zipfile
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import roc_auc_score, classification_report

Mounted at /content/drive


In [None]:
# --- Locate the dataset archive on Drive ---
default_dir = '/content/drive/MyDrive/UM Artificial Intelligence/Y3S1/Ovarian Cyst Prediction/Implementation'
print(os.listdir(default_dir))  # show what is available in the project folder
e2e_dir = os.path.join(default_dir, 'e2e.zip')
print(e2e_dir)

# --- Unpack into a session-local scratch directory ---
extract_path = '/content/pytorch_ovarian_cyst_dataset'
os.makedirs(extract_path, exist_ok=True)  # safe to re-run: existing directory is reused
with zipfile.ZipFile(e2e_dir, mode='r') as archive:
    archive.extractall(path=extract_path)

# --- Sanity check: the archive's top-level 'e2e' folder should list the class folders ---
data_dir = os.path.join(extract_path, 'e2e')
print(os.listdir(data_dir))

['e2e.zip', 'PyTorch DenseNet121', 'TensorFlow DenseNet121']
/content/drive/MyDrive/UM Artificial Intelligence/Y3S1/Ovarian Cyst Prediction/Implementation/e2e.zip
['Benign', 'Malignant']


In [None]:
# Custom dataset class
class OvarianCancerDataset(Dataset):
    """Image dataset read from ``<image_dir>/Benign`` and ``<image_dir>/Malignant``.

    Each sample is loaded as grayscale, replicated to three channels, resized to
    ``IMAGE_SIZE`` and optionally passed through ``transform``.

    Args:
        image_dir: Directory containing one sub-folder per class name in ``CLASS_MAP``.
        transform: Optional callable applied to the resized HxWx3 uint8 array.

    Attributes:
        image_paths: File paths, grouped by class and sorted by file name.
        labels: Integer label for each entry in ``image_paths``.

    Raises:
        FileNotFoundError: From ``os.listdir`` if a class folder is missing.
        OSError: From ``__getitem__`` if an image cannot be decoded.
    """

    # Folder name -> integer class label.
    CLASS_MAP = {"Benign": 0, "Malignant": 1}
    # (width, height) every image is resized to before the transform runs.
    IMAGE_SIZE = (224, 224)

    def __init__(self, image_dir, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for class_name, label in self.CLASS_MAP.items():
            class_folder = os.path.join(image_dir, class_name)
            # os.listdir order is arbitrary; sort so that sample indices (and
            # therefore any index-based split) are stable across runs.
            for image_name in sorted(os.listdir(class_folder)):
                img_path = os.path.join(class_folder, image_name)
                # Skip hidden files (e.g. .DS_Store) and sub-directories, which
                # would otherwise be treated as images and fail on load.
                if image_name.startswith(".") or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(label)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f"Could not read image file: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # Convert grayscale to RGB
        img = cv2.resize(img, self.IMAGE_SIZE)
        if self.transform:
            img = self.transform(img)
        label = self.labels[idx]
        return img, torch.tensor(label, dtype=torch.long)

In [None]:
# Training-time preprocessing + augmentation, following the documented preprocessing details.
# Input is expected to be what the dataset hands over: an HxWx3 uint8 array.
augmentation_steps = [
    transforms.ToPILImage(),
    # Random crop covering 80-100% of the image area, resized back to 224x224
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    # Mirror left/right half of the time
    transforms.RandomHorizontalFlip(p=0.5),
    # Random rotation within +/-10 degrees
    transforms.RandomRotation(degrees=10),
    # Photometric jitter: brightness, contrast, saturation, hue
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    # Map [0, 1] pixel values to [-1, 1] per channel
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(augmentation_steps)

In [None]:
# Build the full dataset from the extracted class folders, using the pipeline defined above
dataset = OvarianCancerDataset(image_dir=data_dir, transform=transform)

In [None]:
# Split dataset (80/20, stratified and reproducible)
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Stratify on the labels so both splits keep the Benign/Malignant ratio, and fix
# the seed so the split is identical after Restart & Run All.
train_indices, val_indices = train_test_split(
    np.arange(len(dataset)),
    test_size=val_size,
    stratify=dataset.labels,
    random_state=SPLIT_SEED,
)
train_dataset = torch.utils.data.Subset(dataset, train_indices.tolist())

# Validation samples must not go through the random training augmentations,
# otherwise the reported metrics change from one evaluation to the next. Use a
# second view of the same files with deterministic preprocessing only
# (the dataset already resizes every image to 224x224).
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
eval_dataset = OvarianCancerDataset(data_dir, transform=eval_transform)
# The indices above were drawn against `dataset`; they are only valid for
# `eval_dataset` if both instances list the files in the same order.
assert eval_dataset.image_paths == dataset.image_paths, "file listing differs between dataset instances"
val_dataset = torch.utils.data.Subset(eval_dataset, val_indices.tolist())

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
# Compute class weights
# Look the labels up through the subset's indices instead of iterating
# train_dataset: iteration would read, resize and augment every image only to
# throw the pixels away.
labels = np.array([train_dataset.dataset.labels[i] for i in train_dataset.indices])
# 'balanced' gives each class the weight n_samples / (n_classes * n_class_samples);
# the result is ordered like np.unique(labels), i.e. index 0 = Benign, 1 = Malignant.
class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights = torch.tensor(class_weights, dtype=torch.float32)

In [None]:
# Define model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): `pretrained=True` is kept for compatibility with the torchvision
# version this notebook was run on; newer releases appear to prefer the
# `weights=` argument — confirm before upgrading.
model = models.densenet121(pretrained=True)
# Replace the ImageNet head with a single-logit head; read the feature width
# from the existing layer instead of hard-coding it.
model.classifier = nn.Linear(model.classifier.in_features, 1)  # Binary classification
model = model.to(device)

# Loss and optimizer
# BCEWithLogitsLoss leaves the negative class at weight 1 and scales only the
# positive class, so pos_weight has to be the negative/positive ratio. With
# 'balanced' class weights that ratio is w_malignant / w_benign
# (= n_benign / n_malignant); passing w_malignant alone under-weights positives.
pos_weight = (class_weights[1] / class_weights[0]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 137MB/s]


In [None]:
# Training function
def train_model(model, train_loader, criterion, optimizer, epochs=30, scheduler=None):
    """Train ``model`` in place and print the loss and training AUC for every epoch.

    Args:
        model: Network producing one logit per sample (output shape ``(batch, 1)``).
        train_loader: Iterable of ``(images, labels)`` batches with 0/1 labels.
        criterion: Loss taking ``(logits, float_targets)`` of shape ``(batch, 1)``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.

    Returns:
        None. Progress is reported through ``print``.
    """
    model.train()
    # Run on whatever device the model already lives on rather than guessing
    # from CUDA availability, so inputs and weights can never disagree.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    for epoch in range(epochs):
        total_loss = 0.0  # sum of per-batch losses (not averaged)
        all_labels = []
        all_preds = []
        for images, labels in train_loader:
            images = images.to(device)
            # Targets must be float and shaped (batch, 1) to match the logits.
            labels = labels.to(device).float().unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            probs = torch.sigmoid(outputs).detach().cpu().numpy()
            # Flatten (batch, 1) -> (batch,) so the metric receives 1-D scores.
            all_preds.extend(probs.ravel())
            all_labels.extend(labels.cpu().numpy().ravel())
        # Step the scheduler once per epoch. Stepping it per batch makes an
        # epoch-based schedule such as StepLR(step_size=20) decay every 20
        # batches, collapsing the learning rate within the first few epochs.
        if scheduler is not None:
            scheduler.step()
        auc = roc_auc_score(all_labels, all_preds)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, AUC: {auc:.4f}")

In [None]:
# Run training with a step-decay learning-rate schedule:
# StepLR multiplies the learning rate by `gamma` after every `step_size` calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=20, gamma=0.1)
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)

Epoch 1/30, Loss: 5.5395, AUC: 0.5533
Epoch 2/30, Loss: 5.2645, AUC: 0.6860
Epoch 3/30, Loss: 5.1409, AUC: 0.7463
Epoch 4/30, Loss: 4.6673, AUC: 0.8509
Epoch 5/30, Loss: 4.5318, AUC: 0.8572
Epoch 6/30, Loss: 4.3748, AUC: 0.8942
Epoch 7/30, Loss: 4.3404, AUC: 0.8720
Epoch 8/30, Loss: 4.6225, AUC: 0.8666
Epoch 9/30, Loss: 4.5348, AUC: 0.8889
Epoch 10/30, Loss: 4.3287, AUC: 0.8713
Epoch 11/30, Loss: 4.5992, AUC: 0.8791
Epoch 12/30, Loss: 4.2915, AUC: 0.8798
Epoch 13/30, Loss: 4.6540, AUC: 0.8743
Epoch 14/30, Loss: 4.7168, AUC: 0.8382
Epoch 15/30, Loss: 4.3367, AUC: 0.8666
Epoch 16/30, Loss: 4.4506, AUC: 0.8402
Epoch 17/30, Loss: 4.5289, AUC: 0.9026
Epoch 18/30, Loss: 4.3508, AUC: 0.8663
Epoch 19/30, Loss: 4.3426, AUC: 0.8753
Epoch 20/30, Loss: 4.6558, AUC: 0.8490
Epoch 21/30, Loss: 4.3776, AUC: 0.8594
Epoch 22/30, Loss: 4.7068, AUC: 0.8382
Epoch 23/30, Loss: 4.6533, AUC: 0.8550
Epoch 24/30, Loss: 4.4009, AUC: 0.8464
Epoch 25/30, Loss: 4.6530, AUC: 0.8590
Epoch 26/30, Loss: 4.3787, AUC: 0.

In [None]:
# Evaluation function
def evaluate_model(model, val_loader, threshold=0.5):
    """Print the ROC AUC and a classification report for ``model`` on ``val_loader``.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Network producing one logit per sample (output shape ``(batch, 1)``).
        val_loader: Iterable of ``(images, labels)`` batches with 0/1 labels.
        threshold: Probability above which a sample is reported as Malignant.

    Returns:
        None. Metrics are reported through ``print``.
    """
    model.eval()
    # Use the model's own device so inputs always match the weights.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            probs = torch.sigmoid(outputs).cpu().numpy()
            # Flatten (batch, 1) -> (batch,) so metrics receive 1-D arrays.
            all_preds.extend(probs.ravel())
            all_labels.extend(labels.cpu().numpy().ravel())
    y_true = np.asarray(all_labels).astype(int)
    y_prob = np.asarray(all_preds)
    auc = roc_auc_score(y_true, y_prob)
    print(f"Validation AUC: {auc:.4f}")
    y_pred = (y_prob > threshold).astype(int)
    print(classification_report(y_true, y_pred, target_names=["Benign", "Malignant"]))

In [None]:
# Report validation AUC and the per-class precision/recall table for the trained model
evaluate_model(model=model, val_loader=val_loader)

Validation AUC: 0.7509
              precision    recall  f1-score   support

      Benign       0.80      0.77      0.79        31
   Malignant       0.63      0.67      0.65        18

    accuracy                           0.73        49
   macro avg       0.72      0.72      0.72        49
weighted avg       0.74      0.73      0.74        49

