In [1]:
# ===============================
# 1. Import Required Libraries
# ===============================
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tqdm import tqdm



In [2]:
# ===============================
# 2. Check Device
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [4]:
# ===============================
# 3. Dataset Path and Transforms
# ===============================
from torchvision import transforms # <== avi
import random

class HideAndSeek:  # <== evtr
    def __init__(self, grid_size=4, hide_prob=0.5):
        """
        grid_size: number of grid blocks per side (e.g., 4x4)
        hide_prob: probability of hiding each block
        """
        self.grid_size = grid_size
        self.hide_prob = hide_prob

    def __call__(self, img_tensor):
        """
        img_tensor: (C, H, W) PyTorch Tensor
        """
        _, H, W = img_tensor.shape
        grid_h, grid_w = H // self.grid_size, W // self.grid_size

        for i in range(self.grid_size):
            for j in range(self.grid_size):
                if random.random() < self.hide_prob:
                    y1 = i * grid_h
                    y2 = (i + 1) * grid_h
                    x1 = j * grid_w
                    x2 = (j + 1) * grid_w
                    img_tensor[:, y1:y2, x1:x2] = 0.0  # Zero-out the block

        return img_tensor

# dataset_path = "data\source\CervicalCancer\JPEG\CROPPED"  # Update if different
dataset_path = "data\source\CervicalCancer\JPEG\CROPPED"


# Data augmentation and normalization
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(20),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        # HideAndSeek(grid_size=4, hide_prob=0.5), # <== evtr
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
        # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=[0.5, 0.5, 0.5])  # <== avi
    ]),
    "val_test": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}


  dataset_path = "data\source\CervicalCancer\JPEG\CROPPED"


In [6]:
# ===============================
# 4. Load Dataset
# ===============================
full_dataset = datasets.ImageFolder(root=dataset_path, transform=data_transforms["train"])
full_dataset_size = len(full_dataset)
class_names = full_dataset.classes
num_classes = len(class_names)

# Split per class to avoid leakage
def stratified_split(dataset, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2):
    from collections import defaultdict
    from torch.utils.data import Subset

    class_indices = defaultdict(list)
    for idx, (_, label) in enumerate(dataset.samples):
        class_indices[label].append(idx)

    train_idx, val_idx, test_idx = [], [], []

    for indices in class_indices.values():
        n_total = len(indices)
        n_train = int(train_ratio * n_total)
        n_val = int(val_ratio * n_total)

        np.random.shuffle(indices)
        train_idx += indices[:n_train]
        val_idx += indices[n_train:n_train + n_val]
        test_idx += indices[n_train + n_val:]

    return Subset(dataset, train_idx), Subset(dataset, val_idx), Subset(dataset, test_idx)

train_set, val_set, test_set = stratified_split(full_dataset)

# Update transforms
train_set.dataset.transform = data_transforms["train"]
val_set.dataset.transform = data_transforms["val_test"]
test_set.dataset.transform = data_transforms["val_test"]

# Data loaders
batch_size = 32
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

print(f"Train set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Test set size: {len(test_set)}")

print(f"Dataset size: {full_dataset_size}")
print(f"Number of classes: {num_classes}")
print(f"Class names: {class_names}")


import torchvision.utils as vutils
from torchvision.transforms.functional import to_pil_image
import os

# Directory to save test images
save_dir = "data/source/CervicalCancer/saved_test_images"
os.makedirs(save_dir, exist_ok=True)

# Get class names from the dataset
class_names = test_set.dataset.classes if hasattr(test_set.dataset, "classes") else []

for batch_idx, (images, labels) in enumerate(test_loader):
    for i in range(images.size(0)):
        img = images[i]
        label = labels[i].item()
        # Denormalize
        mean = torch.tensor([0.485, 0.456, 0.406]).view(3,1,1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3,1,1)
        img_denorm = img * std + mean
        img_denorm = torch.clamp(img_denorm, 0, 1)
        pil_img = to_pil_image(img_denorm)
        # Build class subfolder
        class_folder = class_names[label] if class_names else str(label)
        out_dir = os.path.join(save_dir, class_folder)
        os.makedirs(out_dir, exist_ok=True)
        # Save with unique name
        out_path = os.path.join(out_dir, f"testimg_{batch_idx}_{i}.jpg")
        pil_img.save(out_path)

print(f"All test images saved to {save_dir}")


Train set size: 2427
Validation set size: 808
Test set size: 814
Dataset size: 4049
Number of classes: 5
Class names: ['Dyskeratotic', 'Koilocytotic', 'Metaplastic', 'Parabasal', 'Superficial-Intermediate']
All test images saved to data/source/CervicalCancer/saved_test_images


In [7]:
# ===============================
# 5. Define the Model (ResNet50)
# ===============================
model = models.resnet18(pretrained=True)

# Freeze early layers
for param in model.parameters():
    param.requires_grad = False

# Replace classifier for 7-class output
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.5),  # Dropout applied
    nn.Linear(512, num_classes)
)

model = model.to(device)





In [8]:
# ===============================
# 6. Define Loss, Optimizer
# ===============================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)  # Weight decay applied


In [9]:
# ===============================
# 7. Train the Model
# ===============================
def train_model(model, train_loader, val_loader, epochs=20):
    train_loss_log = []
    val_loss_log = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train, total_train = 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct_train += (preds == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_acc = correct_train / total_train

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val, total_val = 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_acc = correct_val / total_val

        # Log
        train_loss_log.append(train_loss)
        val_loss_log.append(val_loss)

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, "
              f"Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

    return model, train_loss_log, val_loss_log

# Train now
model, train_loss_log, val_loss_log = train_model(model, train_loader, val_loader)


save_dir = "data/weights"
os.makedirs(save_dir, exist_ok=True)    # ← make sure the folder is there
torch.save(model.state_dict(), os.path.join(save_dir, "cancer_resnet18_random_plus.pth"))


Epoch 1/20: 100%|██████████| 456/456 [01:25<00:00,  5.31it/s]


Epoch 1: Train Loss = 0.7831, Train Acc = 0.7408, Val Loss = 0.4593, Val Acc = 0.8466


Epoch 2/20: 100%|██████████| 456/456 [00:17<00:00, 26.53it/s]


Epoch 2: Train Loss = 0.4647, Train Acc = 0.8414, Val Loss = 0.3624, Val Acc = 0.8746


Epoch 3/20: 100%|██████████| 456/456 [00:16<00:00, 27.34it/s]


Epoch 3: Train Loss = 0.3962, Train Acc = 0.8642, Val Loss = 0.3265, Val Acc = 0.8886


Epoch 4/20: 100%|██████████| 456/456 [00:16<00:00, 26.87it/s]


Epoch 4: Train Loss = 0.3586, Train Acc = 0.8740, Val Loss = 0.3034, Val Acc = 0.8936


Epoch 5/20: 100%|██████████| 456/456 [00:16<00:00, 26.86it/s]


Epoch 5: Train Loss = 0.3361, Train Acc = 0.8814, Val Loss = 0.2844, Val Acc = 0.9008


Epoch 6/20: 100%|██████████| 456/456 [00:17<00:00, 26.75it/s]


Epoch 6: Train Loss = 0.3194, Train Acc = 0.8878, Val Loss = 0.2688, Val Acc = 0.9045


Epoch 7/20: 100%|██████████| 456/456 [00:16<00:00, 26.95it/s]


Epoch 7: Train Loss = 0.3001, Train Acc = 0.8949, Val Loss = 0.2605, Val Acc = 0.9074


Epoch 8/20: 100%|██████████| 456/456 [00:16<00:00, 27.08it/s]


Epoch 8: Train Loss = 0.2851, Train Acc = 0.8990, Val Loss = 0.2500, Val Acc = 0.9102


Epoch 9/20: 100%|██████████| 456/456 [00:16<00:00, 27.08it/s]


Epoch 9: Train Loss = 0.2712, Train Acc = 0.9028, Val Loss = 0.2385, Val Acc = 0.9129


Epoch 10/20: 100%|██████████| 456/456 [00:16<00:00, 27.01it/s]


Epoch 10: Train Loss = 0.2698, Train Acc = 0.9032, Val Loss = 0.2438, Val Acc = 0.9137


Epoch 11/20: 100%|██████████| 456/456 [00:16<00:00, 27.05it/s]


Epoch 11: Train Loss = 0.2487, Train Acc = 0.9114, Val Loss = 0.2337, Val Acc = 0.9156


Epoch 12/20: 100%|██████████| 456/456 [00:16<00:00, 26.90it/s]


Epoch 12: Train Loss = 0.2418, Train Acc = 0.9123, Val Loss = 0.2203, Val Acc = 0.9191


Epoch 13/20: 100%|██████████| 456/456 [00:17<00:00, 26.71it/s]


Epoch 13: Train Loss = 0.2375, Train Acc = 0.9141, Val Loss = 0.2153, Val Acc = 0.9185


Epoch 14/20: 100%|██████████| 456/456 [00:16<00:00, 27.00it/s]


Epoch 14: Train Loss = 0.2313, Train Acc = 0.9177, Val Loss = 0.2082, Val Acc = 0.9228


Epoch 15/20: 100%|██████████| 456/456 [00:16<00:00, 26.85it/s]


Epoch 15: Train Loss = 0.2163, Train Acc = 0.9238, Val Loss = 0.2015, Val Acc = 0.9273


Epoch 16/20: 100%|██████████| 456/456 [00:16<00:00, 26.88it/s]


Epoch 16: Train Loss = 0.2127, Train Acc = 0.9217, Val Loss = 0.1935, Val Acc = 0.9284


Epoch 17/20: 100%|██████████| 456/456 [00:16<00:00, 26.94it/s]


Epoch 17: Train Loss = 0.2025, Train Acc = 0.9259, Val Loss = 0.2012, Val Acc = 0.9246


Epoch 18/20: 100%|██████████| 456/456 [00:16<00:00, 26.87it/s]


Epoch 18: Train Loss = 0.1994, Train Acc = 0.9308, Val Loss = 0.1852, Val Acc = 0.9333


Epoch 19/20: 100%|██████████| 456/456 [00:17<00:00, 26.78it/s]


Epoch 19: Train Loss = 0.1908, Train Acc = 0.9311, Val Loss = 0.1805, Val Acc = 0.9345


Epoch 20/20: 100%|██████████| 456/456 [00:16<00:00, 26.84it/s]


Epoch 20: Train Loss = 0.1853, Train Acc = 0.9331, Val Loss = 0.1767, Val Acc = 0.9364


In [8]:
# ===============================
# 8. Evaluate on Test Set
# ===============================
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Metrics
acc = accuracy_score(y_true, y_pred) * 100
f1 = f1_score(y_true, y_pred, average='macro')
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')

print("\n=== Final Test Metrics ===")
print(f"Accuracy: {acc:.2f}%")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")


=== Final Test Metrics ===
Accuracy: 93.00%
F1 Score: 0.9300
Precision: 0.9304
Recall: 0.9298


In [None]:
# ===============================
# 9. K-fold cross validation
# ===============================
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Subset
import pandas as pd

# ===============================
# Prepare target labels for stratified sampling
# ===============================
targets = [sample[1] for sample in full_dataset.samples]
targets = np.array(targets)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []

# ===============================
# Cross-validation Loop
# ===============================
for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(targets)), targets)):
    print(f"\n------ Fold {fold + 1}/5 ------")

    # Create Subsets for this fold
    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)

    # Apply correct transforms
    train_subset.dataset.transform = data_transforms["train"]
    val_subset.dataset.transform = data_transforms["val_test"]

    # Create DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # ============================
    # Define new ResNet50 model
    # ============================
    model = models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, num_classes)
    )
    model = model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)

    # ============================
    # Train on this fold
    # ============================
    model, _, _ = train_model(model, train_loader, val_loader, epochs=10)

    # ============================
    # Evaluate on this fold
    # ============================
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.numpy())
            y_pred.extend(preds.cpu().numpy())

    # Metrics
    acc = accuracy_score(y_true, y_pred) * 100
    f1 = f1_score(y_true, y_pred, average='macro')
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')

    print(f"Fold {fold + 1} - Accuracy: {acc:.2f}%, F1: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")
    fold_metrics.append([acc, f1, precision, recall])

# ===============================
# Average Performance Summary
# ===============================
df = pd.DataFrame(fold_metrics, columns=["Accuracy", "F1 Score", "Precision", "Recall"])
print("\n=== Cross-Validation Performance ===")
print(df.mean())



------ Fold 1/5 ------


Epoch 1/10: 100%|██████████| 102/102 [00:06<00:00, 14.61it/s]


Epoch 1: Train Loss = 0.9525, Train Acc = 0.7082, Val Loss = 0.5667, Val Acc = 0.8296


Epoch 2/10: 100%|██████████| 102/102 [00:06<00:00, 15.56it/s]


Epoch 2: Train Loss = 0.5279, Train Acc = 0.8259, Val Loss = 0.4122, Val Acc = 0.8716


Epoch 3/10: 100%|██████████| 102/102 [00:06<00:00, 15.56it/s]


Epoch 3: Train Loss = 0.4303, Train Acc = 0.8552, Val Loss = 0.3440, Val Acc = 0.8778


Epoch 4/10:  19%|█▊        | 19/102 [00:01<00:05, 15.56it/s]


KeyboardInterrupt: 

: 

In [None]:
# ===============================
# 10. Plot Loss Curve
# ===============================
def plot_loss_curves(train_log, val_log):
    plt.figure(figsize=(10, 6))
    plt.plot(train_log, label='Train Loss', marker='o')
    plt.plot(val_log, label='Validation Loss', marker='x')
    plt.title("Training vs Validation Loss Curve")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

plot_loss_curves(train_loss_log, val_loss_log)
