In [1]:
# ===============================
# 1. Import Required Libraries
# ===============================
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
from tqdm import tqdm



In [2]:
# ===============================
# 2. Check Device
# ===============================
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# ===============================
# 3. Dataset Path and Transforms
# ===============================
from torchvision import transforms # <== avi

dataset_path = "data\source\CervicalCancer\JPEG\CROPPED_modified_1-2"  # Update if different

# Data augmentation and normalization
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(20),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225]),
        # transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=[0.5, 0.5, 0.5])  # <== avi
    ]),
    "val_test": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}


  dataset_path = "data\source\CervicalCancer\JPEG\CROPPED_modified_1-10"  # Update if different


In [4]:
# ===============================
# 4. Load Dataset
# ===============================
full_dataset = datasets.ImageFolder(root=dataset_path, transform=data_transforms["train"])
full_dataset_size = len(full_dataset)
class_names = full_dataset.classes
num_classes = len(class_names)

# Split per class to avoid leakage
def stratified_split(dataset, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2):
    from collections import defaultdict
    from torch.utils.data import Subset

    class_indices = defaultdict(list)
    for idx, (_, label) in enumerate(dataset.samples):
        class_indices[label].append(idx)

    train_idx, val_idx, test_idx = [], [], []

    for indices in class_indices.values():
        n_total = len(indices)
        n_train = int(train_ratio * n_total)
        n_val = int(val_ratio * n_total)

        np.random.shuffle(indices)
        train_idx += indices[:n_train]
        val_idx += indices[n_train:n_train + n_val]
        test_idx += indices[n_train + n_val:]

    return Subset(dataset, train_idx), Subset(dataset, val_idx), Subset(dataset, test_idx)

train_set, val_set, test_set = stratified_split(full_dataset)

# Update transforms
train_set.dataset.transform = data_transforms["train"]
val_set.dataset.transform = data_transforms["val_test"]
test_set.dataset.transform = data_transforms["val_test"]

# Data loaders
batch_size = 32
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


print(f"Dataset size: {full_dataset_size}")
print(f"Number of classes: {num_classes}")
print(f"Class names: {class_names}")


Dataset size: 8098
Number of classes: 5
Class names: ['Dyskeratotic', 'Koilocytotic', 'Metaplastic', 'Parabasal', 'Superficial-Intermediate']


In [5]:
# ===============================
# 5. Define the Model (ResNet50)
# ===============================
model = models.resnet18(pretrained=True)

# Freeze early layers
for param in model.parameters():
    param.requires_grad = False

# Replace classifier for 7-class output
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.5),  # Dropout applied
    nn.Linear(512, num_classes)
)

model = model.to(device)





In [6]:
# ===============================
# 6. Define Loss, Optimizer
# ===============================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)  # Weight decay applied


In [None]:
# ===============================
# 7. Train the Model
# ===============================
def train_model(model, train_loader, val_loader, epochs=20):
    train_loss_log = []
    val_loss_log = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct_train, total_train = 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct_train += (preds == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_acc = correct_train / total_train

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val, total_val = 0, 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)

        val_loss /= len(val_loader)
        val_acc = correct_val / total_val

        # Log
        train_loss_log.append(train_loss)
        val_loss_log.append(val_loss)

        print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.4f}, "
              f"Val Loss = {val_loss:.4f}, Val Acc = {val_acc:.4f}")

    return model, train_loss_log, val_loss_log

# Train now
model, train_loss_log, val_loss_log = train_model(model, train_loader, val_loader)


save_dir = "data/weights"
os.makedirs(save_dir, exist_ok=True)    # ← make sure the folder is there
torch.save(model.state_dict(), os.path.join(save_dir, "cancer_resnet18_modified_1-2.pth"))


Epoch 1/20: 100%|██████████| 152/152 [00:25<00:00,  5.91it/s]


Epoch 1: Train Loss = 1.1514, Train Acc = 0.6059, Val Loss = 0.7365, Val Acc = 0.8035


Epoch 2/20: 100%|██████████| 152/152 [00:06<00:00, 23.91it/s]


Epoch 2: Train Loss = 0.7143, Train Acc = 0.7692, Val Loss = 0.5167, Val Acc = 0.8492


Epoch 3/20: 100%|██████████| 152/152 [00:06<00:00, 23.98it/s]


Epoch 3: Train Loss = 0.5675, Train Acc = 0.8112, Val Loss = 0.4558, Val Acc = 0.8473


Epoch 4/20: 100%|██████████| 152/152 [00:06<00:00, 23.98it/s]


Epoch 4: Train Loss = 0.5024, Train Acc = 0.8324, Val Loss = 0.4055, Val Acc = 0.8640


Epoch 5/20: 100%|██████████| 152/152 [00:06<00:00, 24.07it/s]


Epoch 5: Train Loss = 0.4648, Train Acc = 0.8404, Val Loss = 0.3735, Val Acc = 0.8782


Epoch 6/20: 100%|██████████| 152/152 [00:06<00:00, 24.05it/s]


Epoch 6: Train Loss = 0.4455, Train Acc = 0.8513, Val Loss = 0.3604, Val Acc = 0.8789


Epoch 7/20: 100%|██████████| 152/152 [00:06<00:00, 23.41it/s]


Epoch 7: Train Loss = 0.4154, Train Acc = 0.8573, Val Loss = 0.3408, Val Acc = 0.8832


Epoch 8/20: 100%|██████████| 152/152 [00:06<00:00, 23.18it/s]


Epoch 8: Train Loss = 0.4004, Train Acc = 0.8565, Val Loss = 0.3404, Val Acc = 0.8844


Epoch 9/20: 100%|██████████| 152/152 [00:06<00:00, 22.81it/s]


Epoch 9: Train Loss = 0.3806, Train Acc = 0.8686, Val Loss = 0.3234, Val Acc = 0.8912


Epoch 10/20: 100%|██████████| 152/152 [00:06<00:00, 23.27it/s]


Epoch 10: Train Loss = 0.3698, Train Acc = 0.8726, Val Loss = 0.3226, Val Acc = 0.8869


Epoch 11/20: 100%|██████████| 152/152 [00:06<00:00, 23.78it/s]


Epoch 11: Train Loss = 0.3685, Train Acc = 0.8684, Val Loss = 0.3162, Val Acc = 0.8844


Epoch 12/20: 100%|██████████| 152/152 [00:06<00:00, 23.81it/s]


Epoch 12: Train Loss = 0.3395, Train Acc = 0.8822, Val Loss = 0.3084, Val Acc = 0.8912


Epoch 13/20: 100%|██████████| 152/152 [00:06<00:00, 24.06it/s]


Epoch 13: Train Loss = 0.3568, Train Acc = 0.8752, Val Loss = 0.3128, Val Acc = 0.8881


Epoch 14/20: 100%|██████████| 152/152 [00:06<00:00, 23.71it/s]


Epoch 14: Train Loss = 0.3331, Train Acc = 0.8826, Val Loss = 0.3011, Val Acc = 0.8993


Epoch 15/20: 100%|██████████| 152/152 [00:06<00:00, 24.00it/s]


Epoch 15: Train Loss = 0.3314, Train Acc = 0.8866, Val Loss = 0.2996, Val Acc = 0.8956


Epoch 16/20: 100%|██████████| 152/152 [00:06<00:00, 23.95it/s]


Epoch 16: Train Loss = 0.3220, Train Acc = 0.8847, Val Loss = 0.3128, Val Acc = 0.8844


Epoch 17/20: 100%|██████████| 152/152 [00:06<00:00, 23.95it/s]


Epoch 17: Train Loss = 0.3147, Train Acc = 0.8903, Val Loss = 0.2964, Val Acc = 0.8956


Epoch 18/20: 100%|██████████| 152/152 [00:06<00:00, 23.76it/s]


Epoch 18: Train Loss = 0.3105, Train Acc = 0.8925, Val Loss = 0.2886, Val Acc = 0.8980


Epoch 19/20: 100%|██████████| 152/152 [00:06<00:00, 23.85it/s]


Epoch 19: Train Loss = 0.2961, Train Acc = 0.8917, Val Loss = 0.2997, Val Acc = 0.8912


Epoch 20/20: 100%|██████████| 152/152 [00:06<00:00, 24.00it/s]


Epoch 20: Train Loss = 0.2982, Train Acc = 0.8944, Val Loss = 0.2882, Val Acc = 0.8974


In [8]:
# ===============================
# 8. Evaluate on Test Set
# ===============================
model.eval()
y_true, y_pred = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Metrics
acc = accuracy_score(y_true, y_pred) * 100
f1 = f1_score(y_true, y_pred, average='macro')
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')

print("\n=== Final Test Metrics ===")
print(f"Accuracy: {acc:.2f}%")
print(f"F1 Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")


=== Final Test Metrics ===
Accuracy: 88.60%
F1 Score: 0.8867
Precision: 0.8874
Recall: 0.8862


In [9]:
# ===============================
# 9. K-fold cross validation
# ===============================
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Subset
import pandas as pd

# ===============================
# Prepare target labels for stratified sampling
# ===============================
targets = [sample[1] for sample in full_dataset.samples]
targets = np.array(targets)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []

# ===============================
# Cross-validation Loop
# ===============================
for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(targets)), targets)):
    print(f"\n------ Fold {fold + 1}/5 ------")

    # Create Subsets for this fold
    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)

    # Apply correct transforms
    train_subset.dataset.transform = data_transforms["train"]
    val_subset.dataset.transform = data_transforms["val_test"]

    # Create DataLoaders
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

    # ============================
    # Define new ResNet50 model
    # ============================
    model = models.resnet50(pretrained=True)
    for param in model.parameters():
        param.requires_grad = False

    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, num_classes)
    )
    model = model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)

    # ============================
    # Train on this fold
    # ============================
    model, _, _ = train_model(model, train_loader, val_loader, epochs=10)

    # ============================
    # Evaluate on this fold
    # ============================
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.numpy())
            y_pred.extend(preds.cpu().numpy())

    # Metrics
    acc = accuracy_score(y_true, y_pred) * 100
    f1 = f1_score(y_true, y_pred, average='macro')
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')

    print(f"Fold {fold + 1} - Accuracy: {acc:.2f}%, F1: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")
    fold_metrics.append([acc, f1, precision, recall])

# ===============================
# Average Performance Summary
# ===============================
df = pd.DataFrame(fold_metrics, columns=["Accuracy", "F1 Score", "Precision", "Recall"])
print("\n=== Cross-Validation Performance ===")
print(df.mean())



------ Fold 1/5 ------


Epoch 1/10: 100%|██████████| 203/203 [00:11<00:00, 17.13it/s]


Epoch 1: Train Loss = 0.9362, Train Acc = 0.6964, Val Loss = 0.5422, Val Acc = 0.8389


Epoch 2/10: 100%|██████████| 203/203 [00:11<00:00, 17.34it/s]


Epoch 2: Train Loss = 0.5459, Train Acc = 0.8194, Val Loss = 0.4123, Val Acc = 0.8667


Epoch 3/10:  36%|███▌      | 73/203 [00:04<00:07, 17.20it/s]


KeyboardInterrupt: 

In [None]:
# ===============================
# 10. Plot Loss Curve
# ===============================
def plot_loss_curves(train_log, val_log):
    plt.figure(figsize=(10, 6))
    plt.plot(train_log, label='Train Loss', marker='o')
    plt.plot(val_log, label='Validation Loss', marker='x')
    plt.title("Training vs Validation Loss Curve")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

plot_loss_curves(train_loss_log, val_loss_log)
