In [2]:
# --- 1. SETUP AND IMPORTS ---
!pip install -q timm pandas openpyxl

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.v2 as transforms
from PIL import Image
import csv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from tqdm.notebook import tqdm
import timm

# --- 2. CONFIGURATION ---
DATASET_INPUT_NAME = 'capstone-dataset'
BASE_PATH = os.path.join('/kaggle/input/', DATASET_INPUT_NAME, 'dataset')
RAW_PATH = os.path.join(BASE_PATH, 'images/')
SPLIT_PATH = os.path.join(BASE_PATH, 'splits/')

# Model & Training Hyperparameters
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS_PER_FOLD = 10
LEARNING_RATE = 1e-4
NUM_FOLDS = 5

# --- NEW: List of CNN models to test ---
CNN_MODELS_TO_TEST = [
    'mobilenetv2_100'
]

# Dataset specific constants (unchanged)
mean = [0.19159977371889528, 0.12109632404575055, 0.11253919439439257]
std = [0.0726453726937946, 0.06086047143439545, 0.05632158600067249]
classes = {
    'normal_esophagus': {'label': 0},
    'esophageal_protruded_lesions': {'label': 1},
    'esophagitis': {'label': 2},
    'barrett_esophagus': {'label': 3},
    'esophageal_cancer': {'label': 4},
}
class_num = len(classes)
class_names = list(classes.keys())

# --- 3. DATASET & TRANSFORMS (Unchanged) ---
train_transform = transforms.Compose([
    transforms.Resize([IMG_SIZE, IMG_SIZE]),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
val_transform = transforms.Compose([
    transforms.Resize([IMG_SIZE, IMG_SIZE]),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

class Esophagus_Dataset(Dataset):
    # (Dataset class remains unchanged from the last correct version)
    def __init__(self, raw_path, split_path, task, is_train=True):
        self.raw_path = raw_path
        self.image_files = []
        self.transform = train_transform if is_train else val_transform
        for cl_name, cl_info in classes.items():
            label = cl_info['label']
            split_file = os.path.join(split_path, cl_name, task + '.csv')
            if not os.path.exists(split_file): continue
            try:
                with open(split_file, newline='') as f:
                    reader = csv.reader(f)
                    for row in reader:
                        file_name = str(row[0])
                        img_path = os.path.join(self.raw_path, cl_name, file_name)
                        if os.path.exists(img_path):
                            self.image_files.append((img_path, label))
            except Exception as e:
                print(f"Error reading {split_file}: {e}")
    def __len__(self):
        return len(self.image_files)
    def __getitem__(self, idx):
        img_path, label = self.image_files[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# --- 4. MODEL DEFINITION (MODIFIED to be flexible) ---
def create_cnn_model(model_name, num_classes=class_num, pretrained=True):
    """Creates a pre-trained CNN model from a given name."""
    model = timm.create_model(
        model_name,
        pretrained=pretrained,
        num_classes=num_classes
    )
    return model.to(DEVICE)

# --- 5. TRAINING & EVALUATION FUNCTIONS (Unchanged) ---
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    progress_bar = tqdm(dataloader, desc="Training", leave=False)
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        progress_bar.set_postfix(loss=loss.item())
    return running_loss / total, 100 * correct / total

def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    all_preds, all_labels = [], []
    progress_bar = tqdm(dataloader, desc="Validation", leave=False)
    with torch.no_grad():
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return running_loss / total, 100 * correct / total, all_preds, all_labels

# --- 6. MAIN EXPERIMENT LOOP (NEW) ---
print(f"Using device: {DEVICE}")

# Dictionary to store the final results for each model
all_model_results = {}

# Loop over each model name
for model_name in CNN_MODELS_TO_TEST:
    print(f"\n\n" + "#"*50)
    print(f"### Starting Experiment for Model: {model_name.upper()} ###")
    print("#"*50 + "\n")

    fold_accuracies = []
    criterion = nn.CrossEntropyLoss()

    for fold in range(NUM_FOLDS):
        print(f"\n" + "="*25 + f" FOLD {fold} " + "="*25)
        
        train_task = f'train{fold}'
        val_task = f'val{fold}'
        
        train_dataset = Esophagus_Dataset(RAW_PATH, SPLIT_PATH, task=train_task, is_train=True)
        val_dataset = Esophagus_Dataset(RAW_PATH, SPLIT_PATH, task=val_task, is_train=False)
        
        if len(train_dataset) == 0 or len(val_dataset) == 0:
            print(f"Skipping Fold {fold} due to empty dataset.")
            continue

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

        print(f"Fold {fold}: {len(train_dataset)} training images, {len(val_dataset)} validation images.")
        
        model = create_cnn_model(model_name)
        optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS_PER_FOLD)
        
        best_val_acc = 0.0
        
        for epoch in range(EPOCHS_PER_FOLD):
            train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, DEVICE)
            val_loss, val_acc, _, _ = evaluate(model, val_loader, criterion, DEVICE)
            scheduler.step()
            
            print(f"Epoch {epoch+1}/{EPOCHS_PER_FOLD} -> Train Loss: {train_loss:.4f}, Acc: {train_acc:.2f}% | Val Loss: {val_loss:.4f}, Acc: {val_acc:.2f}%")
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
        
        fold_accuracies.append(best_val_acc)
        print(f"\n--- Best Validation Accuracy for Fold {fold}: {best_val_acc:.2f}% ---")

    # --- Store results for this model ---
    if fold_accuracies:
        mean_acc = np.mean(fold_accuracies)
        std_acc = np.std(fold_accuracies)
        all_model_results[model_name] = {'mean_accuracy': mean_acc, 'std_dev': std_acc}
        print(f"\n--- Results for {model_name} ---")
        print(f"Average Accuracy: {mean_acc:.2f}% | Std Dev: {std_acc:.2f}%")

# --- 7. FINAL COMPARISON SUMMARY ---
print("\n\n" + "="*20 + " All Experiments Finished " + "="*20 + "\n")
print("Final Comparison of CNN Architectures (Average Accuracy across 5 Folds):\n")

# Adding your ViT result for a complete comparison
all_model_results['vision_transformer'] = {'mean_accuracy': 95.43, 'std_dev': 1.26}

# Create a pandas DataFrame for a nice table
results_df = pd.DataFrame.from_dict(all_model_results, orient='index')
results_df = results_df.sort_values(by='mean_accuracy', ascending=False)
results_df['mean_accuracy'] = results_df['mean_accuracy'].map('{:.2f}%'.format)
results_df['std_dev'] = results_df['std_dev'].map('{:.2f}%'.format)

print(results_df)

Using device: cpu


##################################################
### Starting Experiment for Model: MOBILENETV2_100 ###
##################################################






Fold 0: 717 training images, 178 validation images.


model.safetensors:   0%|          | 0.00/14.2M [00:00<?, ?B/s]

Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 2.6589, Acc: 35.01% | Val Loss: 1.6771, Acc: 53.93%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 1.2723, Acc: 62.20% | Val Loss: 1.1293, Acc: 62.92%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 0.9792, Acc: 70.43% | Val Loss: 0.9129, Acc: 67.98%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.7993, Acc: 73.64% | Val Loss: 0.8570, Acc: 68.54%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.7205, Acc: 76.85% | Val Loss: 0.8000, Acc: 69.66%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.6752, Acc: 76.85% | Val Loss: 0.7886, Acc: 69.66%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.6331, Acc: 78.80% | Val Loss: 0.7742, Acc: 71.35%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.5798, Acc: 81.45% | Val Loss: 0.7581, Acc: 72.47%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.5532, Acc: 78.66% | Val Loss: 0.7733, Acc: 71.91%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.5632, Acc: 79.78% | Val Loss: 0.7185, Acc: 73.60%

--- Best Validation Accuracy for Fold 0: 73.60% ---

Fold 1: 717 training images, 178 validation images.


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 2.7885, Acc: 35.29% | Val Loss: 1.5686, Acc: 60.11%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 1.5280, Acc: 58.72% | Val Loss: 1.1519, Acc: 66.85%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 1.0112, Acc: 70.29% | Val Loss: 1.0139, Acc: 67.42%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.8010, Acc: 75.03% | Val Loss: 0.8725, Acc: 68.54%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.7049, Acc: 75.59% | Val Loss: 0.8321, Acc: 70.22%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.6254, Acc: 78.94% | Val Loss: 0.8110, Acc: 73.60%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.5563, Acc: 81.59% | Val Loss: 0.7702, Acc: 71.91%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.5318, Acc: 81.73% | Val Loss: 0.7781, Acc: 73.60%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.5722, Acc: 81.17% | Val Loss: 0.7860, Acc: 74.72%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.5495, Acc: 82.01% | Val Loss: 0.7697, Acc: 75.28%

--- Best Validation Accuracy for Fold 1: 75.28% ---

Fold 2: 717 training images, 178 validation images.


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 2.3091, Acc: 40.45% | Val Loss: 1.4981, Acc: 59.55%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 1.3606, Acc: 60.53% | Val Loss: 1.0506, Acc: 64.04%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 0.9873, Acc: 69.18% | Val Loss: 0.9449, Acc: 69.10%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.7796, Acc: 75.59% | Val Loss: 0.8108, Acc: 73.60%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.7007, Acc: 77.41% | Val Loss: 0.7031, Acc: 76.97%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.5976, Acc: 79.78% | Val Loss: 0.6764, Acc: 78.65%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.5959, Acc: 79.78% | Val Loss: 0.6919, Acc: 76.97%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.5465, Acc: 81.17% | Val Loss: 0.6485, Acc: 80.90%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.5343, Acc: 81.87% | Val Loss: 0.6482, Acc: 78.09%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.4825, Acc: 84.66% | Val Loss: 0.6260, Acc: 79.78%

--- Best Validation Accuracy for Fold 2: 80.90% ---

Fold 3: 717 training images, 178 validation images.


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 2.3303, Acc: 40.86% | Val Loss: 1.7314, Acc: 57.30%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 1.2298, Acc: 63.32% | Val Loss: 1.3637, Acc: 60.67%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 0.8952, Acc: 71.97% | Val Loss: 1.1867, Acc: 65.17%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.6934, Acc: 79.64% | Val Loss: 1.0033, Acc: 69.66%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.6515, Acc: 79.64% | Val Loss: 1.0014, Acc: 69.10%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.5938, Acc: 79.92% | Val Loss: 0.9574, Acc: 70.79%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.5622, Acc: 81.87% | Val Loss: 0.8919, Acc: 74.16%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.5356, Acc: 82.15% | Val Loss: 0.8882, Acc: 69.10%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.5353, Acc: 82.85% | Val Loss: 0.9004, Acc: 72.47%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.5249, Acc: 82.15% | Val Loss: 0.8978, Acc: 71.91%

--- Best Validation Accuracy for Fold 3: 74.16% ---

Fold 4: 712 training images, 183 validation images.


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/10 -> Train Loss: 2.9763, Acc: 38.90% | Val Loss: 2.5876, Acc: 43.72%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 2/10 -> Train Loss: 1.3782, Acc: 62.08% | Val Loss: 1.5522, Acc: 52.46%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 3/10 -> Train Loss: 1.0618, Acc: 68.68% | Val Loss: 1.1952, Acc: 60.66%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 4/10 -> Train Loss: 0.7829, Acc: 74.72% | Val Loss: 1.0142, Acc: 66.67%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 5/10 -> Train Loss: 0.7531, Acc: 74.58% | Val Loss: 0.9225, Acc: 68.31%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 6/10 -> Train Loss: 0.6839, Acc: 76.97% | Val Loss: 0.7985, Acc: 72.13%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 7/10 -> Train Loss: 0.6423, Acc: 79.21% | Val Loss: 0.8150, Acc: 68.85%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 8/10 -> Train Loss: 0.4812, Acc: 83.57% | Val Loss: 0.8219, Acc: 71.58%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 9/10 -> Train Loss: 0.6086, Acc: 81.88% | Val Loss: 0.7946, Acc: 72.13%


Training:   0%|          | 0/23 [00:00<?, ?it/s]

Validation:   0%|          | 0/6 [00:00<?, ?it/s]

Epoch 10/10 -> Train Loss: 0.5441, Acc: 81.74% | Val Loss: 0.7523, Acc: 72.68%

--- Best Validation Accuracy for Fold 4: 72.68% ---

--- Results for mobilenetv2_100 ---
Average Accuracy: 75.32% | Std Dev: 2.91%



Final Comparison of CNN Architectures (Average Accuracy across 5 Folds):

                   mean_accuracy std_dev
vision_transformer        95.43%   1.26%
mobilenetv2_100           75.32%   2.91%


In [3]:
import os
import torch

# ✅ Directory to save (Kaggle working dir)
MODEL_DIR = "/kaggle/working"
os.makedirs(MODEL_DIR, exist_ok=True)

# ✅ Save model state dict
mobilenet_path = os.path.join(MODEL_DIR, "mobilenet_v2_final.pth")
torch.save(model.state_dict(), mobilenet_path)

print(f"✅ MobileNetV2 model saved at: {mobilenet_path}")


✅ MobileNetV2 model saved at: /kaggle/working/mobilenet_v2_final.pth
