<a href="https://colab.research.google.com/github/aksharagupta082007/Pneumonia-Disease-Prediction-and-Anomaly-Detection-Using-X-ray-Images/blob/main/Yet_another_copy_of_RYANnewHYBRID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📘 Hybrid CNN + ViT Pneumonia Detection — Full Pipeline (with Merged Datasets)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms
import timm
import cv2
import numpy as np
import os
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [None]:
# ✅ Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# ✅ Upload kaggle.json manually
from google.colab import files
files.upload()  # Upload kaggle.json when prompted

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# ✅ Download and unzip Kaggle dataset
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia -p /content/kaggle_data --unzip

# ✅ Unzip your custom dataset from Drive
!unzip -q '/content/drive/MyDrive/Copy of chest_xray-20250726T110654Z-1-001.zip' -d /content/custom_data

# ✅ Set custom dataset path
custom_data = '/content/custom_data'
kaggle_data = '/content/kaggle_data/chest_xray'


Mounted at /content/drive


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
License(s): other
Downloading chest-xray-pneumonia.zip to /content/kaggle_data
100% 2.29G/2.29G [00:16<00:00, 22.8MB/s]
100% 2.29G/2.29G [00:16<00:00, 149MB/s] 


In [None]:
class CLAHETransform:
    """Apply CLAHE (contrast-limited adaptive histogram equalisation) to an image.

    The input is reduced to a single grayscale channel, equalised with OpenCV's
    CLAHE, and returned as a 3-channel RGB PIL image so it can feed models that
    expect RGB input.

    Args:
        clip_limit: Contrast-limiting threshold passed to ``cv2.createCLAHE``.
        tile_grid_size: ``(rows, cols)`` grid of tiles passed to
            ``cv2.createCLAHE``.
    """

    def __init__(self, clip_limit=2.0, tile_grid_size=(8, 8)):
        self.clip_limit = clip_limit
        self.tile_grid_size = tuple(tile_grid_size)

    def __call__(self, img):
        """Return the CLAHE-enhanced version of ``img`` as an RGB PIL image.

        Args:
            img: A PIL image in any mode, or an RGB array-like.
        """
        # cv2.COLOR_RGB2GRAY requires exactly three channels; normalise PIL
        # inputs first so grayscale ('L') or 'RGBA' files do not raise.
        if isinstance(img, Image.Image):
            img = img.convert('RGB')
        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)

        # The CLAHE object is built per call rather than stored on self, so the
        # transform instance only holds plain Python values.
        clahe = cv2.createCLAHE(clipLimit=self.clip_limit, tileGridSize=self.tile_grid_size)
        equalised = clahe.apply(gray)
        return Image.fromarray(equalised).convert('RGB')

    def __repr__(self):
        return (f"{self.__class__.__name__}(clip_limit={self.clip_limit}, "
                f"tile_grid_size={self.tile_grid_size})")

# Preprocessing + augmentation pipeline: CLAHE contrast enhancement, resize to
# 224x224, random horizontal flip and rotation of up to 10 degrees, tensor
# conversion, then per-channel normalisation with the standard ImageNet
# mean / standard deviation.
transform = transforms.Compose(
    [
        CLAHETransform(),
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import ConcatDataset, DataLoader
import os

# ✅ Define paths
# NOTE(review): the setup cell extracts the custom archive into
# '/content/custom_data'; the recorded output of this cell reports the
# 'chest_xray' sub-folder as missing, so confirm the archive's top-level
# folder name and adjust this path if needed.
custom_data = '/content/custom_data/chest_xray'
kaggle_data = '/content/kaggle_data/chest_xray'

# ✅ Loader settings
BATCH_SIZE = 32
NUM_WORKERS = 2

# ✅ Define transforms
# Training reuses the CLAHE + augmentation + normalisation pipeline built in
# the preprocessing cell. This cell used to rebind `transform` to a bare
# Resize/ToTensor, which silently discarded that pipeline.
train_transform = transform
# Validation/test get the same preprocessing but no random augmentation, so
# evaluation is deterministic.
eval_transform = transforms.Compose([
    CLAHETransform(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# ✅ Helper to safely load a dataset or return None
def safe_load(folder, split_name, transform=None):
    """Load ``folder/split_name`` as an ``ImageFolder`` if the directory exists.

    Args:
        folder: Dataset root containing the split sub-directories.
        split_name: Name of the split sub-directory, e.g. 'train', 'val', 'test'.
        transform: Transform applied to each image. When omitted, the 'train'
            split uses ``train_transform`` and every other split uses
            ``eval_transform``.

    Returns:
        The ``ImageFolder`` dataset, or ``None`` when the directory is missing.
    """
    path = os.path.join(folder, split_name)
    if not os.path.isdir(path):
        print(f"⚠️  Missing {split_name} data in {folder}")
        return None

    print(f"✅ Found {split_name} data in {folder}")
    if transform is None:
        transform = train_transform if split_name == 'train' else eval_transform
    return datasets.ImageFolder(path, transform=transform)


def merge_split(parts, split_name):
    """Concatenate the datasets found for one split, validating them first.

    Raises:
        FileNotFoundError: If no dataset was found for the split.
        ValueError: If the datasets disagree on the class-name -> index mapping
            (labels from different sources would otherwise be silently mixed).
    """
    if not parts:
        raise FileNotFoundError(
            f"No '{split_name}' data found in any of: {[custom_data, kaggle_data]}"
        )
    mappings = {tuple(sorted(ds.class_to_idx.items())) for ds in parts}
    if len(mappings) > 1:
        raise ValueError(
            f"Inconsistent class folders across '{split_name}' datasets: {sorted(mappings)}"
        )
    return ConcatDataset(parts)


# ✅ Load datasets
train_ds_list = []
val_ds_list = []
test_ds_list = []

for folder in [custom_data, kaggle_data]:
    train = safe_load(folder, 'train')
    val = safe_load(folder, 'val')
    test = safe_load(folder, 'test')
    # Compare against None explicitly: truthiness of a dataset goes through
    # __len__, which is not the question being asked here.
    if train is not None: train_ds_list.append(train)
    if val is not None: val_ds_list.append(val)
    if test is not None: test_ds_list.append(test)

# ✅ Concatenate available datasets
train_ds = merge_split(train_ds_list, 'train')
val_ds = merge_split(val_ds_list, 'val')
test_ds = merge_split(test_ds_list, 'test')

# ✅ Print dataset stats
print(f"\n📊 Total training images: {len(train_ds)}")
print(f"📊 Total validation images: {len(val_ds)}")
print(f"📊 Total test images: {len(test_ds)}")

# ✅ Dataloaders (only the training loader is shuffled)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)


⚠️  Missing train data in /content/custom_data/chest_xray
⚠️  Missing val data in /content/custom_data/chest_xray
⚠️  Missing test data in /content/custom_data/chest_xray
✅ Found train data in /content/kaggle_data/chest_xray
✅ Found val data in /content/kaggle_data/chest_xray
✅ Found test data in /content/kaggle_data/chest_xray

📊 Total training images: 5216
📊 Total validation images: 16
📊 Total test images: 624


In [None]:
import torch.nn as nn
from torchvision import models
from transformers import ViTModel

class HybridCNNViT(nn.Module):
    """Classifier that fuses DenseNet-121 features with ViT features.

    The same image batch is passed through the DenseNet-121 convolutional
    feature extractor (globally average-pooled) and through a pretrained ViT
    encoder (first-token embedding). Both vectors are concatenated and fed to
    a small MLP head that outputs class logits.

    Args:
        num_classes: Number of output logits.
        dropout: Dropout probability used in the classification head.
        vit_name: Hugging Face model id of the ViT encoder.
        freeze_vit: When True, ViT parameters get ``requires_grad = False``.
    """

    def __init__(self, num_classes=2, dropout=0.3,
                 vit_name='google/vit-base-patch16-224', freeze_vit=True):
        super().__init__()

        # `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1
        # is the checkpoint that `pretrained=True` used to select.
        densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
        self.cnn_features = densenet.features
        self.cnn_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.vit = ViTModel.from_pretrained(vit_name)
        if freeze_vit:
            for param in self.vit.parameters():
                param.requires_grad = False

        # Read the feature widths from the backbones instead of hard-coding
        # them (1024 for DenseNet-121, 768 for ViT-base).
        cnn_dim = densenet.classifier.in_features
        vit_dim = self.vit.config.hidden_size

        self.fc = nn.Sequential(
            nn.Linear(cnn_dim + vit_dim, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for batch ``x``.

        ``x`` is passed unchanged to both backbones, so it must be a 4-D image
        batch at the resolution the ViT checkpoint expects (224x224 for the
        default checkpoint).
        """
        # CNN branch: feature map -> global average pool -> (batch, cnn_dim)
        cnn_out = self.cnn_features(x)
        cnn_out = self.cnn_pool(cnn_out).view(x.size(0), -1)

        # ViT branch: embedding of the first token -> (batch, vit_dim)
        vit_out = self.vit(pixel_values=x).last_hidden_state[:, 0, :]

        out = torch.cat((cnn_out, vit_out), dim=1)
        return self.fc(out)


In [None]:
# ✅ Training Setup and Loop

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

# ✅ Device Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hybrid_model = HybridCNNViT().to(device)

from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
from collections import Counter
import torch

# Compute class weights based on training dataset distribution.
# `train_ds` is a ConcatDataset, which has no `.targets` of its own, so the
# labels are gathered from each underlying ImageFolder. (The previous code
# read `train_dataset.targets`, a name that is never defined in this notebook.)
train_targets = [label for subset in train_ds.datasets for label in subset.targets]
class_counts = Counter(train_targets)
if not class_counts:
    raise ValueError("Training set is empty; cannot compute class weights.")

num_classes = max(class_counts) + 1
empty_classes = [idx for idx in range(num_classes) if class_counts[idx] == 0]
if empty_classes:
    raise ValueError(f"No training samples for class indices {empty_classes}.")

# Inverse-frequency weights: rarer classes contribute more to the loss.
total_samples = sum(class_counts.values())
class_weights = [total_samples / class_counts[i] for i in range(num_classes)]
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

# Use weighted cross-entropy loss
criterion = CrossEntropyLoss(weight=class_weights)
optimizer = optim.Adam(hybrid_model.parameters(), lr=1e-4, weight_decay=1e-5)
# Halve the learning rate after 2 epochs without validation-loss improvement.
# `verbose=True` was dropped: it is deprecated since PyTorch 2.2 and newer
# releases may reject it; inspect optimizer.param_groups[0]['lr'] instead.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

In [None]:
# ✅ Training Function
def _run_epoch(model, criterion, loader, device, optimizer=None, show_progress=False):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum, correct, seen = 0.0, 0, 0

    # `tqdm` is only looked up when a progress bar is requested.
    batches = tqdm(loader) if show_progress else loader
    with torch.set_grad_enabled(training):
        for inputs, labels in batches:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # criterion returns a batch mean; re-weight by batch size so the
            # epoch average is correct when the last batch is smaller.
            loss_sum += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            seen += labels.size(0)

    if seen == 0:
        phase = "training" if training else "validation"
        raise ValueError(f"The {phase} loader yielded no samples.")
    return loss_sum / seen, correct / seen


def train_model(model, criterion, optimizer, scheduler, train_loader, val_loader, device,
                num_epochs=10, max_patience=5, save_path="best_hybrid_model.pth",
                show_progress=True):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    After every epoch the validation loss is passed to ``scheduler.step``. The
    model's ``state_dict`` is written to ``save_path`` each time the validation
    loss improves; training stops once it has failed to improve for
    ``max_patience`` consecutive epochs.

    Args:
        model: Network to train; must already be on ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        device: Device the batches are moved to.
        num_epochs: Maximum number of epochs.
        max_patience: Epochs without improvement tolerated before stopping.
        save_path: File the best ``state_dict`` is saved to.
        show_progress: Wrap the training loader in a tqdm progress bar.

    Returns:
        Dict with per-epoch lists under the keys 'train_loss', 'train_acc',
        'val_loss' and 'val_acc'.

    Raises:
        ValueError: If either loader yields no samples.
    """
    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    best_val_loss = float('inf')
    patience = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")

        epoch_loss, epoch_acc = _run_epoch(
            model, criterion, train_loader, device,
            optimizer=optimizer, show_progress=show_progress,
        )
        print(f"Train Loss: {epoch_loss:.4f} | Train Acc: {epoch_acc:.4f}")

        # ✅ Validation
        val_loss, val_acc = _run_epoch(model, criterion, val_loader, device)
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        history["train_loss"].append(epoch_loss)
        history["train_acc"].append(epoch_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # Report learning-rate changes here so they are visible regardless of
        # how the scheduler itself was configured.
        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after != lr_before:
            print(f"Learning rate changed: {lr_before:.2e} -> {lr_after:.2e}")

        # ✅ Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), save_path)
            print("✅ Model Saved!")
            patience = 0
        else:
            patience += 1
            if patience >= max_patience:
                print("⏹️ Early stopping triggered.")
                break

    return history

# ✅ Start training: at most 10 epochs on the hybrid model, using the loss,
# optimizer, scheduler and data loaders prepared in the cells above.
train_model(
    model=hybrid_model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    num_epochs=10,
)


Epoch 1/10


100%|██████████| 327/327 [06:13<00:00,  1.14s/it]

Train Loss: 0.0874 | Train Acc: 0.9647





Val Loss: 0.0547 | Val Acc: 1.0000
✅ Model Saved!

Epoch 2/10


100%|██████████| 327/327 [06:03<00:00,  1.11s/it]

Train Loss: 0.0434 | Train Acc: 0.9850





Val Loss: 0.1536 | Val Acc: 0.9062

Epoch 3/10


100%|██████████| 327/327 [06:19<00:00,  1.16s/it]

Train Loss: 0.0297 | Train Acc: 0.9896





Val Loss: 0.3532 | Val Acc: 0.7812

Epoch 4/10


100%|██████████| 327/327 [05:54<00:00,  1.08s/it]

Train Loss: 0.0215 | Train Acc: 0.9931





Val Loss: 0.0918 | Val Acc: 0.9375

Epoch 5/10


100%|██████████| 327/327 [05:51<00:00,  1.07s/it]

Train Loss: 0.0079 | Train Acc: 0.9973





Val Loss: 0.1838 | Val Acc: 0.8438

Epoch 6/10


100%|██████████| 327/327 [05:52<00:00,  1.08s/it]

Train Loss: 0.0051 | Train Acc: 0.9984





Val Loss: 0.3513 | Val Acc: 0.9062
⏹️ Early stopping triggered.
