In [None]:
# --- STEP 1: Install Dependencies ---
!pip install opencv-python
!pip install timm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import timm
import numpy as np
import cv2
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from google.colab import files


In [None]:
# --- STEP 2: Upload kaggle.json and Download Dataset ---
files.upload()
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
!unzip chest-xray-pneumonia.zip


In [None]:

# --- STEP 3: CLAHE Preprocessing with Augmentation ---
def preprocess_with_clahe(img_path, augment=True):
    """Load an image, enhance it with CLAHE, and return a normalized tensor.

    The image is read as single-channel grayscale, contrast-enhanced with
    CLAHE (clip limit 2.0, 8x8 tile grid), replicated to three channels and
    converted to a normalized tensor with a 224x224 spatial size.

    Args:
        img_path: Path of the image file to load.
        augment: When True (the default, matching the previous behaviour),
            apply the random crop / affine / flip / rotation / colour-jitter
            augmentations. When False, only a deterministic resize to
            224x224 is applied, which is what evaluation data usually needs.

    Returns:
        The tensor produced by ``ToTensor`` followed by ``Normalize``.

    Raises:
        OSError: If OpenCV cannot read ``img_path``.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the error would surface later as an opaque failure in CLAHE.
    if gray is None:
        raise OSError(f"Could not read image: {img_path}")

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_img = clahe.apply(gray)
    # Replicate the single channel three times so the three-channel
    # normalization below can be applied.
    rgb_img = cv2.cvtColor(clahe_img, cv2.COLOR_GRAY2RGB)
    pil_img = Image.fromarray(rgb_img)

    if augment:
        spatial_steps = [
            transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
            transforms.RandomAffine(degrees=5, translate=(0.05, 0.05)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(5),
            transforms.ColorJitter(brightness=0.05, contrast=0.05),
        ]
    else:
        # Deterministic path: same output size, no randomness.
        spatial_steps = [transforms.Resize((224, 224))]

    transform_pipeline = transforms.Compose(spatial_steps + [
        transforms.ToTensor(),
        # Per-channel mean/std commonly used with ImageNet-pretrained models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    return transform_pipeline(pil_img)


In [None]:
# --- STEP 4: Dataset Class ---
class XrayDataset(Dataset):
    """Map-style dataset pairing image file paths with their labels.

    Images are not loaded up front: each path is handed to ``transform_fn``
    only when the corresponding item is requested.

    Args:
        image_paths: Indexable collection of image file paths.
        labels: Indexable collection of labels, aligned with ``image_paths``.
        transform_fn: Callable that takes one path and returns the model
            input for that image.
    """

    def __init__(self, image_paths, labels, transform_fn):
        self.transform_fn = transform_fn
        self.labels = labels
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_tensor)`` for position ``idx``."""
        sample = self.transform_fn(self.image_paths[idx])
        target = torch.tensor(self.labels[idx])
        return sample, target

In [None]:
# --- STEP 5: Load Data ---
train_dir = '/content/chest_xray/train'
all_images = []
all_labels = []

# The label is the position in this list: NORMAL -> 0, PNEUMONIA -> 1.
for label, class_name in enumerate(['NORMAL', 'PNEUMONIA']):
    class_dir = os.path.join(train_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting fixes the input
    # order so the seeded split below gives the same partition on every run
    # and on every machine.
    for file in sorted(os.listdir(class_dir)):
        # Compare case-insensitively so '.JPEG' files are not silently skipped.
        if file.lower().endswith('.jpeg'):
            all_images.append(os.path.join(class_dir, file))
            all_labels.append(label)

# Hold out 20% for validation, preserving the class ratio in both splits.
train_imgs, val_imgs, train_lbls, val_lbls = train_test_split(
    all_images, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)

train_dataset = XrayDataset(train_imgs, train_lbls, preprocess_with_clahe)
# NOTE(review): the validation set goes through the same preprocessing
# function as training, which includes random augmentation, so validation
# metrics will vary between runs. Consider a deterministic transform here.
val_dataset = XrayDataset(val_imgs, val_lbls, preprocess_with_clahe)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)