In [2]:
#Download the data

import kagglehub
import shutil
import os

# Download the dataset into kagglehub's default cache location
cached_path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")

print('default path: ',cached_path)
# Project-relative folder that should hold the raw dataset
destination = "../data/raw/"

# Copy instead of move. Moving removes the files from kagglehub's cache, so a
# later run would presumably have to fetch the ~2.3 GB archive again, and once
# `destination` already exists shutil.move nests the source directory *inside*
# it instead of replacing it. copytree with dirs_exist_ok=True (Python 3.8+)
# yields the same layout on every run, so this cell is safe to re-execute.
shutil.copytree(cached_path, destination, dirs_exist_ok=True)

print(f"Dataset copied to: {destination}")


  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/chest-xray-pneumonia?dataset_version_number=2...


100%|██████████| 2.29G/2.29G [04:05<00:00, 10.0MB/s]

Extracting files...





default path:  /home/amar/.cache/kagglehub/datasets/paultimothymooney/chest-xray-pneumonia/versions/2
Dataset copied to: ../data/raw/


### Data preprocessing steps

- Inspect images: Detect corrupted files
- Resize to fixed size: Standard input for CNNs
- Normalize: Faster convergence
- Convert to Tensor: Required for model input
- Data Augmentation (train only): Improve generalization
- Balanced classes: Avoid bias towards one class
- Dataloaders:	Efficient training pipeline


In [17]:
# Check whether any image is corrupted
from PIL import Image
import os

def inspect_images(parent_dir):
    """Scan ``parent_dir/<split>/<class>/`` and report images that fail verification.

    Every entry of ``parent_dir`` that is a directory is treated as a split
    (e.g. train, test, val) and every sub-directory of a split as a class
    (e.g. NORMAL, PNEUMONIA). Each file inside a class directory is opened
    with PIL and passed through ``verify()``; any exception raised while doing
    so is printed together with the file path and the scan continues.

    Args:
        parent_dir: Root directory that contains the split directories.

    Returns:
        None. Findings are only printed.
    """
    split_dirs = (os.path.join(parent_dir, entry) for entry in os.listdir(parent_dir))
    # Plain files next to the split / class folders are ignored.
    for split_dir in filter(os.path.isdir, split_dirs):
        class_dirs = (os.path.join(split_dir, entry) for entry in os.listdir(split_dir))
        for class_dir in filter(os.path.isdir, class_dirs):
            for file_name in os.listdir(class_dir):
                candidate = os.path.join(class_dir, file_name)
                try:
                    # The context manager closes the file even when verify() raises.
                    with Image.open(candidate) as handle:
                        handle.verify()
                except Exception as err:
                    print(f"Corrupted image: {candidate}, Error: {err}")



DATA_DIR = "/home/amar/amar/MLOps_project/MLOps_chest_xray_pneumonia/data/raw/chest_xray"
# Pass the configured dataset root. `parent_dir` is only the name of
# inspect_images' parameter and is not defined at cell level, so using it here
# raises NameError on a fresh kernel.
inspect_images(DATA_DIR)
# NOTE: this line is printed unconditionally; any corrupted files are reported
# by inspect_images above it.
print("No Corrupted image found")


No Corrupted image found


In [18]:
import os
from PIL import Image
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

# ========== Step 1: Check for corrupted images ==========
def check_corrupted_images(root_dir):
    """Verify every image below the train/val/test splits of ``root_dir``.

    Expects the layout ``root_dir/<phase>/<class>/<image>`` for the phases
    'train', 'val' and 'test'. Each file is opened with PIL and passed through
    ``verify()``; any exception raised while doing so is printed with the file
    path and scanning continues with the next file.

    Args:
        root_dir: Dataset root that contains the three phase directories.

    Returns:
        None. Findings are only printed.

    Raises:
        OSError: Propagated from ``os.listdir`` when a phase directory is
            missing or unreadable.
    """
    print("🔍 Checking for corrupted images...")
    for phase in ['train', 'val', 'test']:
        phase_path = os.path.join(root_dir, phase)
        for cls in os.listdir(phase_path):
            class_path = os.path.join(phase_path, cls)
            # Stray files next to the class folders would make os.listdir
            # below raise NotADirectoryError, so skip anything that is not
            # a directory.
            if not os.path.isdir(class_path):
                continue
            for img_file in os.listdir(class_path):
                img_path = os.path.join(class_path, img_file)
                try:
                    # Use a context manager so the file handle is released
                    # even when verify() raises; otherwise one descriptor per
                    # image stays open until garbage collection.
                    with Image.open(img_path) as img:
                        img.verify()
                except Exception as e:
                    print(f"⚠️ Corrupted image found: {img_path} — {e}")

# ========== Step 2: Define Transforms ==========
# Side length (pixels) every image is resized to before entering the network.
IMG_SIZE = 224
# Per-channel statistics used by Normalize in both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip / rotation augmentation, then
# tensor conversion and normalisation.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=10),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
)

# Validation / test pipeline: same as above but without the random augmentation.
val_test_transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
)

# ========== Step 3: Load Datasets ==========
def load_datasets(data_dir):
    """Create ``ImageFolder`` datasets for the train, val and test splits.

    The train split uses ``train_transform``; the val and test splits use
    ``val_test_transform``.

    Args:
        data_dir: Directory containing the ``train``, ``val`` and ``test``
            sub-directories.

    Returns:
        A ``(train_dataset, val_dataset, test_dataset)`` tuple.
    """
    print("📦 Loading datasets...")
    split_transforms = (
        ('train', train_transform),
        ('val', val_test_transform),
        ('test', val_test_transform),
    )
    # Built lazily but consumed in order, so the datasets are created as
    # train -> val -> test.
    train_dataset, val_dataset, test_dataset = (
        datasets.ImageFolder(os.path.join(data_dir, split), transform=split_tfm)
        for split, split_tfm in split_transforms
    )
    return train_dataset, val_dataset, test_dataset

# ========== Step 4: Create DataLoaders ==========
def create_dataloaders(train_ds, val_ds, test_ds, batch_size=32):
    """Wrap the three datasets in ``DataLoader`` objects.

    Only the training loader shuffles; the validation and test loaders iterate
    in dataset order.

    Args:
        train_ds: Dataset used for training.
        val_ds: Dataset used for validation.
        test_ds: Dataset used for testing.
        batch_size: Batch size shared by all three loaders (default 32).

    Returns:
        A ``(train_loader, val_loader, test_loader)`` tuple.
    """
    print("🚚 Creating dataloaders...")

    def _make_loader(dataset, reshuffle):
        return DataLoader(dataset, batch_size=batch_size, shuffle=reshuffle)

    return (
        _make_loader(train_ds, True),
        _make_loader(val_ds, False),
        _make_loader(test_ds, False),
    )

# ========== Main Execution ==========
if __name__ == "__main__":
    # DATA_DIR is expected to be defined already (dataset root directory).
    check_corrupted_images(DATA_DIR)

    # Build the datasets first, then wrap them in loaders with the default
    # batch size.
    train_ds, val_ds, test_ds = load_datasets(DATA_DIR)
    train_loader, val_loader, test_loader = create_dataloaders(
        train_ds=train_ds, val_ds=val_ds, test_ds=test_ds
    )

    print("✅ Preprocessing complete.")
    print(f"Train samples: {len(train_ds)}, Val: {len(val_ds)}, Test: {len(test_ds)}")


🔍 Checking for corrupted images...
📦 Loading datasets...
🚚 Creating dataloaders...
✅ Preprocessing complete.
Train samples: 5216, Val: 16, Test: 624


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm

# ===========================
# Load Pretrained DenseNet121
# ===========================
def get_model(num_classes=2):
    """Build an ImageNet-pretrained DenseNet121 with a fresh classifier head.

    Args:
        num_classes: Number of outputs of the replacement classifier layer.
            Defaults to 2 (Normal, Pneumonia).

    Returns:
        The torchvision DenseNet121 model whose ``classifier`` attribute has
        been replaced by ``nn.Linear(in_features, num_classes)``.
    """
    # torchvision 0.13+ deprecates the boolean `pretrained` flag in favour of
    # `weights` enums. Use the enum when this install provides it (same
    # ImageNet weights as pretrained=True) and fall back on older installs.
    weights_enum = getattr(models, "DenseNet121_Weights", None)
    if weights_enum is not None:
        model = models.densenet121(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.densenet121(pretrained=True)

    # Swap the original classification layer for one sized to our classes.
    num_features = model.classifier.in_features
    model.classifier = nn.Linear(num_features, num_classes)
    return model

# ===========================
# Training Function
# ===========================
def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one optimisation pass over ``dataloader``.

    Puts the model in training mode and, for every batch, moves the data to
    ``device``, computes the loss, back-propagates and steps the optimizer.

    Args:
        model: Network to train; called as ``model(images)``.
        dataloader: Iterable of ``(images, labels)`` batches that also exposes
            ``len()`` and a ``dataset`` attribute.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the summed batch losses divided by the
        number of batches, and the percentage of correct predictions relative
        to ``len(dataloader.dataset)``.
    """
    model.train()
    running_loss = 0
    num_correct = 0

    for batch_images, batch_labels in tqdm(dataloader, desc="Training"):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # Predicted class = index of the largest output along dim 1.
        hits = torch.argmax(logits, 1) == batch_labels
        num_correct += hits.sum().item()

    accuracy = 100 * num_correct / len(dataloader.dataset)
    avg_loss = running_loss / len(dataloader)
    return avg_loss, accuracy

# ===========================
# Validation Function
# ===========================
def evaluate(model, dataloader, criterion, device):
    """Compute loss and accuracy of ``model`` on ``dataloader`` without training.

    Puts the model in eval mode and iterates the batches with gradient
    tracking disabled.

    Args:
        model: Network to evaluate; called as ``model(images)``.
        dataloader: Iterable of ``(images, labels)`` batches that also exposes
            ``len()`` and a ``dataset`` attribute.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the summed batch losses divided by the
        number of batches, and the percentage of correct predictions relative
        to ``len(dataloader.dataset)``.
    """
    model.eval()
    loss_sum = 0
    hit_count = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_images, batch_labels in tqdm(dataloader, desc="Validation"):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest output along dim 1.
            predicted = torch.argmax(logits, 1)
            hit_count += (predicted == batch_labels).sum().item()

    accuracy = 100 * hit_count / len(dataloader.dataset)
    avg_loss = loss_sum / len(dataloader)
    return avg_loss, accuracy

# ===========================
# Main Training Loop
# ===========================
def train_model(model, train_loader, val_loader, device, epochs=10, lr=1e-4):
    """Train ``model`` for ``epochs`` epochs and report metrics after each one.

    Uses cross-entropy loss and an Adam optimizer over all model parameters.
    After every epoch the training and validation loss / accuracy are printed.

    Args:
        model: Network to train; it is moved to ``device`` first.
        train_loader: Loader passed to ``train_one_epoch``.
        val_loader: Loader passed to ``evaluate``.
        device: Device used for the model and the batches.
        epochs: Number of passes over ``train_loader`` (default 10).
        lr: Adam learning rate (default 1e-4).

    Returns:
        The same ``model`` object after training.
    """
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=lr)

    for epoch_number in range(1, epochs + 1):
        print(f"\nEpoch {epoch_number}/{epochs}")

        epoch_train_loss, epoch_train_acc = train_one_epoch(
            model, train_loader, adam, loss_fn, device
        )
        epoch_val_loss, epoch_val_acc = evaluate(model, val_loader, loss_fn, device)

        print(f"Train Loss: {epoch_train_loss:.4f}, Accuracy: {epoch_train_acc:.2f}%")
        print(f"Val   Loss: {epoch_val_loss:.4f}, Accuracy: {epoch_val_acc:.2f}%")

    return model

# ===========================
# Example Entry Point
# ===========================
if __name__ == "__main__":
    # train_loader and val_loader are expected to exist already in the
    # notebook namespace (they are not created in this cell).

    # Prefer the GPU when one is available, otherwise run on the CPU.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = get_model()

    trained_model = train_model(model, train_loader, val_loader, device=device, epochs=10)

    # Persist only the learned parameters (state dict), not the whole module.
    torch.save(trained_model.state_dict(), "densenet_pneumonia.pth")
    print("✅ Model saved as densenet_pneumonia.pth")




Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /home/amar/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth


100%|██████████| 30.8M/30.8M [00:03<00:00, 10.2MB/s]



Epoch 1/10


Training: 100%|██████████| 163/163 [11:36<00:00,  4.28s/it]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.06it/s]


Train Loss: 0.1108, Accuracy: 96.09%
Val   Loss: 0.8385, Accuracy: 62.50%

Epoch 2/10


Training: 100%|██████████| 163/163 [14:02<00:00,  5.17s/it]
Validation: 100%|██████████| 1/1 [00:01<00:00,  1.08s/it]


Train Loss: 0.0519, Accuracy: 97.99%
Val   Loss: 0.0787, Accuracy: 100.00%

Epoch 3/10


Training: 100%|██████████| 163/163 [14:01<00:00,  5.16s/it]
Validation: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


Train Loss: 0.0430, Accuracy: 98.47%
Val   Loss: 0.3391, Accuracy: 75.00%

Epoch 4/10


Training: 100%|██████████| 163/163 [13:52<00:00,  5.11s/it]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.09it/s]


Train Loss: 0.0300, Accuracy: 99.06%
Val   Loss: 0.5057, Accuracy: 75.00%

Epoch 5/10


Training: 100%|██████████| 163/163 [13:25<00:00,  4.94s/it]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


Train Loss: 0.0302, Accuracy: 98.87%
Val   Loss: 0.5690, Accuracy: 62.50%

Epoch 6/10


Training: 100%|██████████| 163/163 [13:38<00:00,  5.02s/it]
Validation: 100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


Train Loss: 0.0210, Accuracy: 99.23%
Val   Loss: 0.0197, Accuracy: 100.00%

Epoch 7/10


Training: 100%|██████████| 163/163 [16:01<00:00,  5.90s/it]
Validation: 100%|██████████| 1/1 [00:01<00:00,  1.20s/it]


Train Loss: 0.0155, Accuracy: 99.50%
Val   Loss: 0.1347, Accuracy: 87.50%

Epoch 8/10


Training: 100%|██████████| 163/163 [15:11<00:00,  5.59s/it]
Validation: 100%|██████████| 1/1 [00:01<00:00,  1.06s/it]


Train Loss: 0.0234, Accuracy: 99.25%
Val   Loss: 0.1273, Accuracy: 93.75%

Epoch 9/10


Training:  42%|████▏     | 68/163 [05:50<08:14,  5.20s/it]