# Detecting COVID-19 with Chest X Ray using PyTorch

Image classification of chest X-rays into one of three classes: Normal, Viral Pneumonia, or COVID-19.

Notebook created for the guided project [Detecting COVID-19 with Chest X Ray using PyTorch](https://www.coursera.org/projects/covid-19-detection-x-ray) on Coursera

Dataset from [COVID-19 Radiography Dataset](https://www.kaggle.com/tawsifurrahman/covid19-radiography-database) on Kaggle

In [1]:
%matplotlib inline

import os
import shutil
import random
import torch
import torchvision
import numpy as np

from PIL import Image
from matplotlib import pyplot as plt

# Seed every random number generator the notebook relies on, not just PyTorch:
# the train/test split further down shuffles with the stdlib `random` module,
# so leaving it unseeded makes the split differ on every fresh kernel.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

print('Using PyTorch version', torch.__version__)

Using PyTorch version 1.13.1


# Preparing Training and Test Sets.

## Cleaning the dataset

In [15]:
# Show what the raw download contains before any cleaning is done.
raw_entries = os.listdir('COVID-19_Radiography_Dataset')
print(raw_entries)


['Viral Pneumonia.metadata.xlsx', '.DS_Store', 'test', 'COVID.metadata.xlsx', 'Normal.metadata.xlsx', 'pneumonia', 'normal', 'COVID', 'README.md.txt']


In [14]:
root_dir = 'COVID-19_Radiography_Dataset'
clean_dir = 'Clean_ChestXRay_Dataset'

# Classes to include: original folder name -> folder name in the clean dataset
class_map = {
    'COVID': 'covid',
    'pneumonia': 'pneumonia',
    'normal': 'normal'
}


def build_clean_dataset(root_dir, clean_dir, class_map):
    """Copy the PNG images of each selected class into a clean dataset folder.

    For every ``original -> new`` pair in ``class_map``, the files whose name
    ends in ``.png`` (case-insensitive) inside ``<root_dir>/<original>/images``
    are copied to ``<clean_dir>/<new>``. A class without an ``images`` folder
    is reported and skipped.

    Args:
        root_dir: Directory holding one sub-folder per original class.
        clean_dir: Destination directory; created if it does not exist.
        class_map: Mapping of original class folder name to new class name.

    Returns:
        A dict mapping each new class name that was copied to the number of
        images copied for it. Skipped classes are absent from the dict.
    """
    # Create new clean dataset directory
    os.makedirs(clean_dir, exist_ok=True)

    copied_counts = {}
    for orig_class, new_class in class_map.items():
        # Path to original images folder
        img_dir = os.path.join(root_dir, orig_class, 'images')
        if not os.path.isdir(img_dir):
            print(f"⚠️ Skipping {orig_class} (no images folder found).")
            continue

        # Path to new clean class folder
        clean_class_dir = os.path.join(clean_dir, new_class)
        os.makedirs(clean_class_dir, exist_ok=True)

        # Copy all .png files from images/ to clean dataset
        images = [f for f in os.listdir(img_dir) if f.lower().endswith('.png')]
        for img in images:
            src = os.path.join(img_dir, img)
            dst = os.path.join(clean_class_dir, img)
            shutil.copy(src, dst)

        print(f"✅ Copied {len(images)} images to {clean_class_dir}")
        copied_counts[new_class] = len(images)

    return copied_counts


copied_counts = build_clean_dataset(root_dir, clean_dir, class_map)

# Only announce success when every requested class was actually copied;
# a skipped class would otherwise go unnoticed behind the success message.
missing_classes = [c for c in class_map.values() if c not in copied_counts]
if not missing_classes:
    print("🎉 Clean dataset created successfully!")
else:
    print(f"⚠️ Clean dataset is incomplete; missing classes: {missing_classes}")


✅ Copied 3616 images to Clean_ChestXRay_Dataset/covid
✅ Copied 1345 images to Clean_ChestXRay_Dataset/pneumonia
✅ Copied 10192 images to Clean_ChestXRay_Dataset/normal
🎉 Clean dataset created successfully!


# Splitting the Data into Training and Test Sets

In [16]:
# Source and target directories
source_dir = 'Clean_ChestXRay_Dataset'
target_dir = 'dataset_split'

# Train-test ratio
train_ratio = 0.8

# Main output folders (kept at module level so other cells can refer to them)
train_dir = os.path.join(target_dir, 'train')
test_dir = os.path.join(target_dir, 'test')


def split_dataset(source_dir, target_dir, train_ratio=0.8, seed=0):
    """Split every class folder of ``source_dir`` into train and test copies.

    Each sub-directory of ``source_dir`` is treated as one class. Its files
    ending in ``.png`` (case-insensitive) are shuffled and the first
    ``int(n * train_ratio)`` are copied to ``<target_dir>/train/<class>``,
    the rest to ``<target_dir>/test/<class>``.

    The split is reproducible: classes and file names are sorted before being
    shuffled with a local ``random.Random(seed)``, so the result depends
    neither on directory listing order nor on the global random state.
    When an image is copied into one split, a copy of the same file name left
    in the other split by an earlier run is removed, so re-running can never
    leave an image in both train and test.

    Args:
        source_dir: Directory containing one sub-folder per class.
        target_dir: Directory that receives the ``train`` and ``test`` folders.
        train_ratio: Fraction of each class assigned to train; must be in [0, 1].
        seed: Seed for the shuffle.

    Returns:
        A dict mapping class name to ``(n_train, n_test)``.

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    train_root = os.path.join(target_dir, 'train')
    test_root = os.path.join(target_dir, 'test')
    os.makedirs(train_root, exist_ok=True)
    os.makedirs(test_root, exist_ok=True)

    rng = random.Random(seed)
    split_counts = {}

    # Sorted so the shared RNG is consumed in the same order on every run
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Create subfolders for each class in train and test
        class_train_dir = os.path.join(train_root, class_name)
        class_test_dir = os.path.join(test_root, class_name)
        os.makedirs(class_train_dir, exist_ok=True)
        os.makedirs(class_test_dir, exist_ok=True)

        # Get all images in a stable order, then shuffle deterministically
        images = sorted(f for f in os.listdir(class_path) if f.lower().endswith('.png'))
        rng.shuffle(images)

        # Split into train and test
        split_idx = int(len(images) * train_ratio)
        train_images = images[:split_idx]
        test_images = images[split_idx:]

        # Copy images into their split and drop any stale copy in the other one
        for keep_dir, drop_dir, names in (
            (class_train_dir, class_test_dir, train_images),
            (class_test_dir, class_train_dir, test_images),
        ):
            for img in names:
                shutil.copy(os.path.join(class_path, img), os.path.join(keep_dir, img))
                stale = os.path.join(drop_dir, img)
                if os.path.exists(stale):
                    os.remove(stale)

        print(f"✅ {class_name}: {len(train_images)} train, {len(test_images)} test")
        split_counts[class_name] = (len(train_images), len(test_images))

    return split_counts


if os.path.isdir(source_dir):
    split_counts = split_dataset(source_dir, target_dir, train_ratio)
    print("🎉 Dataset successfully split into training and test sets!")
else:
    # The clean dataset is produced by the cleaning step; nothing to split without it
    split_counts = {}
    print(f"⚠️ Skipping split ({source_dir} not found).")

✅ pneumonia: 1076 train, 269 test
✅ normal: 8153 train, 2039 test
✅ covid: 2892 train, 724 test
🎉 Dataset successfully split into training and test sets!
