In [None]:
import numpy as np


# Preparing train set & test set

In [None]:
import os
import random
import torch
import shutil
from PIL import Image

# Seed every random number generator this notebook draws from, so that a fresh
# "Restart & Run All" reproduces the same results:
#   - Python's `random` picks the held-out test images (random.sample) and the
#     class drawn on each dataset access (random.choice);
#   - torch's RNG is the source of randomness for torch-side operations.
random.seed(0)
torch.manual_seed(0)

In [None]:
# Build the directory skeleton for the train/test split.

class_names = ['covid', 'normal', 'viral']
root_dir = 'COVID-19RadiographyDatabase'
source_dirs = ['COVID', 'NORMAL', 'Viral Pneumonia']

# The two split folders come first, followed by one sub-folder per class under
# 'test'. No per-class folders are created under 'train' here: each whole class
# folder is moved to train/<class> by a later cell.
split_dirs = [os.path.join(root_dir, split) for split in ('test', 'train')]
test_class_dirs = [os.path.join(root_dir, 'test', name) for name in class_names]

for directory in split_dirs + test_class_dirs:
    # os.mkdir (not makedirs) is deliberate: a missing root_dir should fail loudly.
    if not os.path.exists(directory):
        os.mkdir(directory)

In [None]:
# Rename the raw download folders to the class names used in this notebook.
# The existence check makes this a no-op once the rename has already happened.
for source_dir, class_name in zip(source_dirs, class_names):
    source_path = os.path.join(root_dir, source_dir)
    if os.path.exists(source_path):
        os.rename(source_path, os.path.join(root_dir, class_name))

# Hold out 30 images from each class for the test set, then move what is left
# of the class folder into 'train'.
for class_name in class_names:
    class_dir = os.path.join(root_dir, class_name)
    if not os.path.isdir(class_dir):
        # The class folder is gone, e.g. because a previous run of this cell
        # already moved it into 'train'. Skipping keeps the cell re-runnable
        # instead of failing in os.listdir.
        continue

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # sample depend only on the state of the `random` module.
    images = sorted(x for x in os.listdir(class_dir) if x.lower().endswith('.png'))
    # random.sample raises ValueError if the class has fewer than 30 images,
    # which is preferable to silently leaving the training folder empty.
    held_out = random.sample(images, 30)
    for image in held_out:
        shutil.move(os.path.join(class_dir, image), os.path.join(root_dir, 'test', class_name))

    # move remaining class folder into 'train'
    shutil.move(class_dir, os.path.join(root_dir, 'train', class_name))

# Create Custom Dataset

In [None]:
from torch.utils.data import Dataset

In [None]:
class ChestXRayDataset(Dataset):
    """Chest X-ray images stored one folder per class ('covid', 'normal', 'viral').

    Args:
        image_dirs: mapping from class name to the directory that holds the
            PNG files of that class. Every name in ``class_names`` must be a key.
        transform: callable applied to each loaded RGB PIL image; its result is
            the first element of every item.

    Each item is ``(transform(image), label)`` where ``label`` is the position
    of the image's class in ``class_names``.

    Note:
        ``__getitem__`` draws the class uniformly at random on every call, so
        each class is equally likely regardless of how many images it has.
        The requested index only selects the image inside the drawn class,
        which means the same index can return different items across calls.
    """

    def __init__(self, image_dirs, transform):
        self.image_dirs = image_dirs
        self.transform = transform
        self.class_names = ['covid', 'normal', 'viral']
        # class name -> list of PNG file names found in that class's directory
        self.images = {
            class_name: self._list_images(class_name)
            for class_name in self.class_names
        }

    def _list_images(self, class_name):
        """Return the PNG file names in the directory of ``class_name`` and report the count."""
        images = [
            x for x in os.listdir(self.image_dirs[class_name])
            if x.lower().endswith('.png')
        ]
        print(f'Found {len(images)}images of {class_name} examples')
        return images

    def __len__(self):
        """Total number of images over all classes."""
        return sum(len(self.images[class_name]) for class_name in self.class_names)

    def __getitem__(self, index):
        """Return ``(transformed image, class index)`` for a randomly drawn class.

        Raises:
            IndexError: if no class contains any image.
        """
        # Only draw from classes that actually have images; an empty class
        # would otherwise make the modulo below divide by zero.
        candidates = [name for name in self.class_names if self.images[name]]
        if not candidates:
            raise IndexError('dataset contains no images')
        class_name = random.choice(candidates)
        # len(self) counts all classes together, so wrap the index into the
        # range of the drawn class.
        index = index % len(self.images[class_name])
        image_name = self.images[class_name][index]
        image_path = os.path.join(self.image_dirs[class_name], image_name)
        image = Image.open(image_path).convert('RGB')
        # The label is the position in the full class list, not in `candidates`.
        return self.transform(image), self.class_names.index(class_name)

# Image Transformation

In [None]:
from torchvision import transforms

In [None]:
# Per-split preprocessing pipelines.
# We use the same normalization that was used on the ImageNet data when training the ResNet18 model.
# Both splits resize to a fixed 224x224: passing a single int to Resize only
# scales the shorter edge, which would give differently shaped tensors for
# non-square images and make them impossible to stack into one batch.
# The random horizontal flip is an augmentation and is applied to 'train' only.
image_transforms = {
    'train': transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
}

# Data Loader

In [None]:
from torch.utils.data import DataLoader

In [None]:
# image_dirs for each data set (train, test): class name -> image directory.
# A class is left out of a mapping when its directory does not exist on disk.
train_image_dirs = {
    name: os.path.join(root_dir, 'train', name)
    for name in class_names
    if os.path.exists(os.path.join(root_dir, 'train', name))
}
test_image_dirs = {
    name: os.path.join(root_dir, 'test', name)
    for name in class_names
    if os.path.exists(os.path.join(root_dir, 'test', name))
}

In [None]:
# Training split: images from the train directories, with the 'train' transform pipeline.
train_dataset = ChestXRayDataset(
    image_dirs=train_image_dirs,
    transform=image_transforms['train'],
)

In [None]:
# Test split: images from the test directories, with the 'test' transform pipeline.
test_dataset = ChestXRayDataset(
    image_dirs=test_image_dirs,
    transform=image_transforms['test'],
)

In [None]:
# Batch size: number of samples each loader yields per batch
bs = 6

# Create a data loader for each data set. The loaders wrap the dataset objects
# built above (train_dataset / test_dataset).
train_data_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=bs, shuffle=True)

print(f'Num of training batches: {len(train_data_loader)}')
print(f'Num of test batches: {len(test_data_loader)}')

# Data Visualization

In [None]:
% matplotlib inline
from matplotlib import pyplot as plt

In [None]:
# Use the dataset's own class list: the integer labels it returns are indices
# into this list, and show_images uses it to turn them back into names.
class_names = train_dataset.class_names


def show_images(images, labels, preds):
    """Plot a batch of normalized images in a single row with their labels.

    Args:
        images: batch of image tensors in channels-first layout with three
            channels (each one is transposed to height x width x channel and
            de-normalized with the three-channel mean/std below).
        labels: true class index per image, indexing the module-level
            ``class_names``.
        preds: predicted class index per image, same convention as ``labels``.

    The x-label of each image is its true class and the y-label its predicted
    class, drawn in green when the two agree and in red otherwise.
    """
    # Constants that undo the normalization applied by the input transforms;
    # built once rather than on every loop iteration.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # One subplot column per image, so the grid fits any batch size.
    num_images = len(images)

    plt.figure(figsize=(8, 4))
    for i, image in enumerate(images):
        plt.subplot(1, num_images, i + 1, xticks=[], yticks=[])
        # detach()/cpu() make the conversion work for tensors that live on
        # another device or carry gradient information.
        image = image.detach().cpu().numpy().transpose((1, 2, 0))
        # Clip because de-normalized values can fall slightly outside [0, 1].
        image = np.clip(image * std + mean, 0., 1.)
        plt.imshow(image)

        label, pred = int(labels[i]), int(preds[i])
        col = 'green' if pred == label else 'red'

        plt.xlabel(f'{class_names[label]}')
        plt.ylabel(f'{class_names[pred]}', color=col)
    plt.tight_layout()
    plt.show()

In [None]:
# Draw one batch from the training loader and display it. The labels are also
# passed as the predictions, so every caption is drawn in green.
images, labels = next(iter(train_data_loader))
show_images(images, labels, labels)

In [None]:
# Draw one batch from the test loader and display it. The labels are also
# passed as the predictions, so every caption is drawn in green.
images, labels = next(iter(test_data_loader))
show_images(images, labels, labels)

# Creating the Model

# Training the Model