# Binary Classification: COVID Detection in X-Rays with PyTorch

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, random_split, DataLoader

import torchvision.transforms as transforms
from torchvision import utils

from torchsummary import summary

import os
from PIL import Image, ImageDraw
import copy
from tqdm import tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
toPIL = transforms.Compose([
            transforms.ToPILImage()
        ])

In [4]:
class CovidDatasetTrain(Dataset):
    def __init__(self):
        train_dir = "xray_dataset_covid19/train/"
        
        train_normal_dir = train_dir + "NORMAL/"
        train_pneumonia_dir = train_dir + "PNEUMONIA/"
        
        train_normal_fnames = os.listdir(train_normal_dir)
        train_pneumonia_fnames = os.listdir(train_pneumonia_dir)
        
        self.train_dataset = [[train_normal_dir + image, 0] for image in train_normal_fnames]
        self.train_dataset = self.train_dataset + [[train_pneumonia_dir + image, 1] for image in train_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.train_dataset))
    
    def __getitem__(self, idx):
        data = self.train_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, data[1], idx)

In [5]:
class CovidDatasetTest(Dataset):
    def __init__(self):
        test_dir = "xray_dataset_covid19/test/"
        
        test_normal_dir = test_dir + "NORMAL/"
        test_pneumonia_dir = test_dir + "PNEUMONIA/"
        
        test_normal_fnames = os.listdir(test_normal_dir)
        test_pneumonia_fnames = os.listdir(test_pneumonia_dir)
        
        self.test_dataset = [[test_normal_dir + image, 0] for image in test_normal_fnames]
        self.test_dataset = self.test_dataset + [[test_pneumonia_dir + image, 1] for image in test_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.test_dataset))
    
    def __getitem__(self, idx):
        data = self.test_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, data[1], idx)

In [6]:
train_df = CovidDatasetTrain()
covid_trainloader = DataLoader(train_df, batch_size=2, shuffle=True)

In [7]:
test_df = CovidDatasetTest()
covid_testloader = DataLoader(test_df, batch_size=2, shuffle=True)

In [8]:
for batch_idx, (data, label, _) in enumerate(covid_trainloader):
    print(data.size())
    label = label.unsqueeze(1)
    print(label)
    print(label.size())
    break 
print(len(train_df))

torch.Size([2, 1, 1024, 1024])
tensor([[1],
        [1]])
torch.Size([2, 1])
148


In [9]:
for batch_idx, (data, label, _) in enumerate(covid_testloader):
    print(data.size())
    label = label.unsqueeze(1)
    print(label)
    print(label.size())
    break 
print(len(test_df))

torch.Size([2, 1, 1024, 1024])
tensor([[1],
        [1]])
torch.Size([2, 1])
40


In [10]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=2)
        
        self.num_flatten = 128 * 3 * 3
        
        self.fc1 = nn.Linear(self.num_flatten, 100)
        self.fc2 = nn.Linear(100, 1)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
    
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = x.view(-1, self.num_flatten)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, 0.3)
        x = self.fc2(x)
        
        return F.sigmoid(x)

In [11]:
classifier = CNN()
classifier

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
  (fc1): Linear(in_features=1152, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [12]:
classifier.to(device)

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
  (fc1): Linear(in_features=1152, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [13]:
summary(classifier, input_size=(1, 1024, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 511, 511]             160
            Conv2d-2         [-1, 32, 127, 127]           4,640
            Conv2d-3           [-1, 64, 31, 31]          18,496
            Conv2d-4            [-1, 128, 7, 7]          73,856
            Linear-5                  [-1, 100]         115,300
            Linear-6                    [-1, 1]             101
Total params: 212,553
Trainable params: 212,553
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 4.00
Forward/backward pass size (MB): 36.33
Params size (MB): 0.81
Estimated Total Size (MB): 41.14
----------------------------------------------------------------




In [14]:
loss_function = nn.BCELoss()

In [15]:
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

In [16]:
def train(model, n_epoches, loss_hist):    
    
    for epoch in range(1, n_epoches+1):
        model.train()
    
        epoch_train_loss = 0
        epoch_test_loss = 0
        
        y_true_train = []
        y_pred_train = []
        
        y_true_test = []
        y_pred_test = []
        
        for batch_idx, (data, label, _) in enumerate(covid_trainloader):
            data = data.to(device)
            label = label.type(torch.FloatTensor).to(device)
        
            preds = model(data)
            
            loss = loss_function(preds, label.unsqueeze(1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            y_pred_train.extend(preds.squeeze(1).detach().round().tolist())
            y_true_train.extend(label.detach().round().tolist())
            
            epoch_train_loss += loss.item()
            
        with torch.no_grad():
            model.eval()
                
            for batch_idx, (data, label, _) in enumerate(covid_testloader):
                data = data.to(device)
                label = label.type(torch.FloatTensor).to(device)
                    
                preds = model(data)
                
                val_loss = loss_function(preds, label.unsqueeze(1))
                
                y_pred_test.extend(preds.squeeze(1).detach().round().tolist())
                y_true_test.extend(label.detach().round().tolist())
                
                epoch_test_loss += val_loss.item()
        
        #print(y_pred_test, "<>", y_true_test)
        
        epoch_train_loss = epoch_train_loss / len(covid_trainloader.dataset)
        epoch_test_loss = epoch_test_loss / len(covid_testloader.dataset)
        
        loss_hist["train loss"].append(epoch_train_loss)
        loss_hist["test loss"].append(epoch_test_loss)
        
        print("-------------------------------------------------")
        print("Epoch: {} Train mean loss: {:.8f}".format(epoch, epoch_train_loss))
        print("       {} Test  mean loss: {:.8f}".format(epoch, epoch_test_loss))
        print("       Train Accuracy: ", len([True for x, y in zip(y_pred_train, y_true_train) if x==y])/len(y_pred_train), "==", len([True for x, y in zip(y_pred_train, y_true_train) if x==y]), "/", len(y_pred_train))
        print("       Test Accuracy: ", len([True for x, y in zip(y_pred_test, y_true_test) if x==y])/len(y_pred_test), "==", len([True for x, y in zip(y_pred_test, y_true_test) if x==y]), "/", len(y_pred_test))
        print("-------------------------------------------------")
    return loss_hist

In [17]:
loss_hist = {}
loss_hist["train loss"] = []
loss_hist["test loss"] = []

NUM_EPOCHES = 15

loss_hist = train(classifier, NUM_EPOCHES, loss_hist)
loss_hist

-------------------------------------------------
Epoch: 1 Train mean loss: 0.34738674
       1 Test  mean loss: 0.34716849
       Train Accuracy:  0.5202702702702703 == 77 / 148
       Test Accuracy:  0.5 == 20 / 40
-------------------------------------------------
-------------------------------------------------
Epoch: 2 Train mean loss: 0.34499240
       2 Test  mean loss: 0.34474061
       Train Accuracy:  0.5540540540540541 == 82 / 148
       Test Accuracy:  0.45 == 18 / 40
-------------------------------------------------
-------------------------------------------------
Epoch: 3 Train mean loss: 0.33977035
       3 Test  mean loss: 0.34762985
       Train Accuracy:  0.6216216216216216 == 92 / 148
       Test Accuracy:  0.55 == 22 / 40
-------------------------------------------------
-------------------------------------------------
Epoch: 4 Train mean loss: 0.25426642
       4 Test  mean loss: 0.16357259
       Train Accuracy:  0.7702702702702703 == 114 / 148
       Test Accur

{'train loss': [0.3473867447795095,
  0.34499240203483683,
  0.3397703543808815,
  0.25426642141128714,
  0.2146799931856426,
  0.14587319618393038,
  0.1478664415850732,
  0.12812695568464808,
  0.09978445764077273,
  0.09095804646180163,
  0.0735988136256052,
  0.09509608777866438,
  0.07205312608594623,
  0.06466420438369529,
  0.04433572535934808],
 'test loss': [0.34716848731040956,
  0.3447406068444252,
  0.34762985333800317,
  0.16357258693315088,
  0.16802298189140857,
  0.1532883032137761,
  0.14773751349421219,
  0.13206294309784425,
  0.11995710904648149,
  0.1760583763199975,
  0.14573571643209107,
  0.23996709922212175,
  0.0508628816336568,
  0.10165619306062582,
  0.15764522823228616]}