<a href="https://colab.research.google.com/github/ahmedhammad97/Chest-X-Ray-Covid-19-Diagnosis/blob/main/Chest_X_Ray_Covid_19_Diagnosis_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import os
import torch.optim as optim
from sklearn import metrics
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from random import shuffle

# Mounting to Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd "/content/drive/My Drive/Chest X-Ray Dataset"

/content/drive/My Drive/Chest X-Ray Dataset


# Dataset Preparation

In [None]:
def read_dataset(folder_path, label):
  paths = [os.path.abspath(folder_path + image_path) for image_path in os.listdir(folder_path)]
  labels = [label] * len(paths)
  return paths, labels

In [None]:
def read_all_datasets():
  covid_paths, covid_labels = read_dataset("COVID/", 1)
  normal_paths, normal_labels = read_dataset("NORMAL/", 0)
  pneumonia_paths, pneumonia_labels = read_dataset("PNEUMONIA/", 0)
  paths = covid_paths + covid_paths + normal_paths + pneumonia_paths
  labels = covid_labels + covid_labels + normal_labels + pneumonia_labels
  zipped_dataset = c = list(zip(paths, labels))
  shuffle(zipped_dataset)
  return zip(*zipped_dataset)

# Dataset Split

In [None]:
full_data, full_labels = read_all_datasets()
train_data, test_data, train_labels, test_labels = train_test_split(full_data, full_labels, test_size=0.2, random_state=1, stratify=full_labels)
train_data, val_data, train_labels, val_labels = train_test_split(train_data, train_labels, test_size=0.25, random_state=1, stratify=train_labels)

# Dataset Class

In [None]:
class DatasetWrapper(Dataset):
  def __init__(self, array, labels):
    self.array = array
    self.labels = labels

  def __getitem__(self, index):
    image = cv2.imread(self.array[index],)
    image = cv2.resize(image, (128, 128), interpolation=cv2.INTER_AREA)
    image = image.reshape((3, 128, 128))
    label = self.labels[index]
    return torch.tensor(image).float(), torch.tensor(label)

  def __len__(self):
    return len(self.array)

In [None]:
train_dataset = DatasetWrapper(train_data, train_labels)
val_dataset = DatasetWrapper(val_data, val_labels)
test_dataset = DatasetWrapper(test_data, test_labels)

# DataLoaders 

In [None]:
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# CNN Basic Model

In [None]:
class Model(nn.Module):
  def __init__(self):
    super(Model, self).__init__()
    self.conv1 = nn.Conv2d(3, 8, 3)
    self.fc1 = nn.Linear(8*126*126, 32)
    self.fc2 = nn.Linear(32, 2)

  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = x.reshape(-1, 8*126*126)
    x = self.fc1(x)
    x = self.fc2(x)
    return x

# Training Loop

In [None]:
def run_model(model, dataloader, optimizer, is_train=False):
  predictions = []
  epoch_labels = []
  loss_type = nn.CrossEntropyLoss()
  total_loss = 0
  optimizer.zero_grad()

  if is_train:
    model.train()
    for(data, labels) in dataloader:
      output = model(data)
      loss = loss_type(output, labels)
      total_loss += loss.item()
      loss.backward()
      optimizer.step()
      predictions += torch.argmax(output, 1)
      epoch_labels += labels.tolist()
  else:
    with torch.no_grad():
      for(data, labels) in dataloader:
        output = model(data)
        loss = loss_type(output, labels)
        total_loss += loss.item()
        predictions += torch.argmax(output, 1)
        epoch_labels += labels.tolist()

  epoch_loss = total_loss / len(dataloader)
  return predictions, epoch_labels, epoch_loss

In [None]:
model = Model()
epochs = 10
optimizer = optim.Adam(model.parameters(), 0.0005)

# Basic Model Run

In [None]:
for _ in range(epochs):
  t_predictions, t_labels, t_loss = run_model(model, train_dataloader, optimizer, True)
  print("loss: " + str(t_loss))
  print("Accuracy: " + str(metrics.accuracy_score(t_labels, t_predictions)))
  print("F1: " + str(metrics.f1_score(t_labels, t_predictions)))

loss: 59.125044097969884
Accuracy: 0.929177545691906
F1: 0.9250949257852952
loss: 6.589976001998807
Accuracy: 0.9526762402088773
F1: 0.94994822229893
loss: 1.7075662154818982
Accuracy: 0.9751958224543081
F1: 0.9737024221453286
loss: 1.1755083790051586
Accuracy: 0.9797650130548303
F1: 0.9786059351276744
loss: 0.8560887660678492
Accuracy: 0.97911227154047
F1: 0.9778546712802768
loss: 0.6665811296824123
Accuracy: 0.9823759791122716
F1: 0.9813407049067036
loss: 0.5391734711167406
Accuracy: 0.9794386422976501
F1: 0.9781477627471383
loss: 0.41285918255216664
Accuracy: 0.9846605744125326
F1: 0.9837538886968544
loss: 1.3519114600112125
Accuracy: 0.9755221932114883
F1: 0.9740932642487047
loss: 0.5957302245606485
Accuracy: 0.9797650130548303
F1: 0.9785911602209945


In [None]:
s_predictions, s_labels, s_loss = run_model(model, test_dataloader, optimizer)
print("loss: " + str(s_loss))
print("Accuracy: " + str(metrics.accuracy_score(s_labels, s_predictions)))
print("F1: " + str(metrics.f1_score(s_labels, s_predictions)))

loss: 1.94937990677087
Accuracy: 0.9784735812133072
F1: 0.977319587628866


# Resnet 18 Model Run

In [None]:
import torchvision
resnet18_model = torchvision.models.resnet18(pretrained = True)
resnet18_model.fc = nn.Linear(in_features=512, out_features=2)
resnet18_loss_fn = torch.nn.CrossEntropyLoss()
resnet18_optimizer = torch.optim.Adam(resnet18_model.parameters(), lr=3e-5)

In [None]:
for _ in range(epochs):
  res18_t_predictions, res18_t_labels, res18_t_loss = run_model(resnet18_model, train_dataloader, resnet18_optimizer, True)
  print("loss: " + str(res18_t_loss))
  print("Accuracy: " + str(metrics.accuracy_score(res18_t_labels, res18_t_predictions)))
  print("F1: " + str(metrics.f1_score(res18_t_labels, res18_t_predictions, average='micro')))

loss: 0.22874556681199465
Accuracy: 0.9242819843342036
F1: 0.9242819843342036
loss: 0.15145689201744972
Accuracy: 0.9484334203655352
F1: 0.9484334203655354
loss: 0.13675613886637925
Accuracy: 0.9503916449086162
F1: 0.9503916449086162
loss: 0.16090566711742818
Accuracy: 0.9383159268929504
F1: 0.9383159268929504
loss: 0.13091504248586716
Accuracy: 0.9539817232375979
F1: 0.9539817232375979
loss: 0.06575200724849613
Accuracy: 0.9774804177545692
F1: 0.9774804177545692
loss: 0.049780298519976895
Accuracy: 0.983355091383812
F1: 0.983355091383812
loss: 0.043961216237827706
Accuracy: 0.9866187989556136
F1: 0.9866187989556136
loss: 0.04461392418564477
Accuracy: 0.9827023498694517
F1: 0.9827023498694517
loss: 0.057782538057836064
Accuracy: 0.9794386422976501
F1: 0.9794386422976501


In [None]:
res18_s_predictions, res18_s_labels, res18_s_loss = run_model(resnet18_model, test_dataloader, resnet18_optimizer)
print("loss: " + str(res18_s_loss))
print("Accuracy: " + str(metrics.accuracy_score(res18_s_labels, res18_s_predictions)))
print("F1: " + str(metrics.f1_score(res18_s_labels, res18_s_predictions, average='micro')))

loss: 0.11237470885862422
Accuracy: 0.9559686888454012
F1: 0.9559686888454012


# Resnet 50 Model Run

In [None]:
resnet50_model = torchvision.models.resnet50(pretrained = True)
resnet50_model.fc = nn.Linear(in_features=2048, out_features=2)
resnet50_loss_fn = torch.nn.CrossEntropyLoss()
resnet50_optimizer = torch.optim.Adam(resnet50_model.parameters(), lr=0.005)

In [None]:
for _ in range(epochs-3):
  res50_t_predictions, res50_t_labels, res50_t_loss = run_model(resnet50_model, train_dataloader, resnet50_optimizer, True)
  print("loss: " + str(res50_t_loss))
  print("Accuracy: " + str(metrics.accuracy_score(res50_t_labels, res50_t_predictions)))
  print("F1: " + str(metrics.f1_score(res50_t_labels, res50_t_predictions, average='micro')))

loss: 6.1119900693496065
Accuracy: 0.6109660574412533
F1: 0.6109660574412533
loss: 0.8606067275783668
Accuracy: 0.7617493472584856
F1: 0.7617493472584856
loss: 0.6697553082291657
Accuracy: 0.8554177545691906
F1: 0.8554177545691906
loss: 0.5893698179570492
Accuracy: 0.8459530026109661
F1: 0.8459530026109661
loss: 1.4758600917411968
Accuracy: 0.847911227154047
F1: 0.8479112271540469
loss: 0.35548601805930957
Accuracy: 0.8867493472584856
F1: 0.8867493472584856
loss: 0.5596004112351997
Accuracy: 0.7947127937336814
F1: 0.7947127937336814


In [None]:
res50_s_predictions, res50_s_labels, res50_s_loss = run_model(resnet50_model, test_dataloader, resnet50_optimizer)
print("loss: " + str(res50_s_loss))
print("Accuracy: " + str(metrics.accuracy_score(res50_s_labels, res50_s_predictions)))
print("F1: " + str(metrics.f1_score(res50_s_labels, res50_s_predictions, average='micro')))

loss: 2.494884343119338
Accuracy: 0.8454011741682974
F1: 0.8454011741682974
