In [1]:
!pip install imutils



In [2]:
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from IPython.display import FileLink
from imutils import paths
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

np.random.seed(0)

import random
import shutil
import cv2
import os

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!unzip "/content/gdrive/My Drive/dataset.zip"

In [None]:
!unzip "/content/gdrive/My Drive/Chest_xray_dataset.zip"

In [12]:
dataset_path = './Covid_dataset'
input_path = './dataset/Covid dataset'

In [7]:
%%bash
rm -rf Covid_dataset
mkdir -p Covid_dataset/covid
mkdir -p Covid_dataset/normal

In [13]:
samples = 25

In [14]:
# construct the path to the metadata CSV file and load it
csvPath = os.path.sep.join([input_path, "metadata.csv"])
df = pd.read_csv(csvPath)

In [15]:
# loop over the rows of the COVID-19 data frame
for (i, row) in df.iterrows():
    # if (1) the current case is not COVID-19 or (2) this is not
    # a 'PA' view, then ignore the row
    if row["finding"] != "COVID-19" or row["view"] != "PA":
        continue

    # build the path to the input image file
    imagePath = os.path.sep.join([input_path, "images", row["filename"]])

    # if the input image file does not exist (there are some errors in
    # the COVID-19 metadeta file), ignore the row
    if not os.path.exists(imagePath):
        continue

    # extract the filename from the image path and then construct the
    # path to the copied image file
    filename = row["filename"].split(os.path.sep)[-1]
    outputPath = os.path.sep.join([f"{dataset_path}/covid", filename])

    # copy the image
    shutil.copy2(imagePath, outputPath)

In [16]:
covid_images = list(paths.list_images(f"{dataset_path}/covid"))
covid_images

['./Covid_dataset/covid/88de9d8c39e946abd495b37cd07d89e5-2ee6-0.jpg',
 './Covid_dataset/covid/covid-19-pneumonia-22-day1-pa.png',
 './Covid_dataset/covid/F63AB6CE-1968-4154-A70F-913AF154F53D.jpeg',
 './Covid_dataset/covid/extubation-8.jpg',
 './Covid_dataset/covid/paving.jpg',
 './Covid_dataset/covid/auntminnie-b-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg',
 './Covid_dataset/covid/F4341CE7-73C9-45C6-99C8-8567A5484B63.jpeg',
 './Covid_dataset/covid/kjr-21-e25-g001-l-a.jpg',
 './Covid_dataset/covid/6C94A287-C059-46A0-8600-AFB95F4727B7.jpeg',
 './Covid_dataset/covid/1-s2.0-S1684118220300682-main.pdf-003-b2.png',
 './Covid_dataset/covid/covid-19-pneumonia-35-1.jpg',
 './Covid_dataset/covid/16663_1_1.jpg',
 './Covid_dataset/covid/4-x-day1.jpg',
 './Covid_dataset/covid/auntminnie-d-2020_01_28_23_51_6665_2020_01_28_Vietnam_coronavirus.jpeg',
 './Covid_dataset/covid/covid-19-pneumonia-34.png',
 './Covid_dataset/covid/1.CXRCTThoraximagesofCOVID-19fromSingapore.pdf-001-fig2b.png',

In [35]:
normal_dataset_path ='./Chest_xray_dataset'

In [36]:
basePath = os.path.sep.join([normal_dataset_path, "train", "NORMAL"])
imagePaths = list(paths.list_images(basePath))

# randomly sample the image paths
random.seed(42)
random.shuffle(imagePaths)
imagePaths = imagePaths[:samples]

# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
    # extract the filename from the image path and then construct the
    # path to the copied image file
    filename = imagePath.split(os.path.sep)[-1]
    outputPath = os.path.sep.join([f"{dataset_path}/normal", filename])

    # copy the image

    shutil.copy2(imagePath, outputPath)



In [19]:
df.finding.unique()

array(['COVID-19', 'ARDS', 'SARS', 'Pneumocystis', 'Streptococcus',
       'No Finding', 'Chlamydophila', 'E.Coli', 'COVID-19, ARDS',
       'Klebsiella', 'Legionella'], dtype=object)

In [20]:
df1 = df[df['finding'] == 'COVID-19, ARDS']

In [21]:
df1.head()

Unnamed: 0,patientid,offset,sex,age,finding,survival,intubated,intubation_present,went_icu,in_icu,needed_supplemental_O2,extubated,temperature,pO2_saturation,leukocyte_count,neutrophil_count,lymphocyte_count,view,modality,date,location,folder,filename,doi,url,license,clinical_notes,other_notes,Unnamed: 28
183,96,0.0,M,60.0,"COVID-19, ARDS",,,,Y,,,,,89.0,,,,PA,X-ray,2020,Spain,images,covid-19-pneumonia-rapidly-progressive-admissi...,,https://radiopaedia.org/cases/covid-19-pneumon...,CC BY-NC-SA,Fever and odynophagia. Trip to Italy 7 days ag...,"Case courtesy of Dr Edgar Lorente, Radiopaedia...",
184,96,1.0,M,60.0,"COVID-19, ARDS",,,,Y,,,,,89.0,,,,PA,X-ray,2020,Spain,images,covid-19-pneumonia-rapidly-progressive-12-hour...,,https://radiopaedia.org/cases/covid-19-pneumon...,CC BY-NC-SA,Fever and odynophagia. Trip to Italy 7 days ag...,"Case courtesy of Dr Edgar Lorente, Radiopaedia...",
185,96,3.0,M,60.0,"COVID-19, ARDS",,,,Y,,,,,89.0,,,,PA,X-ray,2020,Spain,images,covid-19-pneumonia-rapidly-progressive-3-days.jpg,,https://radiopaedia.org/cases/covid-19-pneumon...,CC BY-NC-SA,Fever and odynophagia. Trip to Italy 7 days ag...,"Case courtesy of Dr Edgar Lorente, Radiopaedia...",
186,97,0.0,F,70.0,"COVID-19, ARDS",,Y,N,,,,,38.0,85.0,,,,PA,X-ray,2020,Spain,images,covid-19-rapidly-progressive-acute-respiratory...,,https://radiopaedia.org/cases/covid-19-rapidly...,CC BY-NC-SA,"Admitted with acute respiratory failure, fever...","Case courtesy of Dr Edgar Lorente, Radiopaedia...",
187,97,1.0,F,70.0,"COVID-19, ARDS",,Y,Y,,,,,,,,,,PA,X-ray,2020,Spain,images,covid-19-rapidly-progressive-acute-respiratory...,,https://radiopaedia.org/cases/covid-19-rapidly...,CC BY-NC-SA,"Admitted with acute respiratory failure, fever...","Case courtesy of Dr Edgar Lorente, Radiopaedia...",


In [22]:
df1.shape

(12, 29)

In [37]:
normal_images = list(paths.list_images(f"{dataset_path}/normal"))
covid_images = list(paths.list_images(f"{dataset_path}/covid"))


In [None]:
plots_from_files(normal_images, rows=5, maintitle="Normal X-ray images")

NameError: ignored

In [38]:
# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class images

print("[INFO] loading images...")
imagePaths = list(paths.list_images(dataset_path))

data = []
labels = []

# loop over the image paths
for imagePath in imagePaths:
    # extract the class label from the filename
    label = imagePath.split(os.path.sep)[-2]
    # load the image, swap color channels, and resize it to be a fixed
    # 224x224 pixels while ignoring aspect ratio
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.IMREAD_GRAYSCALE)
    image = cv2.resize(image, (224, 224))
    # update the data and labels lists, respectively
    image_np = (image).flatten()
    data.append(image_np)
    labels.append(label)
    
# convert the data and labels to NumPy arrays while scaling the pixel
# intensities to the range [0, 1]
data_train = np.array(data) / 255
labels_train = np.array(labels)

[INFO] loading images...


In [58]:
# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels_train_y = lb.fit_transform(labels_train)
labels_train_cat = to_categorical(labels_train_y)

# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing

(X_train, X_test, y_train, y_test) = train_test_split(data, labels, test_size=0.20, stratify=labels, random_state=42)

In [40]:
X_train

[array([  0,   0,   0, ..., 153, 153, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([149, 149, 149, ...,  33,  33, 255], dtype=uint8),
 array([ 46,  46,  46, ..., 111, 111, 255], dtype=uint8),
 array([  6,   6,   6, ...,   6,   6, 255], dtype=uint8),
 array([  0,   0,   0, ...,  11,  11, 255], dtype=uint8),
 array([  1,   1,   1, ...,   1,   1, 255], dtype=uint8),
 array([221, 221, 221, ..., 217, 217, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([133, 129, 128, ...,  33,  35, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([ 59,  56,  58, ...,  56,  58, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([  9,   9,   9, ..., 162, 162, 255], dtype=uint8),
 array([ 26,  25,  29, ...,  37,  37, 255], dtype=uint8),
 array([104, 104, 104, ...,  46,  46, 255], dtype=uint8),
 array([ 95,  95,  95, ..., 120, 120, 255], dtype=uint8),
 array([177, 1

In [59]:
X_test

[array([ 45,  45,  45, ...,   0,   0, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([ 47,  47,  47, ...,   0,   0, 255], dtype=uint8),
 array([ 89,  46,   6, ...,  48,  44, 255], dtype=uint8),
 array([  1,   1,   1, ...,  53,  54, 255], dtype=uint8),
 array([ 79,  79,  79, ...,   3,   3, 255], dtype=uint8),
 array([ 90,  90,  90, ..., 114, 114, 255], dtype=uint8),
 array([ 50,  41,  32, ...,  44,  34, 255], dtype=uint8),
 array([  0,   0,   0, ...,   0,   0, 255], dtype=uint8),
 array([ 87,  87,  87, ...,   1,   1, 255], dtype=uint8),
 array([ 15,  15,  15, ...,  39,  39, 255], dtype=uint8),
 array([179, 179, 179, ..., 122, 122, 255], dtype=uint8),
 array([ 77,  77,  77, ..., 133, 133, 255], dtype=uint8),
 array([ 86,  86,  86, ...,   0,   0, 255], dtype=uint8),
 array([133, 133, 133, ..., 254, 254, 255], dtype=uint8),
 array([ 12,  12,  12, ...,  39,  39, 255], dtype=uint8),
 array([  9,   4,   5, ..., 248, 244, 255], dtype=uint8),
 array([ 17,  

In [41]:
y_train

['covid',
 'normal',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'normal',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covi

In [67]:
y_test

['normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid',
 'normal',
 'covid',
 'covid',
 'covid']

In [42]:
np.unique(y_train)

array(['covid', 'normal'], dtype='<U6')

In [43]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import calibration_curve



# Create classifiers
lr = LogisticRegression(max_iter=10000)
gnb = GaussianNB()
svc = LinearSVC(C=1.0, max_iter=10000)
rfc = RandomForestClassifier()

In [44]:
LR_Model = lr.fit(X_train, y_train)

GNB_Model = gnb.fit(X_train, y_train)

SVC_Model = svc.fit(X_train, y_train)

RFC_Model = rfc.fit(X_train, y_train)



In [45]:


import pickle

with open('LR_model.pkl', 'wb') as f1:
    pickle.dump(LR_Model,f1)
    f1.close()

with open('GNB_model.pkl', 'wb') as f2:
    pickle.dump(GNB_Model,f2)
    f2.close()

with open('SVM_model.pkl', 'wb') as f3:
    pickle.dump(SVC_Model,f3)
    f3.close()

with open('RF_model.pkl', 'wb') as f4:
    pickle.dump(RFC_Model,f4)
    f4.close()


    



In [46]:
from IPython.display import FileLink

FileLink(r'LR_model.pkl')

In [47]:
FileLink(r'GNB_model.pkl')

In [48]:
FileLink(r'SVM_model.pkl')

In [49]:
FileLink(r'RF_model.pkl')

In [50]:


from sklearn.metrics import accuracy_score


LR_y_pred = LR_Model.predict(X_test)
GNB_y_pred = GNB_Model.predict(X_test)
SVC_y_pred = SVC_Model.predict(X_test)
RFC_y_pred = RFC_Model.predict(X_test)


# accuracy_score(y_true, y_pred)



In [51]:
LR_acc_sc = accuracy_score(y_test, LR_y_pred)
LR_acc_sc

1.0

In [52]:
GNB_acc_sc = accuracy_score(y_test, GNB_y_pred)
GNB_acc_sc

0.9411764705882353

In [53]:
SVC_acc_sc = accuracy_score(y_test, SVC_y_pred)
SVC_acc_sc

1.0

In [54]:
RFC_acc_sc = accuracy_score(y_test, RFC_y_pred)
RFC_acc_sc

0.9411764705882353

In [55]:
print('LogisticRegression score: %f' % LR_Model.score(X_test, y_test))
print('Naive Bayes score: %f' % GNB_Model.score(X_test, y_test))
print('SVM score: %f' % SVC_Model.score(X_test, y_test))
print('Random Forest score: %f' % RFC_Model.score(X_test, y_test))

LogisticRegression score: 1.000000
Naive Bayes score: 0.941176
SVM score: 1.000000
Random Forest score: 0.941176


In [68]:
test_image_path ="./Covid_dataset"
image = cv2.imread(imagePath)
image = cv2.cvtColor(image, cv2.IMREAD_GRAYSCALE)
image = cv2.resize(image, (224, 224))
# update the data and labels lists, respectively
image_np = (image).flatten()
test_img=[]
test_img_label=[]
test_img.append(image_np)
test_img_label.append('Covid')

In [69]:

LR_y_pred = LR_Model.predict(test_img)
GNB_y_pred = GNB_Model.predict(test_img)
SVC_y_pred = SVC_Model.predict(test_img)
RFC_y_pred = RFC_Model.predict(test_img)


In [70]:
LR_y_pred

array(['covid'], dtype='<U6')

In [71]:
GNB_y_pred

array(['covid'], dtype='<U6')

In [72]:
SVC_y_pred

array(['covid'], dtype='<U6')

In [73]:
RFC_y_pred

array(['normal'], dtype='<U6')