# TASK 1 : Import Libraries

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np # numerical analysis
import matplotlib.pyplot as plt # image visualization

In [4]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [5]:
tf.test.is_gpu_available

<function tensorflow.python.framework.test_util.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None)>

# TASK 2 : Clone & Explore dataset

In [2]:
#clone the dataset from the github repository
! git clone https://github.com/education454/datasets.git

Cloning into 'datasets'...
fatal: unable to access 'https://github.com/education454/datasets.git/': Failed to connect to github.com port 443 after 21358 ms: Timed out


##### We split the training set to 80% as the new training set and 20% as the validation set (validate the model during training)

In [7]:
#set the path to the main dir
import os
main_dir = "datasets/Data/"

#set the path to the train dir
train_dir = os.path.join(main_dir, "train")

#set the path to the test dir
test_dir = os.path.join(main_dir, "test")

#directory with the training covid images
train_covid_dir = os.path.join(train_dir, "COVID19")

#directory with the training normal images
train_normal_dir = os.path.join(train_dir, "NORMAL")

#directory with the testing covid images
test_covid_dir = os.path.join(test_dir, "COVID19")

#directory with the testing normal images
test_normal_dir = os.path.join(test_dir, "NORMAL")

In [8]:
#print the filenames
# Training set
train_covid_names = os.listdir(train_covid_dir)
print(train_covid_names[:10])

train_normal_names = os.listdir(train_normal_dir)
print(train_normal_names[:10])

# Test set
test_covid_names = os.listdir(test_covid_dir)
print(test_covid_names[:10])

test_normal_names = os.listdir(test_normal_dir)
print(test_normal_names[:10])

['COVID-19 (1).jpeg', 'COVID-19 (1).png', 'COVID-19 (10).jpeg', 'COVID-19 (101).jpg', 'COVID-19 (107).jpg', 'COVID-19 (11).jpeg', 'COVID-19 (11).jpg', 'COVID-19 (12).jpeg', 'COVID-19 (12).jpg', 'COVID-19 (13).jpeg']
['NORMAL(0).jpg', 'NORMAL(1).jpg', 'NORMAL(100).jpg', 'NORMAL(1000).jpg', 'NORMAL(1002).jpg', 'NORMAL(1005).jpg', 'NORMAL(1006).jpg', 'NORMAL(1007).jpg', 'NORMAL(1008).jpg', 'NORMAL(1009).jpg']
['COVID-19 (313).jpg', 'COVID-19 (353).jpg', 'COVID-19 (371).jpg', 'COVID-19 (425).jpg', 'COVID-19 (439).jpg', 'COVID-19 (457).jpg', 'COVID-19 (461).jpg', 'COVID-19 (475).jpg', 'COVID-19 (479).jpg', 'COVID-19 (485).jpg']
['NORMAL(10).jpg', 'NORMAL(1001).jpg', 'NORMAL(1003).jpg', 'NORMAL(1004).jpg', 'NORMAL(1011).jpg', 'NORMAL(1012).jpg', 'NORMAL(1015).jpg', 'NORMAL(1017).jpg', 'NORMAL(1020).jpg', 'NORMAL(1022).jpg']


In [9]:
#print the total no of images present in each dir
print("Total images in training set:", len(train_covid_names) + len(train_normal_names))

print("Total images in testing set:", len(test_covid_names) + len(test_normal_names))

Total images in training set: 1811
Total images in testing set: 484


# TASK 3 : Data Visualization

In [11]:
# plot a grid of 16 images (8 images of Covid19 and 8 images of Normal)
import matplotlib.image as mpimg

#set the number of columns and rows
rows = 4
cols = 4

#set the figure size
fig = plt.gcf() # gcf = get current figure
fig.set_size_inches(12, 12)

#get the filenames from the covid & normal dir of the train dataset
covid_pic = [os.path.join(train_covid_dir, filename) for filename in train_covid_names[0:8]]
normal_pic = [os.path.join(train_normal_dir, filename) for filename in train_normal_names[0:8]]

#print the list
print(covid_pic)
print(normal_pic)

#merge the covid and normal list
merged_list = covid_pic + normal_pic

for i, img_path in enumerate(merged_list):
    data = img_path.split('/', 6)[6]
    sp = plt.subplot(rows, cols, i+1)
    sp.axis('off')
    img = mpimg.imread(img_path)
    sp.set_title(data, fontsize=10)
    plt.imshow(img, cmap="gray")

plt.show()

['datasets/Data/train\\COVID19\\COVID-19 (1).jpeg', 'datasets/Data/train\\COVID19\\COVID-19 (1).png', 'datasets/Data/train\\COVID19\\COVID-19 (10).jpeg', 'datasets/Data/train\\COVID19\\COVID-19 (101).jpg', 'datasets/Data/train\\COVID19\\COVID-19 (107).jpg', 'datasets/Data/train\\COVID19\\COVID-19 (11).jpeg', 'datasets/Data/train\\COVID19\\COVID-19 (11).jpg', 'datasets/Data/train\\COVID19\\COVID-19 (12).jpeg']
['datasets/Data/train\\NORMAL\\NORMAL(0).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(100).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1000).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1002).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1005).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1006).jpg', 'datasets/Data/train\\NORMAL\\NORMAL(1007).jpg']


IndexError: list index out of range

<Figure size 1200x1200 with 0 Axes>

# TASK 4 : Data Preprocessing & Augmentation

In [None]:
# generate training, testing and validation batches (use augmentation for better generalizability)
dgen_train = ImageDataGenerator(rescale=1./255, 
                                validation_split=0.2,
                                zoom_range= 0.2,
                                horizontal_flip = True) #select 20% of train set to be the validation set

dgen_validation = ImageDataGenerator(rescale=1./255)
dgen_test = ImageDataGenerator(rescale=1./255)

# Get the images from the directories
train_generator = dgen_train.flow_from_directory(train_dir, 
                                                 target_size=(150,150),
                                                 subset="training",
                                                 batch_size=32,
                                                 class_mode="binary") 
                      #target size is resizing all images to 150x150, 
                      #if more than 2 classes we have class_mode=categorical

validation_generator = dgen_train.flow_from_directory(train_dir, 
                                                 target_size=(150,150),
                                                 subset="validation",
                                                 batch_size=32,
                                                 class_mode="binary") 

test_generator = dgen_test.flow_from_directory(test_dir, 
                                                 target_size=(150,150),
                                                 batch_size=32,
                                                 class_mode="binary") 


In [None]:
#get the class indices

train_generator.class_indices

In [None]:
#get the image shape
train_generator.image_shape # Images are in RGB type

# TASK 5 : Build Convolutional Neural Network Model

In [None]:
model = Sequential()

# add the convolutional layer
# filters, size of filters,padding,activation_function,input_shape
model.add(Conv2D(32, 
                 (5,5), 
                 padding='SAME', 
                 activation='relu', 
                 input_shape=(150,150,3))
          )

# pooling layer
model.add(MaxPooling2D(pool_size=(2,2)))

# place a dropout layer
model.add(Dropout(0.5))

# add another convolutional layer
model.add(Conv2D(64, 
                 (5,5), 
                 padding='SAME', 
                 activation='relu')
          )

# pooling layer
model.add(MaxPooling2D(pool_size=(2,2)))

# place a dropout layer
model.add(Dropout(0.5))

# Flatten layer
model.add(Flatten())

# add a dense layer : amount of nodes, activation
model.add(Dense(256,activation="relu"))
# place a dropout layer
# 0.5 drop out rate is recommended, half input nodes will be dropped at each update
model.add(Dropout(0.5))
model.add(Dense(1,activation="sigmoid"))
model.summary()

# TASK 6 : Compile & Train the Model

In [None]:
#compile the model

model.compile(Adam(lr=0.001), 
              loss="binary_crossentropy", 
              metrics=['accuracy']) #if there are more than 2 classes, we use categorical_crossentropy

In [None]:
#train the model
history = model.fit(train_generator, 
                    epochs=30,
                    validation_data=validation_generator)

# TASK 7 : Performance Evaluation

In [None]:
#get the keys of history object

history.history.keys()

In [None]:
#plot graph between training and validation loss

plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.legend(["training", "validation"])
plt.title('Training and Validation Losses')
plt.xlabel('Epoch')
plt.ylabel('Loss')

In [None]:
#plot graph between training and validation accuarcy

plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.legend(["training", "validation"])
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

In [None]:
# get the test acuarcy and loss (data never seen in the training process)

test_loss, test_acc = model.evaluate(test_generator)
print("test loss:", test_loss)
print("test accuracy:", test_acc)

# TASK 8 : Prediction On New Data

In [None]:
from google.colab import files
from tensorflow.keras.preprocessing import image

uploaded = files.upload()

for filename in uploaded.keys():
  # Preprocess the imagefile
  img_path = '/content/'+filename
  img = image.load_img(img_path, target_size=(150,150))
  images = image.img_to_array(img)
  images = np.expand_dims(images, axis=0)

  # Make prediction
  prediction = model.predict(images)
  print(filename)

  if prediction==0:
    print('COVID detected')

  else:
    print('Normal')