In [1]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

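The original tutorial downloads the flowers dataset from Google's servers and stores it locally; that call is commented out below, and the images are instead read from a local folder (`small_dataset/flower_photos`). In the download call, `cache_dir='.'` means the archive (.tgz) is saved in the current folder.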

<h3 style='color:purple'>Load  dataset</h3>

In [2]:
# dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
#data_dir = tf.keras.utils.get_file('COVID-19_Radiography_Dataset', origin=dataset_url,  cache_dir='.', untar=True)
# cache_dir indicates where to download data. I specified . which means current directory
# untar true will unzip it

In [3]:
# data_dir

In [4]:
import pathlib
# Root folder of the image dataset, relative to the notebook's working directory.
# The class sub-folders (COVID, Lung_Opacity, ...) are globbed from here.
data_dir = pathlib.Path('./small_dataset/flower_photos/')
data_dir

WindowsPath('small_dataset/flower_photos')

In [5]:
# list(data_dir.glob('*/*.png'))[:5]

In [6]:
# image_count = len(list(data_dir.glob('*/*.png')))
# print(image_count)

In [7]:
# COVID = list(data_dir.glob('COVID/*'))
# COVID[:5]

In [8]:
# PIL.Image.open(str(COVID[4]))

In [9]:
# Lung_Opacity = list(data_dir.glob('Lung_Opacity/*'))
# PIL.Image.open(str(Lung_Opacity[3]))

<h3 style='color:purple'>Read the images from disk into a NumPy array using OpenCV</h3>

In [10]:
# Map each class folder name to the list of files found inside it.
# Directory listing order depends on the OS/filesystem, so the paths are sorted:
# this keeps the sample order -- and therefore the seeded train/test split --
# identical from one machine to the next.
flowers_images_dict = {
    class_name: sorted(data_dir.glob(f'{class_name}/*'))
    for class_name in ('COVID', 'Lung_Opacity', 'Normal', 'Viral_Pneumonia')
}

In [11]:
# Integer class id for every class folder, numbered in the order listed here.
flowers_labels_dict = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ['COVID', 'Lung_Opacity', 'Normal', 'Viral_Pneumonia']
    )
}

In [12]:
# Peek at the first five file paths collected for the COVID class.
flowers_images_dict['COVID'][:5]

[WindowsPath('small_dataset/flower_photos/COVID/COVID-1.png'),
 WindowsPath('small_dataset/flower_photos/COVID/COVID-10.png'),
 WindowsPath('small_dataset/flower_photos/COVID/COVID-100.png'),
 WindowsPath('small_dataset/flower_photos/COVID/COVID-1000.png'),
 WindowsPath('small_dataset/flower_photos/COVID/COVID-1001.png')]

In [13]:
# Plain-string form of the first COVID path; this is what gets passed to cv2.imread.
str(flowers_images_dict['COVID'][0])

'small_dataset\\flower_photos\\COVID\\COVID-1.png'

In [14]:
# Read one sample image into an array. cv2.imread returns None (it does not raise)
# when the file is missing or cannot be decoded.
img = cv2.imread(str(flowers_images_dict['COVID'][0]))

In [15]:
# Dimensions of the sample image as (height, width, channels).
img.shape

(299, 299, 3)

In [16]:
# Sanity check that cv2.resize yields the requested size; the result is not stored.
cv2.resize(img,(180,180)).shape

(180, 180, 3)

In [17]:
# Load every image into X and its integer class id into y, in matching order.
X, y = [], []

for class_name, image_paths in flowers_images_dict.items():
    class_id = flowers_labels_dict[class_name]
    for image_path in image_paths:
        img = cv2.imread(str(image_path))
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be
            # decoded (e.g. a stray non-image file matched by the '*' glob).
            # A None entry would turn np.array(X) into an object array and
            # break scaling/training, so skip the file and report it.
            print(f'Skipping unreadable file: {image_path}')
            continue
        # Images are kept at their native resolution (no resize is applied).
        X.append(img)
        y.append(class_id)

In [18]:
# Stack the Python lists of images and labels into NumPy arrays.
X, y = np.array(X), np.array(y)

<h3 style='color:purple'>Train test split</h3>

In [None]:
from sklearn.model_selection import train_test_split

# Seeded shuffle-and-split so the partition is repeatable.
# test_size=0.25 is scikit-learn's default hold-out fraction, spelled out here.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.25, random_state=0)

<h3 style='color:purple'>Preprocessing: scale images</h3>

In [None]:
# Scale pixel values down by 255 so they lie in [0, 1] for 8-bit images.
# Casting to float32 first avoids the float64 array that `X_train / 255` would
# allocate: half the memory, and float32 is Keras' default float type anyway.
X_train_scaled = X_train.astype(np.float32) / 255


In [None]:
# NOTE(review): fixed 15-second pause between the two scaling cells. Nothing
# visible in this notebook depends on the delay -- presumably a workaround
# (e.g. for memory pressure); confirm and remove if it is not needed.
import time
time.sleep(15)


In [None]:
# Scale the test images exactly like the training images (divide by 255).
# Casting to float32 first avoids the float64 array that `X_test / 255` would
# allocate: half the memory, and float32 is Keras' default float type anyway.
X_test_scaled = X_test.astype(np.float32) / 255

<h3 style='color:purple'>Build convolutional neural network and train it</h3>

In [None]:
# num_classes = 5

# model = Sequential([
#   layers.Conv2D(16, 3, padding='same', activation='relu'),
#   layers.MaxPooling2D(),
#   layers.Conv2D(32, 3, padding='same', activation='relu'),
#   layers.MaxPooling2D(),
#   layers.Conv2D(64, 3, padding='same', activation='relu'),
#   layers.MaxPooling2D(),
#   layers.Flatten(),
#   layers.Dense(128, activation='relu'),
#   layers.Dense(num_classes)
# ])

# model.compile(optimizer='adam',
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#               metrics=['accuracy'])
              
# model.fit(X_train_scaled, y_train, epochs=30)              

In [None]:
# model.evaluate(X_test_scaled,y_test)

**Here we see that while the training accuracy is very high (99%), the test accuracy is significantly lower (66.99%), indicating overfitting. Let's make some predictions before we use data augmentation to address the overfitting.**

In [None]:
# score = tf.nn.softmax(predictions[0])

In [None]:
# np.argmax(score)

In [None]:
# y_test[0]

<h3 style='color:purple'>Improve Test Accuracy Using Data Augmentation</h3>

In [None]:
# Random flip / rotation / zoom applied to each batch as the first stage of the model.
data_augmentation = keras.Sequential(
  [
    # The input shape is taken from the training array instead of being
    # hard-coded: the images are loaded without resizing, so a fixed
    # (300, 300, 3) does not match 299x299 images and makes fit() fail.
    layers.experimental.preprocessing.RandomFlip("horizontal",
                                                 input_shape=X_train_scaled.shape[1:]),
    # 0.1 is a fraction of the full range: rotate by up to +/-10% of a full
    # turn, zoom by up to +/-10%.
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
  ]
)

**Original Image**

In [None]:
# plt.axis('off')
# plt.imshow(X[0])

**Newly generated training sample using data augmentation**

In [None]:
# plt.axis('off')
# plt.imshow(data_augmentation(X)[0].numpy().astype("uint8"))

<h3 style='color:purple'>Train the model using data augmentation and a dropout layer</h3>

In [None]:
# One output logit per class. Derived from the label mapping (4 classes) rather
# than hard-coded: the previous value of 5 was left over from the flowers
# example and created an output unit that no label ever uses.
num_classes = len(flowers_labels_dict)

# Small CNN: augmentation -> three conv/pool stages -> dropout -> dense head.
model = Sequential([
  data_augmentation,
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  # No activation here: the model outputs raw logits.
  layers.Dense(num_classes)
])

# from_logits=True matches the activation-free output layer above; the sparse
# loss takes the integer class ids in y_train directly (no one-hot encoding).
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(X_train_scaled, y_train, epochs=10)

In [None]:
# Evaluate on the held-out test set; returns the loss followed by the
# accuracy metric requested in model.compile.
model.evaluate(X_test_scaled,y_test)

In [None]:
# Raw per-class logits for every test image (the model has no softmax layer);
# apply tf.nn.softmax to a row to turn it into probabilities.
predictions = model.predict(X_test_scaled)
# Show only the first few rows instead of dumping the whole array.
predictions[:5]