In [2]:
import numpy as np 
import pandas as pd
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras import models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from imutils import paths
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import argparse
import cv2
import os
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:
# Uncomment the below codes for downloading the data

#!gdown https://drive.google.com/open?id=1odxJF4kyHEtBqhkvz3iXpV3iQK34m6z0
#!unzip covid_data_compiled_sagar.zip

In [4]:
## Global variables
data_list = os.listdir('multi_class/train')
DATASET_PATH  = 'multi_class/train'
test_dir =  'multi_class/test'
IMAGE_SIZE    = (150, 150)
NUM_CLASSES   = len(data_list)
BATCH_SIZE    = 10  # try reducing batch size or freeze more layers if your GPU runs out of memory
NUM_EPOCHS    = 80
LEARNING_RATE =0.00001


## Detecting covid vs normal vs tertiary pneumonia (Bacterial and viral)


As the number of image classes is too few, we will use image augmentation while loading the images in memory.

In [5]:

def augment(training=True):
    

    # Train Image Augmentation
    train_datagen = ImageDataGenerator(rescale=1./255,
                                       rotation_range=50,
                                       featurewise_center = True,
                                       featurewise_std_normalization = True,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       shear_range=0.25,
                                       zoom_range=0.1,
                                       zca_whitening = True,
                                       channel_shift_range = 20,
                                       horizontal_flip = True ,
                                       vertical_flip = True ,
                                       validation_split = 0.2,
                                       fill_mode='constant')

    
    if training == True:
        

        batches = train_datagen.flow_from_directory(DATASET_PATH,
                                                          target_size=IMAGE_SIZE,
                                                          shuffle=True,
                                                          batch_size=BATCH_SIZE,
                                                          subset = "training",
                                                          seed=42,
                                                          class_mode="categorical"
                                                          )
    else:
        
        batches = train_datagen.flow_from_directory(DATASET_PATH,
                                                  target_size=IMAGE_SIZE,
                                                  shuffle=True,
                                                  batch_size=BATCH_SIZE,
                                                  subset = "validation",
                                                  seed=42,
                                                  class_mode="categorical"
                                                  )
        

    return batches

## Creating model architecture and using VGG 16 with Imagenet weights as initial layer.

We will be using Adam as the optimizer and categorical crossentropy as the loss function.

In [6]:
def model_transfer_learning(learning_rate = LEARNING_RATE):
    
    base_model = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(150, 150, 3))
    base_model.trainable = False
    model = models.Sequential()
    model.add(base_model)
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy',

                  optimizer=Adam(lr=LEARNING_RATE),
                  metrics=['acc'])
    
    print(model.summary())
    
    return model

## Train the model

In [7]:
def fit_model(model, train_batches, valid_batches):
    #FIT MODEL
    print(len(train_batches))
    print(len(valid_batches))

    STEP_SIZE_TRAIN=train_batches.n//train_batches.batch_size
    STEP_SIZE_VALID=valid_batches.n//valid_batches.batch_size

    result=model.fit_generator(train_batches,
                            steps_per_epoch =STEP_SIZE_TRAIN,
                            validation_data = valid_batches,
                            validation_steps = STEP_SIZE_VALID,
                            epochs= NUM_EPOCHS,                        
                           )
    
    return result

In [8]:
def plot_results(result, epochs = 80):

    acc = result.history['acc']
    loss = result.history['loss']
    val_acc = result.history['val_acc']
    val_loss = result.history['val_loss']
    plt.figure(figsize=(15, 5))
    plt.subplot(121)
    plt.plot(range(1,epochs), acc[1:], label='Train_acc')
    plt.plot(range(1,epochs), val_acc[1:], label='Test_acc')
    plt.title('Accuracy over ' + str(epochs) + ' Epochs', size=15)
    plt.legend()
    plt.grid(True)
    plt.subplot(122)
    plt.plot(range(1,epochs), loss[1:], label='Train_loss')
    plt.plot(range(1,epochs), val_loss[1:], label='Test_loss')
    plt.title('Loss over ' + str(epochs) + ' Epochs', size=15)
    plt.legend()
    plt.grid(True)
    plt.show()
    

## Plot some predictions and probability.

In [9]:
def check_test_data_and_image_map(model, test_dir=test_dir, target_size=IMAGE_SIZE, batch_size = BATCH_SIZE):
    
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    eval_generator = test_datagen.flow_from_directory(
     test_dir,target_size=IMAGE_SIZE,
     batch_size=1,
     shuffle=False,
     seed=42, 
     class_mode='categorical')

    eval_generator.reset()
    x = model.evaluate_generator(eval_generator,
     steps = np.ceil(len(eval_generator) / BATCH_SIZE),
     use_multiprocessing = False,
     verbose = 1,
     workers=1
     )
    print('Test loss:' , x[0])
    print('Test accuracy:',x[1])
    
    eval_generator.reset()  
    pred = model.predict_generator(eval_generator,1000,verbose=1)
    
    for index, probability in enumerate(pred):
        try:
            image_path = test_dir + "/" +eval_generator.filenames[index]
            image = mpimg.imread(image_path)
            #BGR TO RGB conversion using CV2
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            pixels = np.array(image)
            plt.imshow(pixels)

            print(eval_generator.filenames[index])
            if np.argmax(probability) == 0:
                plt.title("%.2f" % (probability[0]*100) + "% COVID19")
            elif np.argmax(probability) == 1:
                plt.title("%.2f" % (probability[1]*100) + "% Normal")
            else:
                plt.title("%.2f" % (probability[2]*100) + "% Tertiary pneumonia")
            plt.show()
        except:
            continue




In [10]:
def main():
    train_batches = augment()
    valid_batches = augment(False)
    model = model_transfer_learning()
    result = fit_model(model, train_batches, valid_batches)
    plot_results(result, epochs = 80)
    check_test_data_and_image_map(model)

In [11]:
if __name__ == '__main__':
    main()



Found 216 images belonging to 4 classes.
Found 54 images belonging to 4 classes.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               2097408   
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 771       
Total params: 16,812,867
Trainable params: 2,098,179
Non-trainable params: 14,714,688
_________________________________________________________________
None
22
6
Epoch 1/80




ValueError: A target array with shape (6, 4) was passed for an output of shape (None, 3) while using as loss `categorical_crossentropy`. This loss expects targets to have the same shape as the output.