#COVID-19 & INFECTION DETECTION WITH CT-SCAN IMAGES
##- Binary Classification & CNN based Model -

Group Members:

Muneeza Iftikhar (02-136212-012)

Hafsa Hafeez Siddiqui (02-136212-026)

Aqsa Khan (02-136212-039)

# Data Handling


In [2]:
import os
from sklearn.model_selection import train_test_split
import pandas as pd


def define_paths(data_dir):
    filepaths = []
    labels = []

    folds = os.listdir(data_dir)
    for fold in folds:
        foldpath = os.path.join(data_dir, fold)
        filelist = os.listdir(foldpath)
        for file in filelist:
            fpath = os.path.join(foldpath, file)
            filepaths.append(fpath)
            labels.append(fold)

    return filepaths, labels


# Concatenate data paths with labels into one dataframe ( to later be fitted into the model )
def define_df(files, classes):
    Fseries = pd.Series(files, name= 'filepaths')
    Lseries = pd.Series(classes, name='labels')
    return pd.concat([Fseries, Lseries], axis= 1)


# Split dataframe into train, valid, and test
def split_data(data_dir):
    # create train dataframe
    files, classes = define_paths(data_dir)
    df = define_df(files, classes)

    strat = df['labels']
    # split the whole dataset into train and non-train dataframes
    train_df, dummy_df = train_test_split(df, train_size=0.7, shuffle=True, random_state=101, stratify=strat)

    # from the non-train dataset, create validation and test dataframes
    strat = dummy_df['labels']
    validation_df, test_df = train_test_split(dummy_df, train_size=0.5, shuffle=True, random_state=101, stratify=strat)

    return train_df, validation_df, test_df


def data_generators(data_dir, img_size, batch_size, class_mode, color_mode):

    from keras.preprocessing.image import ImageDataGenerator

    train_df, val_df, test_df = split_data(data_dir)

    # model input is taken ONLY from imagedatagenerator
    # initializing the imagedatagenerator class
    training_data_generator = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest'
    )

    testing_data_generator = ImageDataGenerator(
        rescale=1./255      # normalisation of images between 0 and 1 from 0 to 255 pixels
    )

    # initialize training, validation and testing generators
    train_generator = training_data_generator.flow_from_dataframe(
        train_df,
        x_col='filepaths',
        y_col='labels',
        target_size=img_size,
        batch_size=batch_size,
        class_mode=class_mode,
        color_mode=color_mode,
        shuffle=True,
    )

    val_generator = testing_data_generator.flow_from_dataframe(
        val_df,
        x_col='filepaths',
        y_col='labels',
        target_size=img_size,
        batch_size=batch_size,
        class_mode=class_mode,
        color_mode=color_mode,
        shuffle=True,
    )

    test_generator = testing_data_generator.flow_from_dataframe(
        test_df,
        x_col='filepaths',
        y_col='labels',
        target_size=img_size,
        batch_size=batch_size,
        class_mode=class_mode,
        color_mode=color_mode,
        shuffle=False,
    )

    return train_generator, val_generator, test_generator

# Model Definition

In [3]:
from keras.layers import *
from keras.models import *
from keras.optimizers import Adam


# defining Convolutional Neural Network (CNN)
# after initializing the imagedatagenerator

class Classifier:
    def get_model(input_shape):
        model = Sequential() # runs the layers in sequence
        model.add(Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_shape))
        model.add(Conv2D(64, (3,3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25)) # used to reduce overfitting

        model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))

        model.add(Conv2D(128, kernel_size=(3,3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))

        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(2, activation='softmax'))

        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

        return model


    def VGG_model(input_shape):         # VERY LARGE CNN MODEL ON WHICH WE DID TRANSFER LEARNING
                                                              # pre-trained model
        from keras.applications import VGG16

        num_classes=2

        # VGG_model HAS BEEN TRAINED ON imagenet DATASET
        vgg = VGG16(input_shape=input_shape, weights = 'imagenet', include_top = False)

        for layer in vgg.layers:    # only using feature extractor, every layer will remain the same
            layer.trainable = False

        x = Flatten()(vgg.output)   # using our classification layer
        x = Dense(128, activation = 'relu')(x)
        x = Dense(64, activation = 'relu')(x)
        x = Dense(num_classes, activation = 'softmax')(x)

        model = Model(inputs = vgg.input, outputs = x)

        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

        return model

# Configuration & Training

In [4]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# setting only relative path to get dataset, not actually getting the dataset
data_dir = '/content/drive/MyDrive/dataset'

# setting only relative path to save / retrieve models
# (because model will not be trained everytime you want to use it)
model_dir = '/content/drive/MyDrive/models'


# configuration parameters for the classifier we created:
# INPUT_SHAPE = (128,128,1)                    # input shape of model
# model = Classifier.get_model(INPUT_SHAPE)
# COLOR_MODE = 'greyscale'                     # read the images as greyscale - 1 channel
# CLASS_MODE = 'categorical'                   # softmax layer at the output of the model
# model_path = '/model.h5'                            # this path will be used to save the model
# MODEL_NAME = 'CLASSY'


# configuration parameters for the VGG model (used for transfer learning):
INPUT_SHAPE = (224,224,3)                       # input shape of model
model = Classifier.VGG_model(INPUT_SHAPE)
COLOR_MODE = 'rgb'                              # read the images as colored - 3 channels because VGG model is trained on colored images even though our dataset images are greyscale
CLASS_MODE = 'categorical'                      # softmax layer at the output of the model
model_path = '/vgg_model.h5'                    # this path will be used to save the model
MODEL_NAME = 'VGG'


# input_shape of model (entry layer) and image size should be the same
# INPUT_SHAPE[0] = 224 , INPUT_SHAPE[1] = 224
img_size = (INPUT_SHAPE[0],INPUT_SHAPE[1])

# approximated by dividing the number of images in the training set for a single iteration
# when all the batches are completed running that is equal to total number of images which is equal to 1 epoch
batch_size = 32

# get ImageDataGenerators.flow_from_dataframe - get training, validation, and testing data generators
train_generator, validation_generator, test_generator = \
    data_generators(data_dir=data_dir, img_size=img_size, batch_size=batch_size, class_mode=CLASS_MODE, color_mode=COLOR_MODE)

# defining model

# Callback functions for monitoring, training, and saving purposes of the fitting of the model:
# checkpoint and early_stopping functions w.r.t val_loss by default

# saves the model if the validation loss is lowest
# verbose: Mode 0 is silent, and mode 1 displays messages when the callback takes an action
checkpoint = ModelCheckpoint(model_dir+model_path, verbose=1, save_best_only=True)

# stops the training if the validation loss does not decrease constantly
# patience: Number of epochs with no improvement after which training will be stopped.
early_stopping = EarlyStopping( patience=8)


##################################### TRAIN THE MODEL #####################################
# train the model using fit_generator
# after defining the CNN model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=25,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[checkpoint, early_stopping]
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 1736 validated image filenames belonging to 2 classes.
Found 372 validated image filenames belonging to 2 classes.
Found 373 validated image filenames belonging to 2 classes.
Epoch 1/25
Epoch 1: val_loss improved from inf to 0.60027, saving model to /content/drive/MyDrive/models/vgg_model.h5


  saving_api.save_model(


Epoch 2/25
Epoch 2: val_loss improved from 0.60027 to 0.41059, saving model to /content/drive/MyDrive/models/vgg_model.h5
Epoch 3/25
Epoch 3: val_loss improved from 0.41059 to 0.33458, saving model to /content/drive/MyDrive/models/vgg_model.h5
Epoch 4/25
Epoch 4: val_loss did not improve from 0.33458
Epoch 5/25
Epoch 5: val_loss did not improve from 0.33458
Epoch 6/25
Epoch 6: val_loss did not improve from 0.33458
Epoch 7/25
Epoch 7: val_loss improved from 0.33458 to 0.28484, saving model to /content/drive/MyDrive/models/vgg_model.h5
Epoch 8/25
Epoch 8: val_loss did not improve from 0.28484
Epoch 9/25
Epoch 9: val_loss improved from 0.28484 to 0.26395, saving model to /content/drive/MyDrive/models/vgg_model.h5
Epoch 10/25
Epoch 10: val_loss did not improve from 0.26395
Epoch 11/25
Epoch 11: val_loss did not improve from 0.26395
Epoch 12/25
Epoch 12: val_loss did not improve from 0.26395
Epoch 13/25
Epoch 13: val_loss did not improve from 0.26395
Epoch 14/25
Epoch 14: val_loss did not i

# Testing

In [5]:
import keras
from keras.preprocessing.image import ImageDataGenerator

data_dir = '/content/drive/MyDrive/dataset'
model_dir = '/content/drive/MyDrive/models/vgg_model.h5'

_, test_df, _ = split_data(data_dir)


testing_data_generator = ImageDataGenerator(
        rescale=1./255,
    )

test_generator = testing_data_generator.flow_from_dataframe(
    test_df,
    x_col='filepaths',
    y_col='labels',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    color_mode='rgb',
    shuffle=False,
)

model = keras.models.load_model(model_dir)

loss, acc = model.evaluate(test_generator)

print('\n\n')
print('Saved VGG Model, accuracy: {:5.2f}%'.format(100*acc))
print('\n')

Found 372 validated image filenames belonging to 2 classes.



Saved VGG Model, accuracy: 92.20%


