In [19]:
path_root = '/content/drive/My Drive/Colab Notebooks/DU/data/'
path_augment = path_root + 'augment/'
path_log = path_root + 'log/'
path_model = path_root + 'model'
path_zip = path_root + 'Coronahack-Chest-XRay-Dataset.zip'
path_unzip = path_root + 'Coronahack-Chest-XRay-Dataset/'
path_csv = path_unzip + 'Chest_xray_Corona_Metadata.csv'
path_train = path_unzip + 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/'
path_test = path_unzip + 'Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test/'

In [20]:
#!unzip '{path_zip}' -d '{path_unzip}'

In [21]:
import time
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.python.util.deprecation as deprecation
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPool2D
from keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [22]:
%load_ext tensorboard
deprecation._PRINT_DEPRECATION_WARNINGS = False
pd.options.mode.chained_assignment = None

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [23]:
class Dataset():
    def __init__(self, classes):
        self.shape = (224, 224)
        self.classes = classes
        self.class_trans = {}
        for i, c in enumerate(self.classes):
            self.class_trans[c], self.class_trans[i] = i, c


    def load(self):
        df = pd.read_csv(path_csv)#.tail(700).head(100)
        df = df.drop(columns=['Unnamed: 0', 'Label_1_Virus_category', 'Label_2_Virus_category'])
        df = df.rename(columns={'X_ray_image_name':'x', 'Label':'y'})

        train_mask = df['Dataset_type'] == 'TRAIN'
        self.xy_train, self.xy_test = df[train_mask], df[~train_mask]

        self.xy_train['x'] = path_train + self.xy_train['x']
        self.xy_test['x'] = path_test + self.xy_test['x']

        self.y_test = self.xy_test['y'].apply(lambda x: self.class_trans[x])

In [24]:
%%time
dataset = Dataset(['Normal', 'Pnemonia'])
dataset.load()

CPU times: user 18.1 ms, sys: 2 ms, total: 20.1 ms
Wall time: 23.7 ms


In [25]:
class Model():
    def __init__(self, data, epochs=2, batch_size=128, learning_rate=0.01):
        self.data = data
        self.epochs = epochs
        self.batch_size = batch_size
        self.learning_rate = learning_rate


    def build(self):
        self.model = Sequential()
        self.model.add(Conv2D(filters=96, kernel_size=11, strides=4, activation='relu'))
        self.model.add(MaxPool2D(pool_size=3, strides=2))
        self.model.add(Conv2D(filters=256, kernel_size=5, padding='same', activation='relu'))
        self.model.add(MaxPool2D(pool_size=3, strides=2))
        self.model.add(Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'))
        self.model.add(Conv2D(filters=384, kernel_size=3, padding='same', activation='relu'))
        self.model.add(Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
        self.model.add(MaxPool2D(pool_size=3, strides=2))
        self.model.add(Flatten())
        self.model.add(Dense(4096, activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(4096, activation='relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(len(self.data.classes) // 2))
        self.model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])


    def fit(self):
        tensorboard_callback = tf.keras.callbacks.TensorBoard(path_log, histogram_freq=1)

        train_flow, test_flow = self._flow(self.data.xy_train), self._flow(self.data.xy_test)

        class_weights = class_weight.compute_class_weight('balanced', self.data.classes, self.data.xy_train['y'])
        class_weights = dict(zip(range(len(self.data.classes)), class_weights))

        self.model.fit(train_flow,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       callbacks=[tensorboard_callback],
                       validation_data=test_flow,
                       class_weight=class_weights)
        

    def predict(self):
        test_flow = self._flow(self.data.xy_test)
        y_pred = self.model.predict_classes(test_flow)
        
        cr = classification_report(self.data.y_test, y_pred, target_names=self.data.classes)
        print(cr)

        cm = confusion_matrix(self.data.y_test, y_pred)
        plt.matshow(cm)
        plt.colorbar()
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.show()


    def save(self):
        self.model.save(path_model)

    
    def board(self):
        %tensorboard --logdir '{path_log}'


    def _flow(self, df):
        gen = ImageDataGenerator(rotation_range=15, zoom_range=0.25)
        return gen.flow_from_dataframe(dataframe=df,
                                       x_col='x',
                                       y_col='y',
                                       target_size=self.data.shape,
                                       color_mode='grayscale',
                                       classes=self.data.classes,
                                       class_mode='binary',
                                       batch_size=self.batch_size,
                                       save_to_dir=path_augment,
                                       save_prefix='aug',
                                       save_format='jpeg')

In [26]:
!rm -rf '{path_augment}' && mkdir '{path_augment}'

In [None]:
%%time
model = Model(dataset)
model.build()
model.fit()
model.predict()
model.save()
model.board()

Found 5286 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.
Epoch 1/2
 8/42 [====>.........................] - ETA: 38:33 - loss: 8.0021 - accuracy: 0.6963