<a href="https://colab.research.google.com/github/hector6298/Deep-Learning-Collab-notebooks/blob/master/InceptionResNetV2_covid_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab helpers: `drive` mounts Google Drive; `files` is used in later cells
# to download the trained artifacts.
from google.colab import drive, files
# Mount Google Drive under /content/drive (a later cell runs a script stored in "My Drive").
drive.mount('/content/drive')

In [0]:
# Install pydicom (presumably needed by the dataset-creation script for DICOM files — TODO confirm),
# then remove any existing kaggle package, upgrade pip, and install the latest kaggle CLI.
!pip3 install pydicom
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip3 install --upgrade kaggle

In [0]:
# Write the Kaggle API credentials file expected by the kaggle CLI.
#
# SECURITY: credentials must never be hard-coded in a notebook. The API key that
# used to be embedded in this cell has been published and should be revoked on
# kaggle.com. Credentials are now read from the KAGGLE_USERNAME / KAGGLE_KEY
# environment variables, falling back to an interactive prompt.
import getpass
import json
import os

KAGGLE_CONFIG_DIR = '/root/.kaggle'
os.makedirs(KAGGLE_CONFIG_DIR, exist_ok=True)  # idempotent, unlike a bare `mkdir`

kaggle_username = os.environ.get('KAGGLE_USERNAME') or input('Kaggle username: ')
# getpass keeps the key out of the rendered cell output.
kaggle_key = os.environ.get('KAGGLE_KEY') or getpass.getpass('Kaggle API key: ')

kaggle_json_path = os.path.join(KAGGLE_CONFIG_DIR, 'kaggle.json')
with open(kaggle_json_path, 'w') as cred_file:
    json.dump({'username': kaggle_username, 'key': kaggle_key}, cred_file)
# Restrict the credentials file to the current user.
os.chmod(kaggle_json_path, 0o600)

In [0]:
# Fetch the raw data sources: three chest X-ray repositories from GitHub,
# plus one Kaggle dataset and one Kaggle competition archive (requires kaggle.json).
!git clone https://github.com/ieee8023/covid-chestxray-dataset.git
!git clone https://github.com/agchung/Figure1-COVID-chestxray-dataset
!git clone https://github.com/agchung/Actualmed-COVID-chestxray-dataset
!kaggle datasets download -d "tawsifurrahman/covid19-radiography-database"
!kaggle competitions download -c "rsna-pneumonia-detection-challenge" 

In [0]:
!unzip rsna-pneumonia-detection-challenge.zip
!rm rsna-pneumonia-detection-challenge.zip
!unzip covid19-radiography-database.zip
!rm covid19-radiography-database.zip
!mkdir data
!mkdir data/train
!mkdir data/test
!mkdir /content/logs

In [0]:
# Run the dataset-creation script stored on the mounted Google Drive.
# NOTE(review): presumably this populates data/train, data/test and writes the
# train_split_v3.txt / test_split_v3.txt files read later — confirm against the script.
!python3 "/content/drive/My Drive/COVID-Net-master/COVID-Net-master/create_covidx_v3.py"

In [0]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from skimage.exposure import equalize_adapthist
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
from sklearn.metrics import average_precision_score
from tensorflow.python.keras.callbacks import Callback
from sklearn.metrics import recall_score, classification_report
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [0]:
def _process_csv_file(file):
    """Return every line of the text file at `file` as a list of strings.

    Lines are returned exactly as read, trailing newlines included.
    Note: a later cell defines a function with this same name and behaviour.
    """
    with open(file, 'r') as handle:
        return handle.readlines()

In [0]:
def xray_enhance(img):
  """Denoise, sharpen and contrast-equalise an image.

  Pipeline: 3x3 median blur -> unsharp mask (1.5 * denoised - 0.5 * Gaussian
  blur of it) -> `equalize_adapthist`. The output of the last step is returned
  unchanged.
  """
  # Denoise with a 3x3 median filter.
  denoised = cv2.medianBlur(img, 3)
  # Sharpen edges with an unsharp mask; the blended result is written back
  # into `denoised` because it is also passed as the destination array.
  blurred = cv2.GaussianBlur(denoised, (3, 3), 10.0)
  sharpened = cv2.addWeighted(denoised, 1.5, blurred, -0.5, 0, denoised)
  # Contrast Limited Adaptive Histogram Equalization
  return equalize_adapthist(sharpened)

In [0]:
from tensorflow import keras

import numpy as np
import os
import cv2

from tensorflow.keras.preprocessing.image import ImageDataGenerator

def crop_top(img, percent=0.15):
    """Return `img` without its top rows.

    The number of rows removed is `img.shape[0] * percent`, truncated to an int.
    """
    rows_to_drop = int(img.shape[0] * percent)
    cropped = img[rows_to_drop:]
    return cropped


def process_image_file(filepath, top_percent, size):
    """Load an image from disk, crop its top rows and resize it to a square.

    Args:
        filepath: path of the image file, read with `cv2.imread`.
        top_percent: fraction of rows removed from the top (passed to `crop_top`).
        size: side length in pixels of the square output.

    Returns:
        The cropped image resized to `(size, size)`.

    Raises:
        OSError: if OpenCV cannot read `filepath`.
    """
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail here with the offending path rather than
        # with an opaque AttributeError further down.
        raise OSError('Could not read image file: {}'.format(filepath))
    img = crop_top(img, percent=top_percent)
    return cv2.resize(img, (size, size))



# Shared augmentation configuration used by `apply_augmentation`:
# random rotations (rotation_range=10) and random horizontal/vertical shifts
# (range 0.1); feature-wise centering and std-normalisation are disabled.
_augmentation_transform = ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

def apply_augmentation(img):
    """Apply one random transform from the module-level `_augmentation_transform` to `img`."""
    return _augmentation_transform.random_transform(img)

def _process_csv_file(file):
    """Read the text file at `file` and return its lines (newlines kept) as a list.

    Note: an identical function is also defined in an earlier cell; when the
    cells run in order, this later definition is the one that stays bound.
    """
    with open(file, 'r') as split_file:
        rows = list(split_file)
    return rows


class BalanceCovidDataset(keras.utils.Sequence):
    """Keras `Sequence` that yields batches with COVID-19 samples mixed in.

    Each line of `csv_file` is split on whitespace; field 1 is the image file
    name and field 2 the label ('normal', 'pneumonia' or 'COVID-19'). Batches
    are cut from the combined normal + pneumonia list, then a fraction
    `covid_percent` of the slots (at least one) is overwritten with randomly
    drawn COVID-19 samples. This happens for training and test data alike.

    Every batch has exactly `batch_size` rows. If the final slice of the
    dataset is shorter, the trailing rows are zero padding (all-zero image,
    label 0); they receive sample weight 0 so they do not contribute to a
    weighted loss.
    """

    def __init__(
            self,
            data_dir,
            csv_file,
            is_training=True,
            batch_size=8,
            input_shape=(448, 448),
            n_classes=3,
            num_channels=3,
            mapping={
                'normal': 0,
                'pneumonia': 1,
                'COVID-19': 2
            },
            shuffle=True,
            augmentation=apply_augmentation,
            covid_percent=0.3,
            class_weights=[1., 1., 6.],
            top_percent=0.08
    ):
        """Read the split file and group its lines by label.

        Args:
            data_dir: root folder containing the 'train' and 'test' image folders.
            csv_file: path of the split file listing the samples.
            is_training: selects the 'train' (True) or 'test' (False) folder and
                enables augmentation.
            batch_size: number of rows in every returned batch.
            input_shape: (height, width) of the batch tensor; images are resized
                to a square of side `input_shape[0]`.
            n_classes: number of classes for the one-hot labels.
            num_channels: number of image channels in the batch tensor.
            mapping: label string -> integer class index.
            shuffle: whether `on_epoch_end` reshuffles the sample lists.
            augmentation: callable applied to each training image, or None.
            covid_percent: fraction of each batch replaced by COVID-19 samples.
            class_weights: per-class sample weight, indexed by class index.
            top_percent: fraction of rows cropped from the top of each image.
        """
        self.datadir = data_dir
        self.dataset = _process_csv_file(csv_file)
        self.is_training = is_training
        self.batch_size = batch_size
        self.N = len(self.dataset)
        self.input_shape = input_shape
        self.n_classes = n_classes
        self.num_channels = num_channels
        self.mapping = mapping
        # Honour the caller's choice (this used to be hard-coded to True).
        self.shuffle = shuffle
        self.covid_percent = covid_percent
        self.class_weights = class_weights
        self.n = 0  # cursor used by __next__
        self.augmentation = augmentation
        self.top_percent = top_percent

        datasets = {'normal': [], 'pneumonia': [], 'COVID-19': []}
        for l in self.dataset:
            datasets[l.split()[2]].append(l)
        # Index 0: non-COVID samples that drive batching; index 1: COVID-19 pool.
        self.datasets = [
            datasets['normal'] + datasets['pneumonia'],
            datasets['COVID-19'],
        ]
        print(len(self.datasets[0]), len(self.datasets[1]))

        self.on_epoch_end()

    def __next__(self):
        """Return the batch at the internal cursor and advance it, wrapping at the end."""
        # Get one batch of data
        batch_x, batch_y, weights = self.__getitem__(self.n)
        # Batch index
        self.n += 1

        # Once the whole dataset has been served, reshuffle and start over.
        if self.n >= len(self):
            self.on_epoch_end()  # was referenced without being called
            self.n = 0

        return batch_x, batch_y, weights

    def __len__(self):
        """Number of batches per epoch, based on the non-COVID samples only."""
        return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))

    def on_epoch_end(self):
        """Shuffle both sample lists in place when shuffling is enabled."""
        if self.shuffle == True:
            for v in self.datasets:
                np.random.shuffle(v)

    def __getitem__(self, idx):
        """Build batch `idx`.

        Returns:
            (batch_x, batch_y, weights): images scaled to [0, 1] with shape
            (batch_size, *input_shape, num_channels), one-hot labels, and one
            sample weight per row taken from `class_weights` (0 for padding rows).
        """
        batch_x = np.zeros(
            (self.batch_size, *self.input_shape, self.num_channels))
        batch_y = np.zeros(self.batch_size)

        # Slicing copies the list, so the replacements below do not alter the dataset.
        batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                       self.batch_size]
        n_real = len(batch_files)

        # upsample covid cases
        covid_size = max(int(n_real * self.covid_percent), 1)
        covid_inds = np.random.choice(np.arange(n_real),
                                      size=covid_size,
                                      replace=False)
        # Sample without replacement whenever the COVID pool is large enough;
        # fall back to sampling with replacement instead of raising otherwise.
        covid_files = np.random.choice(
            self.datasets[1],
            size=covid_size,
            replace=len(self.datasets[1]) < covid_size)
        for i in range(covid_size):
            batch_files[covid_inds[i]] = covid_files[i]

        folder = 'train' if self.is_training else 'test'
        for i in range(n_real):
            sample = batch_files[i].split()

            x = process_image_file(os.path.join(self.datadir, folder, sample[1]),
                                   self.top_percent,
                                   self.input_shape[0])

            # `augmentation` is always set in __init__, so test for None rather
            # than for the attribute's existence.
            if self.is_training and self.augmentation is not None:
                x = self.augmentation(x)

            x = x.astype('float32') / 255.0
            y = self.mapping[sample[2]]

            batch_x[i] = x
            batch_y[i] = y

        weights = np.take(self.class_weights, batch_y.astype('int64'))
        # Rows past n_real are zero padding, not real samples: give them no weight.
        weights[n_real:] = 0

        return batch_x, keras.utils.to_categorical(batch_y, num_classes=self.n_classes), weights

In [0]:
# Training configuration.
EPOCHS = 30
# Initial Adam learning rate; the LearningRateScheduler callback defined in a
# later cell sets the rate at the start of every epoch.
LEARNING_RATE = 0.001
BATCH_SIZE = 64
INPUT_SIZE = 224   # images are resized to INPUT_SIZE x INPUT_SIZE
NCLASSES = 3       # normal, pneumonia, COVID-19
NUM_CHANNELS = 3   # colour channels of the input images
# The last dimension is the channel count, not the class count (both happen to be 3).
INPUT_SHAPE = (INPUT_SIZE, INPUT_SIZE, NUM_CHANNELS)

# Split files and image root used by the data generators.
TRAINFILE = '/content/train_split_v3.txt'
TESTFILE = '/content/test_split_v3.txt'
DATADIR = '/content/data'

In [0]:
# Raw lines of both split files, kept for inspection.
with open(TRAINFILE) as train_fh:
    trainfiles = train_fh.readlines()
with open(TESTFILE) as test_fh:
    testfiles = test_fh.readlines()


def _build_generator(split_file, training):
    """Create a BalanceCovidDataset over `split_file` with the notebook's shared settings."""
    return BalanceCovidDataset(
        data_dir=DATADIR,
        csv_file=split_file,
        is_training=training,
        batch_size=BATCH_SIZE,
        input_shape=(INPUT_SIZE, INPUT_SIZE),
        covid_percent=0.3,
        class_weights=[1., 1., 4.],
        top_percent=0.08,
    )


# Training batches are augmented and read from data/train; test batches from data/test.
train_generator = _build_generator(TRAINFILE, training=True)
test_generator = _build_generator(TESTFILE, training=False)
                                


In [0]:
# ImageNet-pretrained InceptionResNetV2 without its classification head,
# frozen so that only the layers added on top of it are trained.
base_model = tf.keras.applications.InceptionResNetV2(
    input_shape=INPUT_SHAPE,
    weights='imagenet',
    include_top=False,
)
base_model.trainable = False

train_flag = False


In [0]:
def get_model(base_model, input_shape):
  """Stack a pooling + softmax classification head on top of `base_model`.

  The returned model maps inputs of shape `input_shape` through `base_model`,
  global average pooling and a dense softmax layer with NCLASSES outputs.
  """
  inputs = tf.keras.layers.Input(shape=input_shape)
  features = base_model(inputs)
  pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
  outputs = tf.keras.layers.Dense(NCLASSES, activation='softmax')(pooled)
  return tf.keras.Model(inputs=inputs, outputs=outputs)

In [0]:
# Metrics tracked during training: overall categorical accuracy, a label-weighted
# multi-label AUC, and precision/recall for each class index (0, 1, 2).
METRICS = [
  tf.keras.metrics.CategoricalAccuracy(),

  # class 0
  tf.keras.metrics.Precision(name='precision_0', class_id=0),
  tf.keras.metrics.Recall(name='recall_0', class_id=0),
  # per-label AUCs averaged with weights 1 : 1 : 3
  tf.keras.metrics.AUC(name='auc_all', multi_label=True, label_weights=[1.,1.,3.]),

  # class 1
  tf.keras.metrics.Precision(name='precision_1', class_id=1),
  tf.keras.metrics.Recall(name='recall_1', class_id=1),

  # class 2
  tf.keras.metrics.Precision(name='precision_2', class_id=2),
  tf.keras.metrics.Recall(name='recall_2', class_id=2),
]


In [0]:
# Build the classifier on top of the frozen base model.
model = get_model(base_model, INPUT_SHAPE)

# One-hot labels -> categorical cross-entropy, optimised with Adam.
metrics = METRICS
loss_object = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

model.compile(
    loss=loss_object,
    optimizer=optimizer,
    metrics=metrics,
)

In [0]:
# Render a top-to-bottom diagram of the model's layers to model.png
# (layer names shown, shapes and nested models hidden).
tf.keras.utils.plot_model(
    model,
    to_file="model.png",
    show_shapes=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
)


In [0]:
def scheduler(epoch):
  """Learning-rate schedule for `LearningRateScheduler`.

  Args:
    epoch: zero-based epoch index.

  Returns:
    The learning rate as a Python float:
      * epochs 0-5:   0.005
      * epochs 6-20:  0.001
      * epochs > 20:  0.001 * exp(0.1 * (10 - epoch))
  """
  # Function-scope import: plain floats are returned instead of a tf.Tensor,
  # which is the return type LearningRateScheduler documents for the schedule.
  import math

  if epoch <= 5:
    return 0.005
  if epoch <= 20:
    return 0.001
  # NOTE(review): the offset is 10 although decay starts after epoch 20, so the
  # rate drops from 0.001 to about 0.00033 at epoch 21 — confirm this is intended.
  return 0.001 * math.exp(0.1 * (10 - epoch))
# Callbacks passed to model.fit: apply `scheduler` to set the learning rate for
# each epoch, and write TensorBoard logs to /content/logs.
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="/content/logs")

In [0]:
# Train for EPOCHS epochs. Both generators yield (images, one-hot labels,
# sample weights) per batch; the test generator doubles as validation data.
# The returned History object is plotted in a later cell.
model_history = model.fit(x=train_generator, 
                epochs=EPOCHS,
                validation_data=test_generator,
                verbose=1,
                callbacks=[scheduler_callback, tensorboard_callback]
                )

In [0]:


def plot_metrics(model_history):
  """Plot training vs. validation loss and accuracy per epoch.

  Args:
    model_history: the History object returned by `model.fit`; its `history`
      dict must contain 'loss', 'val_loss' and an accuracy metric with its
      'val_' counterpart.

  Raises:
    KeyError: if none of the expected keys is present in the history.
  """
  history = model_history.history

  loss = history['loss']
  val_loss = history['val_loss']
  # The model is compiled with CategoricalAccuracy, which is logged as
  # 'categorical_accuracy'; the sparse name is kept as a fallback for models
  # compiled with SparseCategoricalAccuracy.
  if 'categorical_accuracy' in history:
    acc_key = 'categorical_accuracy'
  else:
    acc_key = 'sparse_categorical_accuracy'
  acc = history[acc_key]
  val_acc = history['val_' + acc_key]
  # Use the number of recorded epochs rather than the global EPOCHS so the
  # plot still works if training ran for a different number of epochs.
  epochs = range(len(loss))

  fig, axs = plt.subplots(2)
  axs[0].plot(epochs, loss, 'r', label='Training loss')
  axs[0].plot(epochs, val_loss, 'bo', label='Validation loss')
  axs[0].legend()

  axs[1].plot(epochs, acc, 'r', label='training accuracy')
  axs[1].plot(epochs, val_acc, 'bo', label="vall acc")
  axs[1].legend()

  plt.show()



In [0]:
# Show the loss and accuracy curves recorded by model.fit.
plot_metrics(model_history)

In [0]:
@tf.function
def test_step(test_images, test_labels):
  """Run the model on one batch and return its class probabilities.

  `test_labels` is accepted for call-site compatibility but not used.
  """
  # training=False pins inference behaviour explicitly instead of relying on
  # the default learning phase.
  return model(test_images, training=False)

In [0]:
# Collect predicted and true class indices over the whole test generator.
y_pred = []
y_true = []

# Batching is driven by the non-COVID sample list (datasets[0]); a short final
# batch is zero-padded up to batch_size by the generator, and those padding
# rows must not be counted as test samples.
n_test_samples = len(test_generator.datasets[0])
eval_batch_size = test_generator.batch_size

# Index the generator directly so that re-running this cell always starts at
# batch 0 instead of depending on the generator's internal next() cursor.
for batch_idx in range(len(test_generator)):
    test_images, test_labels, _ = test_generator[batch_idx]
    n_valid = min(eval_batch_size, n_test_samples - batch_idx * eval_batch_size)

    predictions = test_step(test_images, test_labels)
    predicted_classes = np.argmax(predictions, axis=1)[:n_valid]
    true_classes = np.argmax(test_labels, axis=1)[:n_valid]

    y_pred.extend(predicted_classes.tolist())
    y_true.extend(true_classes.tolist())

In [0]:
# Per-class precision/recall/F1 on the collected predictions. The display names
# follow the generator's label mapping (normal=0, pneumonia=1, COVID-19=2).
target_names = ['Normal', 'Pneumonia', 'COVID-19']
print(classification_report(y_true, y_pred, target_names=target_names))


#cm = confusion_matrix(y_true, y_pred)
#print(cm)
#cmn = confusion_matrix(y_true, y_pred, normalize='all')


#df_cm = pd.DataFrame(cm, index = ['normal','pneumonia','covid-19'],
        #          columns = ['normal','pneumonia','covid-19'])

#print(df_cm)

In [0]:
# Persist the architecture (JSON) and the weights (HDF5) separately,
# then download both files through the Colab `files` helper.
json_config = model.to_json()
with open('model_config.json', 'w') as config_fh:
    config_fh.write(json_config)

model.save_weights('covid_net_weights2.h5')

for artifact_name in ('covid_net_weights2.h5', 'model_config.json'):
    files.download(artifact_name)

In [0]:
!zip /content/logs.zip /content/logs

In [0]:
# Download the zipped TensorBoard logs through the Colab `files` helper.
files.download("/content/logs.zip")