# Importing libraries

In [None]:
import tensorflow as tf
from keras.applications import VGG16
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
import keras
import os
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import glob

In [None]:
# Root folder of the chest X-ray dataset; the train/test/val directories
# used by the rest of the notebook are joined onto this path.
main_path = "../input/chest-xray-pneumonia/chest_xray/"

In [None]:
# Resolve the three dataset split directories underneath main_path.
train_path, test_path, val_path = (
    os.path.join(main_path, split_name)
    for split_name in ("train", "test", "val")
)

In [None]:
# Collect the .jpeg file paths found in each class folder of the training split.
normal_train_images = glob.glob(f"{train_path}/NORMAL/*.jpeg")
pneumonia_train_images = glob.glob(f"{train_path}/PNEUMONIA/*.jpeg")

In [None]:
# One label per training image: 0 for every NORMAL file, 1 for every
# PNEUMONIA file, stacked into a single-column frame for plotting.
normal_labels = [0] * len(normal_train_images)
pneumonia_labels = [1] * len(pneumonia_train_images)
data = pd.DataFrame(
    np.concatenate([normal_labels, pneumonia_labels]),
    columns=["class"],
)

In [None]:
# Bar chart of how many training images carry each label (0 = normal,
# 1 = pneumonia). The column is selected by name through `x=`: passing the
# Series positionally together with `data=` collides in seaborn releases
# whose first positional parameter is `data`.
sns.countplot(x="class", data=data)

### Clear imbalance between the normal and pneumonia classes

# Defining Image Generators
1. Using **ImageDataGenerator** to augment our images, creating a larger and more varied dataset for the model to train on.
2. Augmentation is also intended to help with the imbalanced number of data points between the two classes.

In [None]:
# Training generator: scale pixel values by 1/255 and apply random
# augmentation each time a batch is drawn.
train_Datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    # Random geometric transforms.
    rotation_range=30,
    shear_range=10,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    # Pixels exposed by a transform are filled from the nearest valid pixel.
    fill_mode='nearest',
)

# Validation/test generator: same 1/255 scaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Defining model
* Using the pre-trained VGG-16 model.
* Freezing all layers of the pre-trained base so that only the newly added layers are trained.
* Adding a Squeeze-and-Excitation block on top of the model.
* Using RMSprop as the optimiser.
* Also using learning-rate decay to optimise the learning process.

**Squeeze and Excite function**

In [None]:
import keras.backend as K

def SqueezeAndExcite(inputs, ratio=16):
    """Apply a squeeze-and-excitation block to a 4-D feature map.

    The feature map is globally average-pooled to one value per channel
    (squeeze), passed through a two-layer bottleneck ending in a sigmoid
    (excitation), and the resulting per-channel weights rescale `inputs`.

    Args:
        inputs: a 4-D Keras tensor laid out as (batch, height, width, channels).
            The channel dimension must be statically known.
        ratio: reduction factor of the bottleneck Dense layer.

    Returns:
        A tensor with the same shape as `inputs`, re-weighted per channel.

    Raises:
        ValueError: if `inputs` is not 4-D or its channel dimension is unknown.
    """
    print("Squeeze and Excite layer.")
    batch, height, width, channels = inputs.shape
    print(batch, height, width, channels)
    if channels is None:
        raise ValueError(
            "SqueezeAndExcite requires a statically known channel dimension."
        )

    # Squeeze: one average value per channel -> (batch, channels).
    x = tf.keras.layers.GlobalAveragePooling2D()(inputs)

    # Excitation: bottleneck MLP; keep at least one unit when channels < ratio.
    bottleneck_units = max(channels // ratio, 1)
    x = tf.keras.layers.Dense(bottleneck_units, activation='relu', use_bias=False)(x)
    x = tf.keras.layers.Dense(channels, activation='sigmoid', use_bias=False)(x)

    # Scaling: reshape to (batch, 1, 1, channels) so the weights broadcast over
    # the spatial dimensions. The Reshape layer leaves the batch axis dynamic,
    # so any batch size (including a shorter final batch) and any channel
    # count work, instead of the fixed [16, 1, 1, 512].
    x = tf.keras.layers.Reshape((1, 1, channels))(x)
    return inputs * x

In [None]:
# Pre-trained VGG16 convolutional base (ImageNet weights, no classifier top).
conv_base = VGG16(weights='imagenet', include_top=False)

# Freeze every base layer so training only updates the layers added below.
for layer in conv_base.layers:
    layer.trainable = False

# Classification head: pooled features -> Dense(128) -> Dense(64) -> sigmoid.
X = keras.layers.GlobalAveragePooling2D()(conv_base.output)
for units in (128, 64):
    X = keras.layers.Dense(units, activation='relu')(X)
predictions = keras.layers.Dense(1, activation='sigmoid')(X)
model = keras.Model(conv_base.input, predictions)

# NOTE(review): with staircase=True the rate is only multiplied by 0.96 once
# every 100000 optimizer steps; the first fit call runs 20 steps x 100 epochs,
# so the rate presumably stays at its initial value throughout - confirm
# whether a smaller decay_steps was intended.
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True,
)
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=lr_schedule),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

### Creating augmented data from the above defined image data generators

In [None]:
# All three directory iterators share the same image size, batch size and
# binary label mode; only the source folder and the generator differ.
_flow_options = dict(target_size=(150, 150), batch_size=16, class_mode='binary')

# Augmented batches for training.
train_generator = train_Datagen.flow_from_directory(train_path, **_flow_options)
# Rescale-only batches for validation and testing.
validation_generator = val_datagen.flow_from_directory(val_path, **_flow_options)
test_generator = val_datagen.flow_from_directory(test_path, **_flow_options)

# Callback

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once validation binary accuracy reaches 98%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's `val_binary_accuracy` and request a stop at >= 0.98.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dict supplied by Keras; may be None or lack the key.
        """
        # `logs` defaults to None rather than a shared mutable dict.
        val_accuracy = (logs or {}).get('val_binary_accuracy')
        # The metric is absent when the model was compiled without
        # BinaryAccuracy or no validation data was given; comparing None
        # against a float would raise TypeError, so skip the check instead.
        if val_accuracy is None:
            return
        if val_accuracy >= 0.98:
            print("98% accuracy reached.")
            self.model.stop_training = True

# Fitting the model

In [None]:
# Early-stopping callback instance; it is passed to the fine-tuning fit call.
callbacks = myCallback()

In [None]:
# First training run with the frozen base: 100 epochs of 20 batches each,
# validated on the validation iterator after every epoch.
history = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=100,
    steps_per_epoch=20,
)

In [None]:
# Evaluate on the test iterator and keep the accuracy (index 1; index 0 is
# the loss). `Model.evaluate` accepts the iterator directly and, with no
# `steps` given, runs one full pass over it. The previous
# `evaluate_generator(test_generator, 624)` used a deprecated API and asked
# for 624 batches of 16 - presumably the image count rather than the batch
# count - which re-reads the test set many times.
accuracy = model.evaluate(test_generator)[1]

In [None]:
# Display the test accuracy of the frozen-base model.
accuracy

# Unfreezing all the layers of the model


In [None]:
# NOTE(review): this builds a brand-new VGG16 base and head, so nothing
# learned by the earlier fit call is carried over - confirm that restarting
# from ImageNet weights is intended.
conv_base = VGG16(weights='imagenet', include_top=False)

# Mark every base layer as trainable so the whole network is updated.
for layer in conv_base.layers:
    layer.trainable = True

# Squeeze-and-excitation on the final feature map, then the same head as
# before: pooled features -> Dense(128) -> Dense(64) -> sigmoid.
X = SqueezeAndExcite(conv_base.output)
X = keras.layers.GlobalAveragePooling2D()(X)
for units in (128, 64):
    X = keras.layers.Dense(units, activation='relu')(X)
predictions = keras.layers.Dense(1, activation='sigmoid')(X)
model = keras.Model(conv_base.input, predictions)

# Finetuning the model
Training the model again with a lower learning rate.

In [None]:
# Very small learning rate for fine-tuning the unfrozen network.
initial_learning_rate = 1e-5
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)
# The model's output layer already applies a sigmoid, so the loss must treat
# predictions as probabilities (from_logits=False). With from_logits=True the
# loss would interpret those probabilities as raw logits.
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr_schedule),
              loss=keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=[keras.metrics.BinaryAccuracy()])

In [None]:
# Fine-tuning run: 100 epochs of 50 batches each, with the early-stopping
# callback attached. Note that `new_model` holds the returned training
# history object, not a model.
new_model = model.fit(
    x=train_generator,
    validation_data=validation_generator,
    epochs=100,
    steps_per_epoch=50,
    callbacks=[callbacks],
)

In [None]:
# Evaluate the fine-tuned model on the test iterator and keep the binary
# accuracy (index 1; index 0 is the loss). `Model.evaluate` replaces the
# deprecated `evaluate_generator` and, with no `steps` given, runs one full
# pass over the iterator instead of the 624 batches requested before.
final_accuracy = model.evaluate(test_generator)[1]

In [None]:
# Display the test accuracy of the fine-tuned model.
final_accuracy

# Model Metrics

In [None]:
# Per-epoch curves recorded during the first training run.
# (`accuracy` is rebound here from the scalar test accuracy to the curve.)
accuracy, val_accuracy = (
    history.history[key] for key in ('accuracy', 'val_accuracy')
)
loss, val_loss = (
    history.history[key] for key in ('loss', 'val_loss')
)

In [None]:
# Per-epoch curves recorded during the fine-tuning run.
updated_accuracy, updated_val_accuracy = (
    new_model.history[key] for key in ('binary_accuracy', 'val_binary_accuracy')
)
updated_loss, updated_val_loss = (
    new_model.history[key] for key in ('loss', 'val_loss')
)

# Visualizing the train and validation accuracy and loss

In [None]:
# Side-by-side panels for the first run: accuracy on the left, loss on the
# right, each showing the training and validation curve.
plt.figure(figsize=(15, 10))

panels = [
    (1, "accuracy", accuracy, val_accuracy),
    (2, "loss", loss, val_loss),
]
for position, metric, train_curve, val_curve in panels:
    plt.subplot(2, 2, position)
    plt.plot(train_curve, label=f"Training {metric}")
    plt.plot(val_curve, label=f"Validation {metric}")
    plt.legend()
    plt.title(f"Training vs validation {metric}")

plt.show()

In [None]:
# Side-by-side panels for the fine-tuning run: accuracy on the left, loss on
# the right, each showing the training and validation curve.
plt.figure(figsize=(15, 10))

panels = [
    (1, "accuracy", updated_accuracy, updated_val_accuracy),
    (2, "loss", updated_loss, updated_val_loss),
]
for position, metric, train_curve, val_curve in panels:
    plt.subplot(2, 2, position)
    plt.plot(train_curve, label=f"Updated Training {metric}")
    plt.plot(val_curve, label=f"Updated Validation {metric}")
    plt.legend()
    plt.title(f"Updated Training vs validation {metric}")

plt.show()