# Importing Libraries

In [None]:
import numpy as np 
import pandas as pd
import os
import random
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.plotting import plot_confusion_matrix
import keras
from keras import layers
from keras import models
import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation,  AveragePooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import tensorflow as tf
import datetime

# Expose every physical GPU that TensorFlow can find to this runtime.
my_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
tf.config.experimental.set_visible_devices(devices= my_devices, device_type='GPU')

# Seed every random number generator this notebook imports (Python's `random`,
# NumPy and TensorFlow) so that a fresh "Restart & Run All" starts from the same
# RNG state. NOTE: some GPU kernels may still be non-deterministic.
seed = 240
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)



In [None]:
import tensorflow as tf
from tensorflow.python.client import device_lib

# Report the default GPU device TensorFlow picked, or remind the user that
# no GPU is visible to this runtime.
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print("Please install GPU version of TF")
    # print(device_lib.list_local_devices())  # uncomment to list every local device
else:
    print('Default GPU Device: {}'.format(gpu_name))

# Pulling the Data From GitHub


In [None]:
!git clone https://github.com/hrmoradi/Workshop_data # goes to your cotent folder

# Import the data

In [None]:
# Root folder of the cloned dataset; it contains train/, val/ and test/,
# each with one sub-folder per class (PNEUMONIA, NORMAL).
base_directory     = '/content/Workshop_data/chest_xray/'

# Collect the file names (not full paths) found in every split/class folder.
# os.listdir returns them in arbitrary, OS-dependent order.
train_pne_dir, train_nor_dir = (
    os.listdir(base_directory + '/train/' + label + '/') for label in ('PNEUMONIA', 'NORMAL')
)

val_pne_dir, val_nor_dir = (
    os.listdir(base_directory + '/val/' + label + '/') for label in ('PNEUMONIA', 'NORMAL')
)

test_pne_dir, test_nor_dir = (
    os.listdir(base_directory + '/test/' + label + '/') for label in ('PNEUMONIA', 'NORMAL')
)

## Check the size and pixel ranges of pictures

In [None]:
from PIL import Image
# ! pip install imageio
import imageio

# Look at the first two NORMAL training images: draw each one as a small
# thumbnail and print the basic properties of the loaded pixel array.
for i in range(2):
    file_name = train_nor_dir[i]
    pic = imageio.imread(base_directory + '/train/NORMAL/' + file_name)

    plt.figure(figsize=(2, 2))
    plt.imshow(pic, cmap='gray')

    print()
    print('Type of the image : ' , type(pic))
    print(f'Shape of the image : {pic.shape}')
    print(f'Dimension of Image {pic.ndim}')
    print('dtype: ', pic.dtype)
    print(f'Maximum RGB value in this image {pic.max()}')
    print(f'Minimum RGB value in this image {pic.min()}')

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
# Draw the first n_rows * n_cols NORMAL training images as a thumbnail grid.
n_rows = 3
n_cols = 10
plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))

# Subplot positions are 1-based and filled row by row, so a single running
# index covers the whole grid.
for index in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, index + 1)
    pic = imageio.imread(base_directory+'/train/NORMAL/' + train_nor_dir[index])
    plt.imshow(pic, cmap='gray')
    plt.axis('off')

plt.subplots_adjust(wspace=0.2, hspace=0.5)
# plt.savefig('ChestImages')  # uncomment to write the grid to disk
plt.show()

# Importing and preprocessing the data

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# NOTE: color_mode is kept for reference but is NOT passed to the generators.
# They therefore use the Keras default ("rgb") and yield 3-channel images,
# which is what the models in this notebook are built for (input depth 3).
color_mode="grayscale" # "rgb"
img_dims = 224
batch_size = 32

# Only the training images are augmented (random zoom / vertical flip).
# Validation and test images are merely rescaled to [0, 1] so that the reported
# metrics describe unmodified images.
train_gen = ImageDataGenerator(rescale=1/255.0, zoom_range=0.3, vertical_flip=True)
val_gen = ImageDataGenerator(rescale=1/255.0)
test_gen = ImageDataGenerator(rescale=1/255.0)

# Training data: every image under train/. The dataset ships a dedicated val/
# folder, so no part of train/ is held back via validation_split/subset.
train_generator = train_gen.flow_from_directory(
    directory=base_directory+"/train/",
    target_size=(img_dims, img_dims),
    batch_size=batch_size,
    class_mode="binary",
    shuffle=True)

# Validation data: every image under val/, read through the non-augmenting
# val_gen. (Using train_gen with subset='validation' here would augment the
# images and keep only 20% of the folder.)
valid_generator = val_gen.flow_from_directory(
    directory=base_directory+"/val/",
    target_size=(img_dims, img_dims),
    batch_size=batch_size,
    class_mode="binary",
    shuffle=False)

# Test data: fixed order so predictions can be aligned with test_generator.classes.
test_generator = test_gen.flow_from_directory(
    directory=base_directory+"/test/",
    target_size=(img_dims, img_dims),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)


In [None]:
from collections import Counter

# Column labels in class-index order. flow_from_directory numbers the class
# folders alphabetically, i.e. NORMAL -> 0 and PNEUMONIA -> 1.
class_labels = ('normal', 'pnuemonia')

# Count images per class for every split. Looking the counts up by class index
# (instead of relying on Counter's first-seen ordering) keeps the columns
# aligned and yields 0 for a class that has no images in a split.
split_generators = {'train': train_generator, 'validation': valid_generator, 'test': test_generator}
split_counts = []
for split_gen in split_generators.values():
    per_class = Counter(split_gen.classes)
    split_counts.append([per_class[class_idx] for class_idx in range(len(class_labels))])
train_counter, val_counter, test_counter = split_counts

summa_dat = pd.DataFrame(split_counts, columns=class_labels, index=list(split_generators))
print('Frequency of the normal+infected: \n', summa_dat)
summa_dat.plot(kind='bar', title='Frequency of the normal+infected')
plt.xticks(rotation=0, horizontalalignment="center")
plt.show()

# A basic CNN model

### Creating a basic model using Sequential API

In [None]:
# Baseline CNN: two convolutions, one pooling stage, dropout, then a dense
# classifier that ends in a single sigmoid unit (binary output).
model_1 = keras.Sequential()
model_1.add(Conv2D(filters=64, kernel_size=3, activation='relu',
                   input_shape=(img_dims, img_dims, 3)))
model_1.add(Conv2D(filters=32, kernel_size=3, activation='relu'))
model_1.add(MaxPool2D(pool_size=(2, 2)))
model_1.add(Dropout(0.5))
model_1.add(Flatten())
model_1.add(Dense(128, activation='relu'))
model_1.add(Dense(1, activation='sigmoid'))

model_1.summary()

In [None]:
# Render the layer graph (with tensor shapes) to a PNG next to the notebook.
keras.utils.plot_model(
    model_1,
    to_file="basic_model_1.png",
    show_shapes=True,
)

## Compile the model

In [None]:
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as the human-readable metric.
model_1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## Training and Evaluating the Model

In [None]:
epochs = 7

# len(generator) is the number of batches needed to see every image once
# (the last batch may be smaller). Using n // batch_size instead would skip
# that final partial batch and evaluates to 0 steps whenever a split holds
# fewer images than one batch.
model1_hist = model_1.fit(
    x=train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=epochs,
    verbose=1)

In [None]:
def draw_hist_f(hist, ylim=(0, 1)):
    """Plot every metric stored in a training history, one line per metric.

    Args:
        hist: Object exposing a ``history`` mapping of metric name to the list
            of per-epoch values (e.g. the value returned by ``model.fit``).
        ylim: ``(low, high)`` limits for the y-axis. Defaults to ``(0, 1)``.
            Pass ``None`` to let matplotlib autoscale, which is useful when a
            loss curve rises above 1 and would otherwise be cut off.
    """
    # DataFrame.plot draws one line per column (metric) over the epoch index
    # and returns the Axes it drew on.
    ax = pd.DataFrame(hist.history).plot(figsize=(8, 5))
    ax.set_title('Learning curve of the model')
    ax.grid(True)
    if ylim is not None:
        ax.set_ylim(*ylim)
    # Anchor the legend outside the plotting area so it never hides the curves.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.9))
    plt.show()


In [None]:
# Learning curves (loss / accuracy per epoch) of the baseline CNN.
draw_hist_f(hist=model1_hist)

### Evaluate the model on the test set


In [None]:
# Score the baseline on the COMPLETE test set. No `steps` argument is given,
# so Keras iterates over every batch of the generator. Limiting it to
# test_generator.n // batch_size steps would skip the final partial batch,
# and because the test generator is not shuffled those skipped images would
# all come from the end of the class-ordered file list.
# Exercise: compare with model_1.predict(test_generator) - how do the outputs differ?
model_1_test = model_1.evaluate(test_generator)
print(model_1.metrics_names)
print(model_1_test)
print("Accuracy = ",model_1_test[1])

In [None]:
# Callbacks shared by the transfer-learning runs.
# Both patience values must stay below the 7 epochs those runs train for,
# otherwise neither callback can ever trigger. The learning rate is reduced
# first (after 2 epochs without val_loss improvement); training stops if
# val_loss still has not improved after 4 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)
lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=2)

# Transfer Learning

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/keras/applications
from tensorflow.keras import applications as app

# Optional recipe for single-channel input: replicate the gray channel three
# times and feed the result to the backbone through `input_tensor`.
# img_input = tf.keras.layers.Input(shape=(img_dims, img_dims,1))
# img_conc = tf.keras.layers.Concatenate()([img_input, img_input, img_input])

# Pre-trained convolutional backbone without its ImageNet classification head.
# The ImageNet weights expect 3-channel input (no support for depth 1).
# NOTE(review): the generators rescale pixels to [0, 1]; confirm this matches the
# input scaling the chosen backbone was trained with (see its preprocess_input).
tl_model = app.ResNet50V2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Alternative backbones to experiment with (all need 3-channel input, see recipe above):
# tl_model = app.ResNet152V2(input_shape=None, include_top=False, weights='imagenet', input_tensor=img_conc, pooling='avg')
# tl_model = app.DenseNet201(input_shape=None,include_top=False, weights='imagenet',input_tensor=img_conc, pooling='avg')
# tl_model = app.EfficientNetB7(input_shape=None,include_top=False, weights='imagenet',input_tensor=img_conc, pooling='avg')

### training full model

In [None]:
# Classification head on top of the pre-trained backbone: global average
# pooling, one hidden dense layer, single sigmoid unit for the binary label.
x = tl_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128,activation='relu')(x)
output = tf.keras.layers.Dense(1,activation='sigmoid')(x)

# tf.keras.Model keeps the model class in the same Keras implementation as the
# tf.keras layers / applications used to build the graph.
tl_full_model = tf.keras.Model(inputs=tl_model.input, outputs=output)
# tl_full_model.summary()
tl_full_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# No layer is frozen here, so backbone and head are trained together.
# len(generator) covers every batch, including the final partial one.
tl_full_model_hist = tl_full_model.fit(
    x=train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=7,
    callbacks=[early_stopping, lr],
    verbose=1)

# Keep the metrics in their own variable so the trained model stays available
# (assigning them to tl_full_model would replace the model with a list).
# Evaluating without `steps` scores the whole test set.
tl_full_model_test = tl_full_model.evaluate(test_generator)
print("Accuracy = ",tl_full_model_test[1])

### Training Head

In [None]:
# Fresh pre-trained backbone whose weights stay fixed: only the new head learns.
tl_freez_model = app.ResNet50V2(input_shape=(224,224,3), include_top=False, weights='imagenet')

# Make loaded layers non-trainable (setting the flag on the model applies it
# to every layer it contains).
tl_freez_model.trainable = False

# Classification head: global average pooling, one hidden dense layer,
# single sigmoid unit for the binary label.
x = tl_freez_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128,activation='relu')(x)
output = tf.keras.layers.Dense(1,activation='sigmoid')(x)

# Create model object (tf.keras.Model matches the tf.keras layers used above).
tl_freez_full_model = tf.keras.Model(inputs=tl_freez_model.input, outputs=output)
# tl_freez_full_model.summary()
tl_freez_full_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# len(generator) covers every batch, including the final partial one.
tl_freez_full_model_hist = tl_freez_full_model.fit(
    x=train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=7,
    callbacks=[early_stopping, lr],
    verbose=1)

# Keep the metrics in their own variable so the trained model stays available
# (assigning them to tl_freez_full_model would replace the model with a list).
# Evaluating without `steps` scores the whole test set.
tl_freez_full_model_test = tl_freez_full_model.evaluate(test_generator)
print("Accuracy = ",tl_freez_full_model_test[1])


### training head + some conv layers

In [None]:
# Fresh pre-trained backbone; its early layers are frozen, the later ones fine-tuned.
tl_freez_model = app.ResNet50V2(input_shape=(224,224,3), include_top=False, weights='imagenet')

# Freeze layers in model order until the marker layer is reached; the marker
# and everything after it stay trainable.
first_trainable_layer = "conv4_block1_1_conv"
for layer in tl_freez_model.layers:
    if layer.name == first_trainable_layer:
        print("found conv4_block1_1_conv")
        break
    layer.trainable = False
else:
    # Loop ended without `break`: the marker name does not exist in this
    # backbone, so every layer was frozen. Say so instead of failing silently.
    print("WARNING: layer", first_trainable_layer, "not found - the whole backbone is frozen")

# Classification head: global average pooling, one hidden dense layer,
# single sigmoid unit for the binary label.
x = tl_freez_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(128,activation='relu')(x)
output = tf.keras.layers.Dense(1,activation='sigmoid')(x)

# Create model object (tf.keras.Model matches the tf.keras layers used above).
tl_freez_full_model = tf.keras.Model(inputs=tl_freez_model.input, outputs=output)
# tl_freez_full_model.summary()
tl_freez_full_model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# len(generator) covers every batch, including the final partial one.
tl_freez_full_model_hist = tl_freez_full_model.fit(
    x=train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=7,
    callbacks=[early_stopping, lr],
    verbose=1)

# Keep the metrics in their own variable so the trained model stays available
# (assigning them to tl_freez_full_model would replace the model with a list).
# Evaluating without `steps` scores the whole test set.
tl_freez_full_model_test = tl_freez_full_model.evaluate(test_generator)
print("Accuracy = ",tl_freez_full_model_test[1])
