## Import libraries & define constants

In [20]:
from math import sqrt, ceil
import os
from os import listdir
import shutil
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras import applications, optimizers, Model

# Switch between a local checkout (False) and a Google Colab session (True).
COLAB = False

# TODO2: replace this branch with a proper per-environment configuration.
# CONFIG keys, as consumed by the model cells below:
#   "load"        -> load saved artefacts instead of training
#   "load_remote" -> read from WEIGHT_PATH["remote"] rather than WEIGHT_PATH["local"]
#   "load_model"  -> load the full saved model rather than only its weights
if not COLAB:
    DATASET_PATH = "../res/dataset/"

    MODEL_CP_DIR = "../res/modelcp/"
    WEIGHT_PATH = {"local": MODEL_CP_DIR+"local/", "remote": MODEL_CP_DIR+"remote/"}

    CONFIG = {"load": True, "load_remote": True, "load_model": True}
else:
    # DATASET_PATH was previously undefined on this branch, so building PATHS
    # below raised NameError. Like MODEL_CP_DIR it is an empty placeholder
    # (paths resolve relative to the working directory).
    # TODO: point these at the real Colab locations.
    DATASET_PATH = ""

    MODEL_CP_DIR = ""
    WEIGHT_PATH = {"local": MODEL_CP_DIR+"", "remote": MODEL_CP_DIR+""}
    CONFIG = {"load": False, "load_remote": True, "load_model": True}

# One sub-folder per dataset partition.
PATHS = {"train": DATASET_PATH+"train/", "val": DATASET_PATH+"val/", "test": DATASET_PATH+"test/"}
# Class sub-folder names inside each partition.
PATH_LABELS = {"normal": "NORMAL/", "bacteria": "BACTERIA/", "virus": "VIRUS/", "pneumonia": "PNEUMONIA/"}

# Lower-cased class names taken from the training sub-directories.
# os.listdir returns entries in arbitrary order, so sort them (Keras'
# flow_from_directory indexes classes in sorted directory order) and ignore
# stray files such as .DS_Store.
# NOTE: this runs before the PNEUMONIA split performed in the next cell; on a
# dataset that has not been split yet, re-run this cell afterwards.
LABELS = [
    entry.lower()
    for entry in sorted(os.listdir(PATHS["train"]))
    if os.path.isdir(os.path.join(PATHS["train"], entry))
]

## Load the data

In [21]:
def separate_pneuomia_classes(dir):
    """Split ``<dir>/PNEUMONIA/`` into ``<dir>/BACTERIA/`` and ``<dir>/VIRUS/``.

    Every entry of the pneumonia folder whose name contains ``"virus"`` is
    moved to the virus folder, everything else to the bacteria folder; the
    emptied pneumonia folder is then removed.

    The operation is idempotent and resumable: if the pneumonia folder does
    not exist (already split, or never present) nothing happens, and if a
    previous run was interrupted the remaining files are moved on the next
    call instead of being left behind as an extra class.

    Args:
        dir: Path of one dataset partition (e.g. the train folder).
    """
    label_dir = {
        label: os.path.join(dir, PATH_LABELS[label])
        for label in ("pneumonia", "bacteria", "virus")
    }

    # Nothing (left) to split.
    if not os.path.isdir(label_dir["pneumonia"]):
        return

    # exist_ok lets an interrupted split be resumed.
    os.makedirs(label_dir["bacteria"], exist_ok=True)
    os.makedirs(label_dir["virus"], exist_ok=True)

    for f in os.listdir(label_dir["pneumonia"]):
        target = "virus" if "virus" in f else "bacteria"
        shutil.move(os.path.join(label_dir["pneumonia"], f), label_dir[target])

    shutil.rmtree(label_dir["pneumonia"])

# Apply the PNEUMONIA -> BACTERIA/VIRUS split to every dataset partition.
for split in ("train", "val", "test"):
    separate_pneuomia_classes(PATHS[split])

BATCH_SIZE = 16 # TODO change?

COLOR_MODE = "grayscale"
CLASS_MODE = "categorical"
RESCALE = 1./255

# Spatial size of the network input; the channel axis is added below according
# to the backend's image data format.
IMAGE_SIZE = (150, 150)
if tf.keras.backend.image_data_format() == 'channels_first':
    INPUT_SHAPE = (1,) + IMAGE_SIZE
    # Built from IMAGE_SIZE rather than from the already-permuted INPUT_SHAPE,
    # which previously produced (3, 1, 150) on channels_first backends.
    INPUT_SHAPE3 = (3,) + IMAGE_SIZE
else:
    INPUT_SHAPE = IMAGE_SIZE + (1,)
    INPUT_SHAPE3 = IMAGE_SIZE + (3,)

POOL_SIZE = (2,2)
KERNEL_SIZE = (3,3)
ACTIVATION_HIDDEN_LAYERS = 'relu'
ACTIVATION_OUTPUT_LAYER = 'softmax'
LOSS = 'categorical_crossentropy'
OPTIMIZER = 'rmsprop'
METRICS = ['accuracy']
EPOCH = 50 # TODO increase

datagen = ImageDataGenerator(rescale=RESCALE)

# Remember: data augmentation only on the train_generator --> Create 2 datagens
# target_size is always (height, width), independent of the channel ordering.
_FLOW_KWARGS = dict(target_size=IMAGE_SIZE, batch_size=BATCH_SIZE, color_mode=COLOR_MODE, class_mode=CLASS_MODE)
train_generator = datagen.flow_from_directory(PATHS["train"], **_FLOW_KWARGS)
val_generator = datagen.flow_from_directory(PATHS["val"], **_FLOW_KWARGS)
test_generator = datagen.flow_from_directory(PATHS["test"], **_FLOW_KWARGS)

image_count = {"train": len(train_generator.filenames), "val": len(val_generator.filenames), "test": len(test_generator.filenames)}
# Batches needed to see every image once. math.ceil yields plain ints, which
# is what the Keras `steps` arguments expect (np.ceil returned floats).
steps_per_epoch = {name: ceil(count / BATCH_SIZE) for name, count in image_count.items()}

Found 5216 images belonging to 3 classes.
Found 16 images belonging to 3 classes.
Found 624 images belonging to 3 classes.


### Show batch

In [None]:
def show_batch(image_batch, label_batch, size, labels=None):
    """Plot the first ``size`` images of a batch in a square grid.

    Args:
        image_batch: Batch of images; channel 0 of each image is displayed in
            gray scale (assumes channels-last images).
        label_batch: One-hot encoded labels, one row per image.
        size: Number of images to show; clamped to the batch length because
            the last batch of an epoch can be smaller than the batch size.
        labels: Class names indexed by class id. Defaults to the module-level
            ``LABELS``.
    """
    if labels is None:
        labels = LABELS

    size = min(size, len(image_batch))
    if size <= 0:
        return

    plt.figure(figsize=(10,10))
    rows_cols = ceil(sqrt(size))

    for n in range(size):
        plt.subplot(rows_cols, rows_cols, n+1)
        plt.imshow(image_batch[n][:,:,0], cmap="gray")
        # Position of the hot entry in the one-hot row = class id.
        plt.title(labels[int(np.argmax(label_batch[n]))])
        plt.axis('off')

image_batch, label_batch = next(train_generator)
# Take the class names from the generator itself so the titles always follow
# the indices used in the one-hot labels.
class_indices = train_generator.class_indices
show_batch(image_batch, label_batch, BATCH_SIZE,
           labels=[name.lower() for name in sorted(class_indices, key=class_indices.get)])

## Model

### Simple CNN

In [None]:
# Simple CNN: three Conv -> activation -> MaxPool stages followed by a small
# dense classifier with a 3-way softmax output.
model = Sequential()

model.add(Conv2D(32, KERNEL_SIZE, input_shape=INPUT_SHAPE))
model.add(Activation(ACTIVATION_HIDDEN_LAYERS))
model.add(MaxPooling2D(pool_size=POOL_SIZE))

model.add(Conv2D(32, KERNEL_SIZE))
model.add(Activation(ACTIVATION_HIDDEN_LAYERS))
model.add(MaxPooling2D(pool_size=POOL_SIZE))

model.add(Conv2D(64, KERNEL_SIZE))
model.add(Activation(ACTIVATION_HIDDEN_LAYERS))
model.add(MaxPooling2D(pool_size=POOL_SIZE))

model.add(Flatten()) # converts 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation(ACTIVATION_HIDDEN_LAYERS))
model.add(Dropout(0.5))
model.add(Dense(3))
model.add(Activation(ACTIVATION_OUTPUT_LAYER))

if CONFIG["load"]:
    # Reuse saved artefacts instead of training.
    path = WEIGHT_PATH["remote"] if CONFIG["load_remote"] else WEIGHT_PATH["local"]

    if CONFIG["load_model"]:
        # Replaces the freshly built network with the saved one.
        model = load_model(path+"model.h5")
    else:
        model.load_weights(path+"weights.h5")

    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
else:
    # Make sure the checkpoint directory exists *before* training so the run
    # cannot be lost to a failed save at the very end.
    if WEIGHT_PATH["local"]:
        os.makedirs(WEIGHT_PATH["local"], exist_ok=True)

    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
    # Model.fit accepts generators directly; fit_generator is deprecated.
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=EPOCH,
        steps_per_epoch=int(steps_per_epoch["train"]),
        validation_steps=int(steps_per_epoch["val"]),
    )

    model.save_weights(WEIGHT_PATH["local"]+'weights.h5')
    model.save(WEIGHT_PATH["local"]+'model.h5')

#predict = model.predict_generator(test_generator, steps = steps_per_epoch["test"])
# Each score is [loss, accuracy] (LOSS followed by METRICS).
val_score = model.evaluate(val_generator, steps=int(steps_per_epoch["val"]))
test_score = model.evaluate(test_generator, steps=int(steps_per_epoch["test"]))

print(val_score)
print(test_score)
# Scores recorded from earlier runs (validation, then test):
# rmsprop
#[1.1486241817474365, 0.4375]
#[2.109978681955582, 0.6618589743589743]
# adam
#[3.2800145149230957, 0.625]
#[2.9862829867081766, 0.6907051282051282]

### Transfer learning (VGG16)

#### No fine-tuning

In [29]:
# VGG16 expects 3-channel input, so the generators are rebuilt with
# color_mode='rgb' to yield batches matching INPUT_SHAPE3.
# NOTE: this rebinds COLOR_MODE and the three generators; re-run the data
# loading cell before going back to the grayscale CNN above.
COLOR_MODE = 'rgb'
WEIGHTS = "imagenet"

# (height, width) taken from the 3-channel shape, whatever the channel ordering.
if tf.keras.backend.image_data_format() == 'channels_first':
    vgg_target_size = INPUT_SHAPE3[1:]
else:
    vgg_target_size = INPUT_SHAPE3[:2]

train_generator = datagen.flow_from_directory(PATHS["train"], target_size=vgg_target_size, batch_size=BATCH_SIZE, color_mode=COLOR_MODE, class_mode=CLASS_MODE)
val_generator = datagen.flow_from_directory(PATHS["val"], target_size=vgg_target_size, batch_size=BATCH_SIZE, color_mode=COLOR_MODE, class_mode=CLASS_MODE)
test_generator = datagen.flow_from_directory(PATHS["test"], target_size=vgg_target_size, batch_size=BATCH_SIZE, color_mode=COLOR_MODE, class_mode=CLASS_MODE)

# Convolutional base only (include_top=False), used as a frozen feature extractor.
vgg16 = applications.VGG16(input_shape=INPUT_SHAPE3, include_top=False, weights=WEIGHTS)

for layer in vgg16.layers:
    layer.trainable = False

vgg16.summary()

# New classification head trained on top of the frozen base.
model = Sequential([
    vgg16,

    Flatten(),
    Dense(512, activation=ACTIVATION_HIDDEN_LAYERS),
    BatchNormalization(),
    Dropout(0.5),

    Dense(64, activation=ACTIVATION_HIDDEN_LAYERS),
    BatchNormalization(),
    Dropout(0.5),

    Dense(3, activation=ACTIVATION_OUTPUT_LAYER)
])

model.summary()

# TODO2 Create a function (same load-or-train logic as the simple CNN cell)
if CONFIG["load"]:
    # TODO1 Test the remote path
    path = WEIGHT_PATH["remote"] if CONFIG["load_remote"] else WEIGHT_PATH["local"]

    if CONFIG["load_model"]:
        # Replaces the freshly built network with the saved one.
        model = load_model(path+"model_vgg16_no_fine-tuning.h5")
    else:
        model.load_weights(path+"weights_vgg16_no_fine-tuning.h5")

    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
else:
    # Create the checkpoint directory up front so a long training run cannot
    # be lost to a failed save.
    if WEIGHT_PATH["local"]:
        os.makedirs(WEIGHT_PATH["local"], exist_ok=True)

    model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
    # Model.fit accepts generators directly; fit_generator is deprecated.
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=EPOCH,
        steps_per_epoch=int(steps_per_epoch["train"]),
        validation_steps=int(steps_per_epoch["val"]),
    )

    model.save_weights(WEIGHT_PATH["local"]+'weights_vgg16_no_fine-tuning.h5')
    model.save(WEIGHT_PATH["local"]+'model_vgg16_no_fine-tuning.h5')

# Each score is [loss, accuracy] (LOSS followed by METRICS).
val_score = model.evaluate(val_generator, steps=int(steps_per_epoch["val"]))
test_score = model.evaluate(test_generator, steps=int(steps_per_epoch["test"]))

print(val_score)
print(test_score)
# Scores recorded from an earlier run (validation, then test):
#[0.6954203844070435, 0.75]
#[2.080299531037991, 0.67948717]

Found 5216 images belonging to 3 classes.
Found 16 images belonging to 3 classes.
Found 624 images belonging to 3 classes.
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 150, 150, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
____

#### Fine-tuning

##### Before fine-tuning

In [25]:
# Stage 1 of fine-tuning: train only a new classification head on top of a
# fully frozen VGG16 base. Uses the RGB generators and WEIGHTS defined in the
# previous cell.

# load the VGG16 network, ensuring the head FC layer sets are left off
baseModel = applications.VGG16(weights=WEIGHTS, include_top=False, input_shape=INPUT_SHAPE3)

# construct the head of the model that will be placed on top of
# the base model
headModel = baseModel.output # TODO1 usual code
headModel = Flatten()(headModel)
headModel = Dense(512, activation=ACTIVATION_HIDDEN_LAYERS)(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(3, activation=ACTIVATION_OUTPUT_LAYER)(headModel)

# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)

# freeze every layer of the base model so that only the new head is updated
# during this first training pass
for layer in baseModel.layers:
    layer.trainable = False

# Create the checkpoint directory up front so a long training run cannot be
# lost to a failed save.
if WEIGHT_PATH["local"]:
    os.makedirs(WEIGHT_PATH["local"], exist_ok=True)

model.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=METRICS)
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCH,
    steps_per_epoch=int(steps_per_epoch["train"]),
    validation_steps=int(steps_per_epoch["val"]),
)

model.save_weights(WEIGHT_PATH["local"]+'weights_vgg16_dual_before_fine-tuning.h5') # TODO1 Change
model.save(WEIGHT_PATH["local"]+'model_vgg16_dual_before_fine-tuning.h5')

Epoch 1/50
  3/326 [..............................] - ETA: 29:17 - loss: 6.0073 - accuracy: 0.4167

KeyboardInterrupt: 

##### After fine-tuning

In [None]:
# Stage 2 of fine-tuning: continue training `model` / `baseModel` from the
# previous cell with the top of the base network unfrozen.
LR = 1e-4
MOMENTUM = 0.9

# unfreeze the base-model layers from index 15 onwards so that they *are*
# updated during this second training pass; the earlier layers stay frozen
# (presumably index 15+ is VGG16's last convolutional block - confirm with
# baseModel.summary())
for layer in baseModel.layers[15:]:
    layer.trainable = True

# for the changes to the model to take effect we need to recompile
# the model, this time using SGD with a *very* small learning rate
# (`learning_rate` replaces the deprecated `lr` keyword)
OPT = optimizers.SGD(learning_rate=LR, momentum=MOMENTUM)
model.compile(loss=LOSS, optimizer=OPT, metrics=METRICS)

# Create the checkpoint directory up front so a long training run cannot be
# lost to a failed save.
if WEIGHT_PATH["local"]:
    os.makedirs(WEIGHT_PATH["local"], exist_ok=True)

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCH,
    steps_per_epoch=int(steps_per_epoch["train"]),
    validation_steps=int(steps_per_epoch["val"]),
)

model.save_weights(WEIGHT_PATH["local"]+'weights_vgg16_dual_after_fine-tuning.h5') # TODO1 Change
model.save(WEIGHT_PATH["local"]+'model_vgg16_dual_after_fine-tuning.h5')

#### Plot learning

In [None]:
def plot_learning(history):
    """Plot training/validation accuracy and loss per epoch.

    Draws two figures: accuracy first, then loss.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``). Must contain
            ``loss``/``val_loss`` and either ``accuracy``/``val_accuracy`` or
            the legacy ``acc``/``val_acc`` keys.

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    logs = history.history

    # With metrics=['accuracy'] the metric is logged as 'accuracy' (see the
    # training progress output); fall back to the legacy 'acc' key.
    acc_key = 'accuracy' if 'accuracy' in logs else 'acc'
    acc = logs[acc_key]
    val_acc = logs['val_' + acc_key]
    loss = logs['loss']
    val_loss = logs['val_loss']

    epochs = range(len(acc))

    # Plot training and validation accuracy per epoch
    plt.figure()
    plt.plot(epochs, acc, label='training')
    plt.plot(epochs, val_acc, label='validation')
    plt.title('Training and validation accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend()

    # Plot training and validation loss per epoch
    plt.figure()
    plt.plot(epochs, loss, label='training')
    plt.plot(epochs, val_loss, label='validation')
    plt.title('Training and validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    
# Learning curves are only drawn on Colab runs; `history` is whatever the
# most recent training call returned.
if COLAB:
    plot_learning(history=history)