# Transfer learning malaria dataset with a pretrained CNN

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [0]:
# Ask Colab for TensorFlow 2.x; the magic below is Colab-specific, so failures are ignored.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
# Short alias; later cells use both `tf.keras...` and `keras...`.
keras = tf.keras
# Bare expression: the cell displays whether eager execution is enabled.
tf.executing_eagerly()

### Data download

In [0]:
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

### Import the malaria dataset from TensorFlow Datasets


In [0]:
# All three subsets are slices of the 'train' split.
# NOTE: the 19% slice is bound to `raw_test` and the final 1% slice to `raw_validation`.
split_spec = ['train[:80%]', 'train[80%:99%]', 'train[99%:]']
loaded_splits, info = tfds.load(
    'malaria',
    split=split_spec,
    with_info=True,
    as_supervised=True,
)
raw_train, raw_test, raw_validation = loaded_splits

In [0]:
# Display the dataset metadata returned by tfds.load(with_info=True).
info

In [0]:
# Count examples by streaming through each split once. The previous
# `len(list(ds))` kept every decoded image in memory just to count them.
print('Train dataset  size : ', sum(1 for _ in raw_train))
print('Test dataset size : ', sum(1 for _ in raw_test))
print('Validation dataset size : ', sum(1 for _ in raw_validation))

### Check train, validation and test datasets shapes 

In [0]:
# Printing a dataset object shows its element description, not its contents.
for caption, dataset in (
    ("train dataset shape  : ", raw_train),
    ("validation dataset shape : ", raw_validation),
    ("test dataset shape  : ", raw_test),
):
  print(caption, dataset)

### Show sample infected and uninfected images 

In [0]:
# Converts an integer class id into its human-readable label name.
get_label_name = info.features['label'].int2str

# Draw the first rows*columns training images in a grid, titled by class.
rows, columns = 5, 5
fig = plt.figure(figsize=(15, 15))
for i, (image, label) in enumerate(raw_train.take(rows * columns)):
  fig.add_subplot(rows, columns, i + 1)
  plt.imshow(image)
  plt.title(get_label_name(label))
plt.show()

### Get image shape

In [0]:
# Pull a single raw example (before any resizing) and show its image shape.
image, label = next(iter(raw_train.take(1)))
image.shape

###  Resize the images to a fixed input size, and rescale the input channels to a range of `[0,1]`


In [0]:
IMG_SIZE = 128 # keep the image size to comply with pre-trained models

def format(image, label):
  """Preprocess one (image, label) pair for the models in this notebook.

  The image is cast to float32, divided by 255 (giving values in [0, 1],
  assuming 8-bit pixel input) and resized to IMG_SIZE x IMG_SIZE.
  The label is returned unchanged.
  """
  scaled = tf.cast(image, tf.float32) / 255
  resized = tf.image.resize(scaled, (IMG_SIZE, IMG_SIZE))
  return resized, label

In [0]:
def show_conf_matrix(con_mat):
  """Plot a row-normalised confusion matrix as an annotated heatmap.

  Args:
    con_mat: square array-like of counts, rows = true class,
      columns = predicted class.

  Each row is divided by its total so a cell shows the fraction of that true
  class assigned to each predicted class, rounded to two decimals. A row with
  no samples is shown as zeros instead of NaN.
  """
  counts = np.asarray(con_mat, dtype='float')
  row_totals = counts.sum(axis=1)[:, np.newaxis]
  # Skip the division where a true class has no samples (total == 0).
  fractions = np.divide(counts, row_totals,
                        out=np.zeros_like(counts), where=row_totals != 0)
  con_mat_norm = np.around(fractions, decimals=2)

  plt.figure(figsize=(8, 8))
  sns.heatmap(con_mat_norm, annot=True,cmap=plt.cm.Blues)
  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  # Lay out after the axis labels exist so they are taken into account.
  plt.tight_layout()
  plt.show()

Apply the `format` function to each item in the dataset using the `map` method:

In [0]:
# Run every split through the same preprocessing function.
train, validation, test = (
    split.map(format) for split in (raw_train, raw_validation, raw_test)
)

Now shuffle and batch the data.

In [0]:
# Number of examples per batch, used for all three splits.
BATCH_SIZE = 32
# Buffer size passed to Dataset.shuffle for the training split.
SHUFFLE_BUFFER_SIZE = 10000

In [0]:
# Only the training split is shuffled; validation and test are batched in their stored order.
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)

#### Check one batch 

In [0]:
# Grab a single training batch; `image_batch` is reused later to probe the base model.
image_batch, label_batch = next(iter(train_batches.take(1)))

image_batch.shape

## Begin pre-trained base model 

### Get first results from simple CNN

In [0]:
from tensorflow.keras import datasets, layers, models

# Baseline: a small CNN trained from scratch, for comparison with the
# pre-trained models further down. It ends in a single logit (no activation).
model = models.Sequential()
# Input size follows IMG_SIZE so it stays in sync with the preprocessing.
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.2))
# Without a non-linearity this layer would collapse into the final Dense(1)
# as one linear map, so the 512 units would add no capacity.
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(1))
model.summary()

In [0]:
base_learning_rate = 0.0001
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              # The model outputs logits, so the decision boundary is 0 rather
              # than the default 0.5. The name keeps the 'accuracy' /
              # 'val_accuracy' history keys that show_performance reads.
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

In [0]:
def show_performance(history):
    """Plot training vs. validation accuracy and loss from a Keras History.

    Reads the 'accuracy', 'val_accuracy', 'loss' and 'val_loss' series from
    ``history.history`` and draws them in two stacked panels (accuracy on
    top, loss below).
    """
    logs = history.history
    acc, val_acc, loss, val_loss = (
        logs[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    )

    plt.figure(figsize=(8, 8))

    # Top panel: accuracy, with the upper limit pinned at 1.
    plt.subplot(2, 1, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    lower_bound = min(plt.ylim())
    plt.ylim([lower_bound, 1])
    plt.title('Training and Validation Accuracy')

    # Bottom panel: loss, shown on a fixed 0..1 range.
    plt.subplot(2, 1, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.ylabel('Cross Entropy')
    plt.ylim([0,1.0])
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')

    plt.show()

In [0]:
# Train the baseline CNN.
# NOTE: `test_batches` (the 19% slice) is used as per-epoch validation data here;
# `validation_batches` (the 1% slice) is reserved for the evaluate() call that follows.
history = model.fit(train_batches,
                    epochs=40,
                    validation_data=test_batches)

In [0]:
# Evaluate on every batch of the held-out slice. A fixed `steps=20` can ask
# for more batches than the 1% slice provides, and the later evaluate() call
# in this notebook also runs over the whole dataset.
model.evaluate(validation_batches)

In [0]:
# Accuracy and loss curves for the baseline CNN training run.
show_performance(history)

In [0]:
# Write the trained baseline CNN to disk; its file size is checked in a later cell.
model.save('base_model.h5')

In [0]:
def show_maps(model,img,ixs):
  """Plot feature maps produced by selected layers of `model` for `img`.

  Args:
    model: Keras model whose intermediate layer outputs are visualised.
    img: input batch passed to ``predict``; only the first sample's maps
      are drawn.
    ixs: indices into ``model.layers`` selecting the layers to show.

  For each selected layer, one figure is shown with up to 5 x 5 = 25 of the
  layer's first channels (fewer if the layer has fewer channels).
  """
  if len(ixs) == 0:
    return  # nothing to visualise

  outputs = [model.layers[i].output for i in ixs]
  # Separate name so the `model` argument is not shadowed.
  probe = tf.keras.Model(inputs=model.inputs, outputs=outputs)
  feature_maps = probe.predict(img)
  # With a single requested layer predict() returns a bare array, not a list.
  if not isinstance(feature_maps, (list, tuple)):
    feature_maps = [feature_maps]

  square = 5
  for fmap in feature_maps:
    # Never index past the number of channels this layer actually has.
    n_maps = min(square * square, fmap.shape[-1])
    for ix in range(n_maps):
      # one subplot per channel, with axis ticks turned off
      ax = plt.subplot(square, square, ix + 1)
      ax.set_xticks([])
      ax.set_yticks([])
      plt.imshow(fmap[0, :, :, ix])
    # show the figure for this layer
    plt.show()

In [0]:
from numpy import expand_dims

# Draw one random test image and add a leading batch axis for predict().
image, label = next(iter(test.shuffle(1000).take(1)))
img = expand_dims(image, axis=0)
print(img.shape)
# Layers 1 and 3 of the simple CNN are its two max-pooling layers.
show_maps(model,img, [1,3])

In [0]:
import os
# Size of the saved baseline model file in MiB (bytes / 1024 / 1024).
os.path.getsize('base_model.h5')/(1024*1024)

### Load MobileNet model

In [0]:

# Input shape shared by the pre-trained models: IMG_SIZE x IMG_SIZE with 3 channels.
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Create the base model from the pre-trained model MobileNet
# include_top=False leaves out the original classification layers so a new
# head can be attached; weights='imagenet' loads the ImageNet-trained weights.
base_model = tf.keras.applications.MobileNet(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

#### Show base model architecture

In [0]:
# Print the layer-by-layer structure of the MobileNet base.
base_model.summary()

#### Inspect features size extracted from example batch 

In [0]:
# Pass the example training batch through the base model and report the
# shape of the features it produces.
feature_batch = base_model(image_batch)
print(feature_batch.shape)






## Feature extraction


### Freeze the convolutional base



In [0]:
# Mark the whole MobileNet base as non-trainable before the new head is added.
base_model.trainable = False

### Add a classification head

In [0]:
# Stack a small head on top of the base model: global average pooling,
# one hidden ReLU layer, and a single-logit output (no activation).
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(512,activation="relu"))
# A stray `asdf` token after this call made the cell a syntax error.
model.add(layers.Dense(1))
model.summary()


### Compile the model





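The model outputs a single logit, so it is compiled with binary cross-entropy
(`from_logits=True`) and optimised with RMSprop at a small learning rate.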



In [0]:
base_learning_rate = 0.0001
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              # The head outputs logits, so the decision boundary is 0 rather
              # than the default 0.5. The name keeps the 'accuracy' /
              # 'val_accuracy' history keys that show_performance reads.
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])

### Train the model


In [0]:
# Number of epochs for the feature-extraction phase (base model frozen).
initial_epochs = 12
# NOTE(review): `validation_steps` is not referenced by any other cell visible in this notebook.
validation_steps=20


In [0]:
# Train the new head on top of the frozen base.
# NOTE: `test_batches` is used as the per-epoch validation data.
history = model.fit(train_batches,
                    epochs=initial_epochs,
                    validation_data=test_batches )

In [0]:
# Accuracy and loss curves for the feature-extraction run.
show_performance(history)


### Evaluate base model

In [0]:
# Loss and accuracy on the held-out `validation_batches` slice.
# NOTE(review): the name `eval` shadows the Python builtin; a later cell displays this variable.
eval =  model.evaluate(validation_batches)

### Base model performance

In [0]:
# NOTE(review): same call as the cell above the "Evaluate base model" heading; `history` has not changed in between.
show_performance(history)

In [0]:


# Raw model outputs (logits) for every example in test_batches.
y_pred = model.predict(test_batches)
# Collect the true label of every test example. `test_batches` is not
# shuffled, so iterating it again yields labels in the same order as y_pred.
# (The previous `next(iter(...))` returned only the first example.)
labels = np.concatenate([batch_labels.numpy() for _, batch_labels in test_batches])

# A logit above 0 corresponds to a probability above 0.5; flatten the (N, 1)
# predictions because confusion_matrix expects 1-D inputs.
con_mat = tf.math.confusion_matrix(labels=labels, predictions=(y_pred > 0).astype(int).ravel()).numpy()

show_conf_matrix(con_mat)



In [0]:
 # Write the feature-extraction model to disk; its file size is checked in the next cell.
 model.save('base_transfer_model.h5')

In [0]:
import os
# Size of the saved transfer-learning model file in MiB (bytes / 1024 / 1024).
os.path.getsize('base_transfer_model.h5')/(1024*1024)

#### Discussion
As seen from the performance chart, the overall accuracy looks good; however, the model needs serious fine-tuning to eliminate overfitting.

In [0]:
# Display the result of the earlier model.evaluate(validation_batches) call.
eval

### Show sample misclassified items

In [0]:
import matplotlib.image as mpimg
# Take the first test batch and compute the model's raw outputs for it.
image,labels =  next(iter(test_batches))
pred = model.predict(image)

# NOTE(review): the commented-out block below refers to `df`, `CatasDog` and a
# 'test/' directory, none of which are defined in this notebook; nothing in
# this cell currently displays misclassified items. TODO: implement using
# `pred` and `labels`, or remove.
# CatasDog = df['filename'][(df.y==0)&(df.y_pred==1)]
# fig=plt.figure(figsize=(15, 6))
# columns = 7
# rows = 3
# for i in range(columns*rows):
#     #img = mpimg.imread()
#     img = image.load_img('test/'+CatasDog.iloc[i], target_size=(64, 64))
#     fig.add_subplot(rows, columns, i+1)
#     plt.imshow(img)

# plt.show()

### Show feature maps with applied filters for random image

In [0]:
# Draw one random test image and add a leading batch axis for predict().
image, label = next(iter(test.shuffle(1000).take(1)))
img = expand_dims(image, axis=0)
print(img.shape)
# Visualise the outputs of a handful of MobileNet layers, shallow to deep.
show_maps(base_model,img, [2,8,34,65,78])




## Fine tuning


### Un-freeze the top layers of the model


#### Number of layers in base model

In [0]:
# How many layers the MobileNet base has; used to choose where fine-tuning starts.
len(base_model.layers)

In [0]:
# Inspect the layer object at index 84 of the base model.
base_model.layers[84]

In [0]:
# Make the whole base trainable again; a later cell re-freezes the layers below the cut-off.
base_model.trainable = True

In [0]:
# Print each base-model layer's position followed by its name, to help pick
# the fine-tuning cut-off.
for index, layer in enumerate(base_model.layers):
  print(index)
  print(layer.name)

In [0]:

# Fine-tune from this layer onwards
# (index into base_model.layers; layers 0..49 stay frozen, 50 and above train).
fine_tune_at = 50

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False

### Compile the model

Compile the model using lower learning rate.

In [0]:
# Compile again after changing the trainable flags, with a learning rate ten
# times smaller than in the feature-extraction phase.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              # Logit outputs: decision boundary is 0, not the default 0.5.
              metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])
model.summary()
fine_tune_epochs = 10
total_epochs =  initial_epochs + fine_tune_epochs

# `history.epoch` holds zero-based epoch indices, so the next epoch to run is
# the last index + 1. Using the last index itself would run one extra epoch
# (fine_tune_epochs + 1 in total).
history_fine = model.fit(train_batches,
                         epochs=total_epochs,
                         initial_epoch=history.epoch[-1] + 1,
                         validation_data=test_batches)

In [0]:
# Accuracy and loss curves for the fine-tuning epochs only.
show_performance(history_fine)

#### Accuracy does not improve as much as the loss





## Model HyperParameters tuning  
Let's check whether regularization techniques and dropout help. Another option could be adding momentum. Use the TensorBoard HParams API (`hp`), TensorFlow's analogue of grid search,
to search through optimizers, regularization parameters and dropout values.


In [0]:
from tensorboard.plugins.hparams import api as hp


# Search space: each HParam pairs a display name with the domain of values to explore.
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.2, 0.5))
# NOTE(review): HP_EPOCHS is defined but is not listed in hparams_config below,
# and train_test_model trains for a fixed epochs=8.
HP_EPOCHS = hp.HParam('epoch', hp.Discrete([10,20,30,40]))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd','RMSprop']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.001,.01))
# Tag under which each trial's accuracy is logged.
METRIC_ACCURACY = 'accuracy'

# Record the experiment definition (hyperparameters and metric) under logs/hparam_tuning.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
  hp.hparams_config(
    hparams=[HP_DROPOUT,HP_L2 ,HP_OPTIMIZER],
    metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
  )



def train_test_model(hparams):
  """Build, train and evaluate one classifier for a hyperparameter set.

  Args:
    hparams: mapping keyed by the HParam objects HP_L2, HP_DROPOUT and
      HP_OPTIMIZER, giving the value to use for each.

  Returns:
    The accuracy reported by ``model.evaluate`` on ``validation_batches``.

  The shared ``base_model`` is reused in every trial, so its weights and
  trainable flags are whatever earlier cells left them as.
  """
  model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        # ReLU keeps this layer from collapsing into the output layer as a
        # single linear map.
        keras.layers.Dense(64, activation='relu',
                           kernel_regularizer=tf.keras.regularizers.l2(hparams[HP_L2])),
        keras.layers.Dropout(hparams[HP_DROPOUT]),
        keras.layers.Dense(1,activation='sigmoid')
      ])

  # The output layer already applies a sigmoid, so the loss must treat the
  # outputs as probabilities. from_logits=True would apply a second sigmoid.
  model.compile(optimizer=hparams[HP_OPTIMIZER],
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                  metrics=['accuracy'])

  model.fit(train_batches,
            validation_data=test_batches,
            epochs=8)
  _, accuracy = model.evaluate(validation_batches)
  return accuracy

def run(dir,hparams):
  """Run one hyperparameter trial and log it to the summary directory `dir`.

  Writes the hyperparameter values used, trains and evaluates a model via
  ``train_test_model``, then logs the resulting accuracy at step 1.
  """
  writer = tf.summary.create_file_writer(dir)
  with writer.as_default():
    # Record which values this trial used before training starts.
    hp.hparams(hparams)
    trial_accuracy = train_test_model(hparams)
    tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=1)




# Counter used to name trial directories ("run-0", "run-1", ...).
session_num = 0
# NOTE(review): the sweep below is commented out, so no trials are run and
# `run` / `train_test_model` are never called by this notebook as it stands.
# When enabled it tries the min and max of each real-valued range combined
# with every optimizer.
# for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
#       for l2 in (HP_L2.domain.min_value, HP_L2.domain.max_value):
#         for optimizer in HP_OPTIMIZER.domain.values:
          
#           hparams = {
#               HP_DROPOUT: dropout_rate,
#               HP_L2: l2,
#               HP_OPTIMIZER: optimizer
              
#           }
#           run_name = "run-%d" % session_num
#           print('--- Starting trial: %s' % run_name)
#           print({h.name: hparams[h] for h in hparams})
#           run('logs/hparam_tuning/' + run_name, hparams)
#           session_num += 1


In [0]:
# MobileNet base with a regularised head: global max pooling, an
# L2-regularised hidden layer, dropout, and a single-logit output.
model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalMaxPooling2D(),
        # ReLU keeps this layer from collapsing into the output layer as a
        # single linear map.
        tf.keras.layers.Dense(512, activation='relu',
                              kernel_regularizer=tf.keras.regularizers.l2(0.07)),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1)
      ])


# The output layer has no activation, so it emits logits. The string loss
# "binary_crossentropy" expects probabilities; use the from_logits=True form
# (and a 0 accuracy threshold) as the other logit models in this notebook do.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.000001,momentum=0.06),
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])
history_1 = model.fit(train_batches,
                         epochs=20,
                         validation_data=test_batches)

## Data augmentation 
Let's check whether model accuracy can be improved with image augmentation tweaks




In [0]:
def tr_format(image, label):
  """Training-time preprocessing with random augmentation.

  Applies the same cast, 1/255 rescale and resize as ``format``, then a
  random vertical flip and a random 90x90 crop. The crop is resized back to
  IMG_SIZE x IMG_SIZE so the output matches the input size the models were
  built with and the size of the validation/test images. The label is
  returned unchanged.
  """
  image = tf.cast(image, tf.float32)
  image = (image/255)
  image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
  image = tf.image.random_flip_up_down(image)
  image = tf.image.random_crop(image,[90,90,3])
  # Without this the 90x90 crops would not match the (IMG_SIZE, IMG_SIZE, 3)
  # input shape of the models they are fed to.
  image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
  return image, label
 

In [0]:
# Rebuild the training pipeline with the augmenting preprocessing.
# NOTE: this rebinds `train` and `train_batches`, so every later cell trains on augmented data.
train = raw_train.map(tr_format)
train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# Continues training whatever `model` currently refers to (it is not rebuilt here).
history = model.fit(train_batches,
                    epochs=40,
                    validation_data=test_batches )



### Compare MobileNet best model performance with ResNet 
According to papers, ResNet has the best performance on this problem


In [0]:
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Create the base model from the pre-trained model ResNet
resnet = tf.keras.applications.ResNet152V2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

# Keep the pre-trained ResNet weights fixed; only the new head is trained.
resnet.trainable = False


# Head: global max pooling, a small ReLU layer, dropout, and a sigmoid output
# that yields a probability.
model = tf.keras.Sequential([
        resnet,
        tf.keras.layers.GlobalMaxPooling2D(),
        keras.layers.Dense(60, activation = 'relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(1,activation='sigmoid')
      ])


# The sigmoid output matches the probability-based "binary_crossentropy" loss.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001,momentum= 0.05),
                  loss="binary_crossentropy",
                  metrics=['accuracy'])
history_1 = model.fit(train_batches,
                         epochs=20,
                         validation_data=test_batches)