# Image Augmentation - tf.data - Transfer Learning

## Imports 

In [None]:
import tensorflow as tf
import pathlib
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
import PIL
from functools import partial
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm

We will use the `qubvel/efficientnet` GitHub repo to load the pre-trained models. Note that there are other sources where you can get pre-trained models, e.g.:
- tf.keras.applications has some pre-trained models
- https://github.com/qubvel/efficientnet has weights for efficient-net
- ...

One of the most important things to check when doing transfer learning is whether any pre-processing was applied during the original training. Most libraries provide a `preprocess_input` method or function, which you should find and reuse!

In [None]:
# !pip install git+https://github.com/qubvel/classification_models.git
!pip install -U --quiet git+https://github.com/qubvel/efficientnet

  Building wheel for efficientnet (setup.py) ... [?25l[?25hdone


We will use the albumentations library which has an extensive collection of augmentation operations

In [None]:
!pip install -U --quiet git+https://github.com/albumentations-team/albumentations

  Building wheel for albumentations (setup.py) ... [?25l[?25hdone


In [None]:
# from classification_models.keras import Classifiers
import efficientnet.keras as efn 
from albumentations import (
    Compose, RandomBrightnessContrast, OneOf, ShiftScaleRotate, HueSaturationValue, 
    HorizontalFlip, ToGray, Resize, GaussNoise)

The dataset we will use in this lecture (oxford pets) is not stored in a nice directory format. This is an opportunity to show how to create a `tf.data.Dataset` object from scratch.

## Data

In [None]:
# Download the oxford pets image archive and extract it (untar=True) below
# the '/content/' cache directory; get_file returns the path it produced.
dataset_url ="https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz"
data_dir = tf.keras.utils.get_file(origin=dataset_url, 
                                   fname='images', 
                                   untar=True,
                                   cache_dir='/content/')

# Delete the downloaded archive file. NOTE(review): the path is hard-coded and
# presumably matches where get_file stores the archive for this cache_dir - confirm
# if cache_dir or fname ever change.
pathlib.Path('/content/datasets/images.tar.gz').unlink() 

Downloading data from https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz


In [None]:
# Gather the path of every '*.jpg' file directly inside the image folder as a
# plain string, then shuffle the list in place (unseeded, so the order differs
# from run to run).
data_dir = pathlib.Path(data_dir)
images_list = list(map(str, data_dir.glob('*.jpg')))
np.random.shuffle(images_list)
def get_label_from_path(path):
  """Return the lower-cased class name encoded in an image file path.

  The file name is everything after the last ``os.sep``; the label is the part
  of that name before its last underscore (e.g. ``Russian_Blue_94.jpg`` gives
  ``russian_blue``). A file name without any underscore yields ``''``.
  """
  filename = path.rpartition(os.sep)[2]
  breed, _, _ = filename.rpartition('_')
  return breed.lower()
# Class name of every image, in the same order as images_list.
labels_list = [get_label_from_path(x) for x in images_list]
# Sort the unique class names so the name -> index mapping is identical on
# every run. Iterating a bare set of strings depends on hash randomisation,
# which would silently change which index means which class between sessions
# (and so invalidate any saved weights or predictions).
labels = sorted(set(labels_list))
labels_to_num = {name: index for index, name in enumerate(labels)}
# Replace each class name by its integer index.
labels_list = [labels_to_num[x] for x in labels_list]
print(len(labels))

37


In [None]:
images_list[:5], labels_list[:5]

(['/content/datasets/images/Persian_41.jpg',
  '/content/datasets/images/Russian_Blue_94.jpg',
  '/content/datasets/images/beagle_30.jpg',
  '/content/datasets/images/pomeranian_147.jpg',
  '/content/datasets/images/english_setter_116.jpg'],
 [34, 16, 28, 14, 23])

In [None]:
# Data source: one (image path, integer label) element per image, sliced from
# the two parallel lists built above. Nothing is read from disk at this point.
all_data = tf.data.Dataset.from_tensor_slices((images_list, labels_list))
print(len(all_data))

7390


In [None]:
data_train = all_data.take(5000) # first 5000 elements -> training set
data_val = all_data.skip(5000) # everything after them -> validation set (2390 of the 7390 elements)

In [None]:
# Number of images per batch produced by the tf.data pipelines.
BATCH_SIZE = 32
# Spatial size all images are resized to: its two entries are passed to Resize,
# and a channel dimension of 3 is appended to it to form the model input shape.
IMG_SIZE = (160, 160)

In [None]:
# Validation pipeline: deterministic resize only.
transforms_val = Compose([Resize(height=IMG_SIZE[0], width=IMG_SIZE[1], p=1)])

# Training pipeline: the same resize, followed by random augmentations that are
# each applied with their own probability p.
transforms_train = Compose([
    Resize(height=IMG_SIZE[0], width=IMG_SIZE[1], p=1),
    GaussNoise(p=0.1),
    # Exactly one of the two colour jitters, 70% of the time.
    OneOf([RandomBrightnessContrast(), HueSaturationValue()], p=0.7),
    HorizontalFlip(p=0.5),
    ToGray(p=0.2),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=30, p=0.5),
])

def aug_fn(image, train):
    """Augment one image and apply the EfficientNet input pre-processing.

    Args:
        image: image array handed to the albumentations pipeline.
        train: truthy -> use ``transforms_train``; falsy -> ``transforms_val``.

    Returns:
        The transformed image after ``efn.preprocess_input``.
    """
    pipeline = transforms_train if train else transforms_val
    augmented = pipeline(image=image)["image"]
    # Don't forget the preprocess function!
    return efn.preprocess_input(augmented)

def parse(path, label, num_classes=37):
    """Read one JPEG file and one-hot encode its label.

    Args:
        path: path of the JPEG file to read.
        label: integer class index of the image.
        num_classes: depth of the one-hot vector. Defaults to 37, the value
            that used to be hard-coded here, so ``dataset.map(parse)`` behaves
            exactly as before.

    Returns:
        Tuple ``(image, label)``: the image decoded with 3 channels and the
        one-hot encoded label.
    """
    image = tf.io.read_file(path)
    image = tf.io.decode_jpeg(image, channels=3)
    label = tf.one_hot(label, num_classes)
    return image, label

def process_data_train(image, label):
    """Apply the training augmentations to one decoded image.

    ``aug_fn`` runs as plain Python/NumPy code through ``tf.numpy_function``
    with ``train=True``.

    Args:
        image: decoded image tensor.
        label: label tensor, passed through unchanged.

    Returns:
        Tuple ``(aug_img, label)`` where ``aug_img`` is a float32 tensor.
    """
    aug_img = tf.numpy_function(func=aug_fn, inp=[image, True],
                                Tout=tf.float32)
    # tf.numpy_function returns a tensor with no static shape information.
    # aug_fn always resizes to IMG_SIZE and the images are decoded with 3
    # channels, so restore the shape for downstream batching and layers.
    aug_img.set_shape(IMG_SIZE + (3,))
    return aug_img, label

def process_data_val(image, label):
    """Apply the validation transform (resize only) to one decoded image.

    ``aug_fn`` runs as plain Python/NumPy code through ``tf.numpy_function``
    with ``train=False``.

    Args:
        image: decoded image tensor.
        label: label tensor, passed through unchanged.

    Returns:
        Tuple ``(aug_img, label)`` where ``aug_img`` is a float32 tensor.
    """
    aug_img = tf.numpy_function(func=aug_fn, inp=[image, False],
                                Tout=tf.float32)
    # tf.numpy_function returns a tensor with no static shape information.
    # aug_fn always resizes to IMG_SIZE and the images are decoded with 3
    # channels, so restore the shape for downstream batching and layers.
    aug_img.set_shape(IMG_SIZE + (3,))
    return aug_img, label

`tf.data.Dataset` has 5 important methods:
- shuffle: shuffles the data (not globally; it keeps a buffer of `buffer_size` elements and draws randomly from it)
- map: maps a function over the dataset, i.e. executes it on every element. The function does not run when you execute the cell, but lazily during training.
- cache: puts the dataset in a cache during the first epoch, and re-uses the cache for the next epochs. Only use it for small datasets ... /!\ do not cache image augmentation and batching
- batch: creates batches of the desired size
- prefetch: populates an ordered buffer even while the forward/backward passes are running (on GPU)

https://www.tensorflow.org/guide/data_performance

We will use `tf.data.experimental.AUTOTUNE` to auto-tune the prefetching and parallel calls. TensorFlow will try a few values and automatically set the best parameters to optimize performance.

In [None]:
from tensorflow.data.experimental import AUTOTUNE

# Training pipeline. Order matters:
#   1. parse  - read + decode each file (deterministic, worth caching)
#   2. cache  - keep the decoded images after the first epoch
#   3. shuffle - AFTER the cache, so every epoch gets a new order; a shuffle
#      placed before cache() is frozen into the cache during the first epoch
#      and the data is never reshuffled afterwards
#   4. random augmentation, batching and prefetching - never cached
data_train = (
    data_train
    .map(parse, num_parallel_calls=AUTOTUNE)
    .cache()
    .shuffle(buffer_size=BATCH_SIZE*4)
    .map(process_data_train, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Validation pipeline: same steps without shuffling, because the validation
# metrics do not depend on the order of the elements.
data_val = (
    data_val
    .map(parse, num_parallel_calls=AUTOTUNE)
    .cache()
    .map(process_data_val, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

In [None]:
def view_image_batch(ds):
    """Plot up to 20 images from the first batch of ``ds`` on a 4x5 grid.

    The images are expected to be normalised with the mean/std below; the
    normalisation is undone and the values are clipped to [0, 1] before
    plotting.

    Args:
        ds: iterable yielding ``(images, labels)`` batches, where ``images``
            exposes ``.numpy()`` (e.g. a batched ``tf.data.Dataset``).
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    image, _ = next(iter(ds)) # extract 1 batch from the dataset
    image = np.clip((image.numpy()*std)+mean,0,1) # Doing the efn.preprocess_input inverse

    fig = plt.figure(figsize=(22, 22))
    # Never index past the end of the batch: a batch with fewer than 20 images
    # used to raise an IndexError here.
    for i in range(min(20, len(image))):
        ax = fig.add_subplot(4, 5, i+1, xticks=[], yticks=[])
        ax.imshow(image[i])

view_image_batch(data_train)

Output hidden; open in https://colab.research.google.com to view.

## Model

In [None]:
# Model input shape: the image size with 3 channels appended.
IMG_SHAPE = IMG_SIZE + (3,)

# EfficientNetB0 with ImageNet weights and without its top (classification) part.
base_model = efn.EfficientNetB0(input_shape=IMG_SHAPE, weights='imagenet', include_top=False)

# Stage 1: freeze the whole backbone so that only the new head is trained.
base_model.trainable = False

inputs = tf.keras.Input(shape=IMG_SHAPE)
x = base_model(inputs, training=False) # BN Will always be in inference mode, i.e. using running stats
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)  
# 37 output units (one per class); no activation is given, so the model
# outputs raw scores (logits).
outputs = tf.keras.layers.Dense(37)(x)
model = tf.keras.Model(inputs, outputs)

model.summary()

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_4 ( (None, 1280)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 37)                47397     
Total params: 4,096,961
Trainable params: 47,397
Non-trainable params: 4,049,564
_________________________________________________________________


In [None]:
# The labels are one-hot encoded and the final Dense layer has no activation,
# hence CategoricalCrossentropy with from_logits=True. Adam uses its default
# learning rate.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Stage 1 training: 10 epochs, validated on data_val after every epoch.
model.fit(x=data_train, validation_data=data_val, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f5f15bfdb38>

In [None]:
# Stage 2: unfreeze the backbone weights for fine-tuning.
base_model.trainable = True 
# BN is still in inference mode (the backbone was called with training=False
# when the model was built)
# Compile here | IMPORTANT: re-compile after changing `trainable` so that the
# change is taken into account by training
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), # Low LR, but maybe can do better?
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# The summary shows the new trainable / non-trainable parameter counts.
model.summary()
model.fit(x=data_train, validation_data=data_val, epochs=10)

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_4 ( (None, 1280)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 37)                47397     
Total params: 4,096,961
Trainable params: 4,054,945
Non-trainable params: 42,016
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7f5f15bfd240>

We start to overfit when we unfreeze the entire model, but the validation accuracy is slightly better. We will see later how this can be solved using learning rate schedules for example.

## You want to unfreeze BN?

In [None]:
# Rebuild the model from scratch, this time WITHOUT forcing the backbone into
# inference mode when it is called.
IMG_SHAPE = IMG_SIZE + (3,)

# Fresh EfficientNetB0 with ImageNet weights and without its top part.
base_model = efn.EfficientNetB0(input_shape=IMG_SHAPE, weights='imagenet', include_top=False)

# Stage 1: the backbone is frozen, only the new head is trained.
base_model.trainable = False

inputs = tf.keras.Input(shape=IMG_SHAPE)
x = base_model(inputs) # No BN inference forcing
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)  
# 37 output units, no activation: the model outputs logits.
outputs = tf.keras.layers.Dense(37)(x)
model = tf.keras.Model(inputs, outputs)

model.summary()

# One-hot labels + logit outputs -> CategoricalCrossentropy(from_logits=True);
# Adam with its default learning rate.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Stage 1 training of the head for 10 epochs.
model.fit(x=data_train, validation_data=data_val, epochs=10)

Model: "functional_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_28 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_13  (None, 1280)              0         
_________________________________________________________________
dropout_14 (Dropout)         (None, 1280)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 37)                47397     
Total params: 4,096,961
Trainable params: 47,397
Non-trainable params: 4,049,564
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch

<tensorflow.python.keras.callbacks.History at 0x7f5ee925fb38>

In [None]:
# Stage 2: unfreeze the whole backbone.
base_model.trainable = True 
# BN is in training mode (the backbone was called without training=False)
# Compile here | IMPORTANT: re-compile after changing `trainable` so that the
# change is taken into account by training
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), # This is different! Tuned! Otherwise The perf goes down too much
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# The summary shows the new trainable / non-trainable parameter counts.
model.summary()
model.fit(x=data_train, validation_data=data_val, epochs=10)

Model: "functional_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_28 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_13  (None, 1280)              0         
_________________________________________________________________
dropout_14 (Dropout)         (None, 1280)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 37)                47397     
Total params: 4,096,961
Trainable params: 4,054,945
Non-trainable params: 42,016
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch

<tensorflow.python.keras.callbacks.History at 0x7f5ef7533da0>

This is not a good idea: freezing BN in the first stage and releasing it in the next stage leads to abrupt changes.

You need to be very careful with LR!

To fix this, one solution is to unfreeze all BN layers in the first stage already.

Another trick that also solves this is to use a warm-up phase (one-cycle) and to find a good LR with an LR finder. This usually gives good LR values and avoids those abrupt changes.

## Unfreeze all BNs from first stage

In [None]:
# Rebuild the model from scratch for the "BN trainable from stage 1" variant.
IMG_SHAPE = IMG_SIZE + (3,)

# Fresh EfficientNetB0 with ImageNet weights and without its top part.
base_model = efn.EfficientNetB0(input_shape=IMG_SHAPE, weights='imagenet', include_top=False)

# Frozen while the model is assembled; the per-layer flags are set right after.
base_model.trainable = False

inputs = tf.keras.Input(shape=IMG_SHAPE)
x = base_model(inputs) # No BN inference forcing
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)  
# 37 output units, no activation: the model outputs logits.
outputs = tf.keras.layers.Dense(37)(x)
model = tf.keras.Model(inputs, outputs)

# Unfreeze the parent model first, then keep trainable only the layers whose
# name contains 'bn' and freeze every other layer.
base_model.trainable = True
for layer in base_model.layers:
  layer.trainable = 'bn' in layer.name

# The summary reflects the per-layer trainable flags set just before.
model.summary()

# One-hot labels + logit outputs -> CategoricalCrossentropy(from_logits=True);
# Adam with its default learning rate.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Stage 1 training for 10 epochs.
model.fit(x=data_train, validation_data=data_val, epochs=10)

Model: "functional_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_30 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_14  (None, 1280)              0         
_________________________________________________________________
dropout_15 (Dropout)         (None, 1280)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 37)                47397     
Total params: 4,096,961
Trainable params: 89,413
Non-trainable params: 4,007,548
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch

<tensorflow.python.keras.callbacks.History at 0x7f5ee40bb860>

In [None]:
# Stage 2: unfreeze the whole backbone.
base_model.trainable = True 
# Compile here | IMPORTANT: re-compile after changing `trainable` so that the
# change is taken into account by training
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), # Low LR, but maybe can do better?
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# The summary shows the new trainable / non-trainable parameter counts.
model.summary()
model.fit(x=data_train, validation_data=data_val, epochs=10)

Model: "functional_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_22 (InputLayer)        [(None, 160, 160, 3)]     0         
_________________________________________________________________
efficientnet-b0 (Functional) (None, 5, 5, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_10  (None, 1280)              0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 1280)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 37)                47397     
Total params: 4,096,961
Trainable params: 4,054,945
Non-trainable params: 42,016
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch

<tensorflow.python.keras.callbacks.History at 0x7f5ef7533a58>

## Helper snippets

In [None]:
# Unfreeze only Block7
# The trainable attribute is passed down recursively: a child layer marked
# trainable still stays frozen while its parent layer is not trainable, so the
# parent has to be unfrozen before the per-layer flags are set.
base_model.trainable = True # First unfreeze the entire base model
for layer in base_model.layers:
  # Trainable only if the layer name contains 'block7'; every other block is frozen
  layer.trainable = 'block7' in layer.name

In [None]:
# Unfreeze Batch Norm Layers all together
# The parent model is unfrozen first; then a layer stays trainable only if its
# name contains 'bn', and every other layer is frozen.
base_model.trainable = True
for layer in base_model.layers:
  layer.trainable = 'bn' in layer.name