##  Train a classification model based on the (pretrained) VGG16 model.

Author: Anh Trung Tra    
Email: tratrunganh001@gmail.com

**Environment**:
- Ubuntu 16.04
- Python3.5
- TensorFlow 2.0

**Refs**:
https://www.tensorflow.org/alpha/tutorials/load_data/images  

**TODO:**  
[X] Build the data pipeline for the train and val set.        
[X] Create the classification model based on pretrained VGG16 model.      
[X] Train and save the dog/cat prediction model.

Updated by: Le Trung Phong - letrungphong95@gmail.com

## Let's import some things ...

In [1]:
import tensorflow as tf
# Short alias so the rest of the notebook can refer to `keras.<...>` directly.
keras = tf.keras

print("Tensorflow version: {}".format(tf.__version__))

# Side length (in pixels) of the square model input; used by the resize step
# of the data pipelines and by the model's input shape.
_IMAGE_SIZE = 224

# IPython magic (not plain Python): render matplotlib figures inline in the notebook.
%matplotlib inline

Tensorflow version: 2.0.0-alpha0


## Build the data pipeline for the train and val set, ready for training the model

- Train data pipeline:  
tfrecord --> Parse --> Augment --> Resize --> Preprocess --> Shuffle --> Batch --> Prefetch
- Val data pipeline:  
tfrecord --> Parse --> Resize --> Preprocess --> Batch --> Prefetch

Ref: 
https://www.tensorflow.org/alpha/tutorials/load_data/images  

In [2]:
# Schema used to parse one serialized example from the TFRecord files:
# three scalar int64 features plus the raw image bytes as a scalar string.
image_feature_description = {
    key: tf.io.FixedLenFeature([], tf.int64)
    for key in ('height', 'width', 'label')
}
image_feature_description['image_raw'] = tf.io.FixedLenFeature([], tf.string)

# Helper functions
def _parse_function(example_proto):
    """Decode one serialized example into an image tensor plus its metadata.

    Arguments:
    - example_proto: a serialized example, parsed with `image_feature_description`.

    Returns:
    - (image, label, height, width): the JPEG-decoded 3-channel image converted
      to float32, followed by the 'label', 'height' and 'width' features.
    """
    features = tf.io.parse_single_example(example_proto, image_feature_description)

    # decode_jpeg is used instead of decode_image so the output shape stays defined.
    pixels = tf.image.decode_jpeg(features['image_raw'], channels=3)
    # The float32 conversion rescales the pixel values into [0, 1].
    image = tf.image.convert_image_dtype(pixels, tf.float32)

    return image, features['label'], features['height'], features['width']

def _augment_image(image, label, height, weight):
    """Apply random training-time augmentations to a single image.

    The image is randomly flipped left/right, randomly cropped to
    [height, weight, 3], jittered in brightness and saturation, and finally
    clipped to [0, 1]. `label`, `height` and `weight` are returned unchanged.
    (`weight` holds the image width; the name is kept as-is.)
    """
    # NOTE(review): the crop size is the height/width passed in. If those are
    # the full image dimensions, the random crop leaves the image unchanged --
    # confirm whether a smaller crop was intended.
    crop_shape = [height, weight, 3]

    augmented = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_crop(augmented, crop_shape)
    augmented = tf.image.random_brightness(augmented, max_delta=32.0 / 255.0)
    augmented = tf.image.random_saturation(augmented, lower=0.5, upper=1.5)
    # Keep the result inside the [0, 1] range after the colour jitter.
    augmented = tf.clip_by_value(augmented, 0.0, 1.0)

    return augmented, label, height, weight
      
def _resize_image(image, label, height, weight, size):
    """Resize the image (with padding) to `size` x `size` for the model input.

    `height` and `weight` are accepted so the function can follow the parse /
    augment steps in a pipeline, but they are not used and are dropped from
    the output.

    Returns:
    - (resized_image, label)
    """
    return tf.image.resize_with_pad(image, target_height=size, target_width=size), label

def _preprocess_image(image, label):
    """Convert a [0, 1] RGB image to the VGG16 preprocessing convention.

    The image is scaled to [0, 255], its channel axis is reversed (RGB -> BGR)
    and fixed per-channel means are subtracted. `label` is passed through.

    Returns:
    - (preprocessed_image, label)
    """
    # Channel means, applied after the RGB -> BGR flip.
    channel_means = [103.939, 116.779, 123.68]

    # Scale to [0, 255], then reverse the last (channel) axis.
    bgr_image = (255.0*image)[..., ::-1]

    return bgr_image - channel_means, label

In [3]:
# Train pipeline: parse -> augment -> resize -> preprocess -> shuffle -> batch -> prefetch
train_dataset = tf.data.TFRecordDataset('data/PetImages/train.tfrecord')
train_dataset = train_dataset.map(_parse_function)
train_dataset = train_dataset.map(_augment_image)
train_dataset = train_dataset.map(
    lambda image, label, height, width: _resize_image(image, label, height, width, size=_IMAGE_SIZE))
train_dataset = train_dataset.map(_preprocess_image)
train_dataset = train_dataset.shuffle(1000)
train_dataset = train_dataset.batch(32)
train_dataset = train_dataset.prefetch(1)  # always keep one batch ready to serve

# Val pipeline: parse -> resize -> preprocess -> batch -> prefetch (no augment, no shuffle)
val_dataset = tf.data.TFRecordDataset('data/PetImages/val.tfrecord')
val_dataset = val_dataset.map(_parse_function)
val_dataset = val_dataset.map(
    lambda image, label, height, width: _resize_image(image, label, height, width, size=_IMAGE_SIZE))
val_dataset = val_dataset.map(_preprocess_image)
val_dataset = val_dataset.batch(32)
val_dataset = val_dataset.prefetch(1)  # always keep one batch ready to serve

# Print both datasets to inspect their element shapes and types.
print(train_dataset)
print(val_dataset)

<PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>
<PrefetchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>


## Create the classification model based on pretrained VGG16 model.

In [4]:
def model_fn(num_class=None, image_size=None):
    """
    Build a classifier made of the ImageNet-pretrained VGG16 convolutional
    base (without its top layers) followed by a small stack of new layers.

    Arguments:
    - num_class: int, number of output classes
      (e.g. num_class=2 for the Cat and Dog dataset)
    - image_size: int, side length of the square input images

    Returns:
    - model: keras Sequential model; it is not compiled here
    """
    model = keras.models.Sequential()

    # Pretrained convolutional base.
    backbone = keras.applications.VGG16(include_top=False,
                                        weights='imagenet',
                                        input_shape=(image_size, image_size, 3))
    model.add(backbone)

    # New head: three 1x1 convolutions, each followed by a ReLU.
    for filters in (512, 256, 128):
        model.add(keras.layers.Conv2D(filters, kernel_size=1))
        model.add(keras.layers.Activation('relu'))

    # Pool over the spatial dimensions and classify with a softmax layer.
    model.add(keras.layers.GlobalAveragePooling2D())
    model.add(keras.layers.Dense(num_class, activation='softmax'))

    return model

# Build the two-class model for _IMAGE_SIZE x _IMAGE_SIZE inputs
model = model_fn(image_size=_IMAGE_SIZE, num_class=2)

# Attach the optimizer, the loss (integer labels -> sparse categorical
# crossentropy) and the accuracy metric
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.optimizers.Adam(learning_rate=1e-5),
)

# Print the layer summary, including the parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
conv2d (Conv2D)              (None, 7, 7, 512)         262656    
_________________________________________________________________
activation (Activation)      (None, 7, 7, 512)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 7, 256)         131328    
_________________________________________________________________
activation_1 (Activation)    (None, 7, 7, 256)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 128)         32896     
_________________________________________________________________
activation_2 (Activation)    (None, 7, 7, 128)         0

## Train and save the dog/cat prediction model.

In [5]:
import os

# Compile the model. This repeats the configuration used when the model was
# created and installs a new Adam optimizer instance.
model.compile(optimizer=tf.optimizers.Adam(1e-5),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model. `fit` accepts a tf.data dataset directly; it replaces the
# deprecated `fit_generator`.
model.fit(train_dataset,
          epochs=1,
          steps_per_epoch=100)

# Evaluate on the val dataset. `evaluate` replaces the deprecated
# `evaluate_generator`; verbose=0 keeps it silent, as `evaluate_generator`
# was by default.
val_loss, val_acc = model.evaluate(val_dataset, verbose=0)
print("Val loss = {:.4f}, val acc = {:.4f}".format(val_loss, val_acc))

# Save keras model to .h5 file after training. The output directory is
# created first because saving fails when the parent directory is missing.
save_path = 'experiments/VGG16_based_classification/vgg16_catdog.h5'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
model.save(save_path)

Val loss = 0.0610, val acc = 0.9771
