# Dogs vs Cats Transfer Learning

Transfer learning reuses a pre-trained network to learn new classes. This lesson takes networks trained on ImageNet and uses them to learn a new classifier that predicts whether an image is a dog or a cat.

This notebook uses the previously generated TFRecords for data and the VGG16 model as a base to learn new classes

In [1]:
import os
import tensorflow as tf
import urllib
import glob
import pickle

  return f(*args, **kwds)


In [2]:
# Display the installed TensorFlow version; the recorded run of this notebook shows '1.4.0'.
tf.__version__

'1.4.0'

## Data

We previously converted the image directories into TFRecords in DataPreparation.ipynb, so we will use those files as our data source.

In [3]:
# Root folder of the Dogs vs Cats data set (the leading "~" is expanded to the
# user's home directory).
data_dir = os.path.expanduser('~/data/DogsVsCats')

# One sub-folder per split; note the test split lives in 'test1'.
dev_data_dir, train_data_dir, test_data_dir, validation_data_dir = (
    os.path.join(data_dir, split_name)
    for split_name in ('dev', 'train', 'test1', 'validation')
)

The dev dataset contains only 10 images and is useful for checking that the network runs correctly. It avoids long waits while testing the network; once the whole architecture works as expected, switch to the train and validation sets.

In [4]:
# Single-file record sets: each of these is one path string.
dev_record_filenames = os.path.join(data_dir, 'dev.tfrecord')
test_record_filenames = os.path.join(data_dir, 'test.tfrecord')

# Sharded record sets: each of these is the list of paths matching the pattern.
train_record_filenames = glob.glob(
    os.path.join(data_dir, 'train-*.tfrecord'))
validation_record_filenames = glob.glob(
    os.path.join(data_dir, 'validation-*.tfrecord'))

Dictionaries to map between integer labels and string labels for classification

In [5]:
# Load the pickled class mapping stored alongside the TFRecord files.
# NOTE: pickle.load can execute arbitrary code, so only load a file that you
# generated yourself.
with open(os.path.join(data_dir, 'class2idx.p'), 'rb') as p:
    class_name2id = pickle.load(p)

# Build the reverse lookup (integer id -> class name).
# When the pickle holds a name -> id dict (as the file name suggests), invert it
# using the ids it actually stores rather than relying on iteration order.
# A plain sequence of names is still supported by numbering it positionally.
if isinstance(class_name2id, dict):
    id2class = {class_id: class_name
                for class_name, class_id in class_name2id.items()}
else:
    id2class = dict(enumerate(class_name2id))
NUM_CLASSES = len(id2class)

Helper function to construct our data `input_fn` functions, which are later passed to the `Estimator`'s train and evaluation specs

# lesson 1

In [6]:
def data_input_fn(filenames, num_epochs=1, batch_size=64, shuffle=False):
    """Create an `input_fn` that reads batches of examples from TFRecord files.

    Args:
        filenames: TFRecord path(s) handed to `tf.data.TFRecordDataset`.
        num_epochs: repeat count passed to `Dataset.repeat`.
        batch_size: number of examples per batch.
        shuffle: when true, shuffle the records with a 256-element buffer.

    Returns:
        A zero-argument function that builds the input pipeline and returns
        `(features, labels)`, where `features` is `{'image': ...}` and
        `labels` is one-hot encoded with depth `NUM_CLASSES`.
    """

    def _decode_example(serialized):
        """Parse one serialized example into ({'image': tensor}, one-hot label)."""
        schema = {
            'image': tf.FixedLenFeature([], dtype=tf.string),
            'label': tf.FixedLenFeature([], dtype=tf.int64),
        }
        parsed = tf.parse_single_example(serialized, schema)

        # The image bytes are raw float32 values for a 224x224 RGB picture.
        pixels = tf.decode_raw(parsed['image'], tf.float32)
        pixels = tf.reshape(pixels, shape=(224, 224, 3))

        one_hot_label = tf.one_hot(
            tf.cast(parsed['label'], tf.int32), depth=NUM_CLASSES)

        return {'image': pixels}, one_hot_label

    def _input_fn():
        # For TF dataset blog post, see https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html
        records = tf.data.TFRecordDataset(filenames).map(_decode_example)

        if shuffle:
            records = records.shuffle(buffer_size=256)

        batches = records.repeat(num_epochs).batch(batch_size)

        features, labels = batches.make_one_shot_iterator().get_next()
        return features, labels

    return _input_fn

## Model

In [7]:
def vgg16_model_fn(features, mode, params):
    """Build the VGG16-based classifier graph and return the class logits.

    Args:
        features: dict whose 'image' entry is the input batch; it is wired in
            as the VGG16 input tensor with per-image shape (224, 224, 3).
        mode: a `tf.estimator.ModeKeys` value; dropout is only active in TRAIN.
        params: hyperparameter dict (not used by this function).

    Returns:
        The un-normalised logits, `NUM_CLASSES` units per example.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    with tf.name_scope('vgg_base'):
        # Use a pre-trained VGG16 model and drop off the top layers as we will
        # retrain with our own dense output for our custom classes.
        # NOTE(review): the recorded run of this notebook fails with
        # "Tensor vgg_base/Placeholder:0 ... is not an element of this graph";
        # presumably the Keras weight loading is bound to a different
        # graph/session than the Estimator's -- confirm before relying on this.
        vgg16_base = tf.keras.applications.VGG16(
            include_top=False,
            input_shape=(224, 224, 3),
            input_tensor=features['image'],
            pooling='avg')

        # Disable training for all base layers to speed up transfer learning.
        # If the new classes are significantly different from ImageNet, it may
        # be worth leaving these as trainable = True.
        # NOTE(review): flipping `trainable` after the layers are built may not
        # remove their variables from the optimizer's variable list -- verify.
        for layer in vgg16_base.layers:
            layer.trainable = False

        x = vgg16_base.output

    with tf.variable_scope("fc"):
        x = tf.layers.flatten(x)
        # `trainable` controls whether a layer's variables are registered as
        # trainable; it is not a train/eval switch, so the new layers keep the
        # default (True) in every mode. Only dropout depends on the mode.
        x = tf.layers.dense(x, units=4096, activation=tf.nn.relu, name='fc1')
        x = tf.layers.dense(x, units=4096, activation=tf.nn.relu, name='fc2')
        x = tf.layers.dropout(x, rate=0.5, training=is_training)

    # Finally add a dense layer with one unit per class for the predictions
    with tf.variable_scope("Prediction"):
        x = tf.layers.dense(x, units=NUM_CLASSES)
        return x

In [8]:
def model_fn(features, labels, mode, params):
    """Estimator `model_fn` for the dog/cat classifier.

    Args:
        features: dict with the input batch under 'image'.
        labels: one-hot labels (used for the loss and accuracy; not needed in
            PREDICT mode, where this function returns before touching them).
        mode: a `tf.estimator.ModeKeys` value (TRAIN, EVAL or PREDICT).
        params: hyperparameter dict; 'learning_rate' is read in TRAIN mode.

    Returns:
        A `tf.estimator.EstimatorSpec` configured for the given mode.
    """
    tf.summary.image('images', features['image'], max_outputs=6)

    logits = vgg16_model_fn(features, mode, params)

    # Dictionary with label as outcome with greatest probability
    # Softmax will provide probabilities of each label
    predictions = {
        'class': tf.argmax(logits, axis=1, output_type=tf.int32),
        'probabilities': tf.nn.softmax(logits)
    }

    # Return our EstimatorSpec for predict mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Exporting a SavedModel needs `export_outputs`; without it the
        # `export_savedmodel` call in the Serving section has no signature to
        # write. Expose the same tensors that `predict()` returns.
        export_outputs = {
            'predict_output': tf.estimator.export.PredictOutput(predictions)
        }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs
        )

    # Softmax loss
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
    # Labels arrive one-hot, so reduce them back to class indices for accuracy.
    accuracy = tf.metrics.accuracy(
        labels=tf.argmax(labels, axis=1),
        predictions=predictions['class'],
        name='accuracy'
    )

    # Return the EstimatorSpec for training mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=params['learning_rate'],
            optimizer=tf.train.AdamOptimizer
        )

        # accuracy is a (value, update_op) pair; summarising the update op
        # refreshes the running accuracy every time the summary is evaluated.
        tf.summary.scalar('train_accuracy', accuracy[1])

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op
        )

    assert mode == tf.estimator.ModeKeys.EVAL

    # Setup evaluation metrics
    eval_metrics = {'accuracy': accuracy}

    # Return EstimatorSpec for evaluation mode
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        eval_metric_ops=eval_metrics
    )

In [9]:
# Hyperparameters handed to the Estimator; `model_fn` reads 'learning_rate'.
params = dict(learning_rate=2e-3)

## Estimator

In [10]:
# Run configuration: model files for this Estimator live under /tmp/DogsVsCats.
run_config = tf.estimator.RunConfig(model_dir='/tmp/DogsVsCats')

# Estimator wrapping our custom model function and hyperparameters.
dog_cat_estimator = tf.estimator.Estimator(
    model_fn, config=run_config, params=params)

INFO:tensorflow:Using config: {'_model_dir': '/tmp/DogsVsCats', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12074cba8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [11]:
# Training: shuffled batches of 10, repeated without an epoch limit and
# stopped by max_steps instead.
train_spec = tf.estimator.TrainSpec(
    data_input_fn(
        train_record_filenames,
        shuffle=True,
        batch_size=10,
        num_epochs=None,
    ),
    max_steps=12,
)

# Evaluation: the validation records with the input function's defaults.
eval_spec = tf.estimator.EvalSpec(data_input_fn(validation_record_filenames))

In [12]:
# Run the combined train/evaluate loop with the specs defined above.
# The recorded log shows it running locally (non-distributed) and checkpointing to /tmp/DogsVsCats.
tf.estimator.train_and_evaluate(dog_cat_estimator, train_spec, eval_spec)

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/DogsVsCats/model.ckpt.
INFO:tensorflow:loss = 0.693403, step = 1
INFO:tensorflow:Saving checkpoints for 12 into /tmp/DogsVsCats/model.ckpt.
INFO:tensorflow:Loss for final step: 0.0.


TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("vgg_base/Placeholder:0", shape=(3, 3, 3, 64), dtype=float32) is not an element of this graph.

## Serving

In [None]:
# Raw serving input: callers feed a float32 image batch of shape
# (batch, 224, 224, 3) under the 'image' key.
feature_spec = dict(
    image=tf.placeholder(tf.float32, shape=(None, 224, 224, 3)),
)
input_receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)

# Write the SavedModel beneath /tmp/DogsVsCatsExport.
dog_cat_estimator.export_savedmodel('/tmp/DogsVsCatsExport', input_receiver_fn)

# lesson 3

Data augmentation of image set. Try augmentation within `input_fn` via operations available in [`tf.image`](https://www.tensorflow.org/versions/r1.0/api_docs/python/tf/image) such as
* `random_flip_left_right`
* `random_flip_up_down`
* random rotation
* random skew

Also try adding Batch Normalization to the input features

## Data Augmentation

In [7]:
def data_augmentation_input_fn(filenames, num_epochs=1, batch_size=64, shuffle=False):
    """Create an `input_fn` that reads TFRecords and randomly flips each image.

    Args:
        filenames: TFRecord path(s) handed to `tf.data.TFRecordDataset`.
        num_epochs: repeat count passed to `Dataset.repeat`.
        batch_size: number of examples per batch.
        shuffle: when true, shuffle the records with a 256-element buffer.

    Returns:
        A zero-argument function that builds the input pipeline and returns
        `(features, labels)`, where `features` is `{'image': ...}` and
        `labels` is one-hot encoded with depth `NUM_CLASSES`.
    """

    def _decode_and_augment(serialized):
        """Parse one serialized example, flip it at random, one-hot its label."""
        parsed = tf.parse_single_example(serialized, {
            'image': tf.FixedLenFeature([], dtype=tf.string),
            'label': tf.FixedLenFeature([], dtype=tf.int64),
        })

        # The image bytes are raw float32 values for a 224x224 RGB picture.
        pixels = tf.reshape(
            tf.decode_raw(parsed['image'], tf.float32), shape=(224, 224, 3))

        # Data augmentation: random horizontal flip only for now.
        # fastai also does random rotation, width/height shift, shear, zoom
        pixels = tf.image.random_flip_left_right(pixels)

        class_index = tf.cast(parsed['label'], tf.int32)
        return {'image': pixels}, tf.one_hot(class_index, depth=NUM_CLASSES)

    def _input_fn():
        # For TF dataset blog post, see https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html
        pipeline = tf.data.TFRecordDataset(filenames)
        pipeline = pipeline.map(_decode_and_augment)

        if shuffle:
            pipeline = pipeline.shuffle(buffer_size=256)

        pipeline = pipeline.repeat(num_epochs).batch(batch_size)

        features, labels = pipeline.make_one_shot_iterator().get_next()
        return features, labels

    return _input_fn