# Dogs vs Cats Transfer Learning

Transfer learning reuses a pre-trained network to learn new classes. This lesson takes a network trained on ImageNet and uses it as the base for a new classifier that predicts whether an image shows a dog or a cat.

This notebook uses the previously generated TFRecords for data and the VGG16 model as a base to learn new classes

In [1]:
import os
import tensorflow as tf
import urllib
import glob
import pickle

In [2]:
# Display the installed TensorFlow version (the recorded run below used 1.3.0)
tf.__version__

'1.3.0'

## Data

The image directories were previously converted into TFRecords in DataPreparation.ipynb, so we use those files as our data source.

In [3]:
# Root folder of the Dogs vs Cats dataset ('~' is expanded to the user's home directory)
data_dir = os.path.expanduser('~/data/DogsVsCats')

# One sub-folder per split; note that the test split is stored under 'test1'
dev_data_dir, train_data_dir, test_data_dir, validation_data_dir = (
    os.path.join(data_dir, split_folder)
    for split_folder in ('dev', 'train', 'test1', 'validation')
)

The dev dataset contains only 10 images and is useful for checking that the network runs correctly. It avoids long waits while testing; once the whole architecture works as expected, switch to the train and validation sets.

In [4]:
# The dev and test splits are each referenced as a single TFRecord file
dev_record_filenames = os.path.join(data_dir, 'dev.tfrecord')
test_record_filenames = os.path.join(data_dir, 'test.tfrecord')

# The train and validation splits are matched with a glob pattern. glob returns
# matches in arbitrary order, so sort them to keep the file order stable across runs.
train_record_filenames = sorted(glob.glob(os.path.join(data_dir, 'train-*.tfrecord')))
validation_record_filenames = sorted(glob.glob(os.path.join(data_dir, 'validation-*.tfrecord')))

Dictionaries to map between integer labels and string labels for classification

In [5]:
# NOTE: pickle.load can execute arbitrary code, so only load a class_name2id.p that
# comes from a trusted source (presumably the data-preparation notebook - confirm).
with open(os.path.join(data_dir, 'class_name2id.p'), 'rb') as p:
    class_name2id = pickle.load(p)

# Invert the name -> id mapping. Enumerating the dict would pair positions with the
# class names and build another name -> index mapping rather than id -> name.
id2class = {class_id: class_name for class_name, class_id in class_name2id.items()}
NUM_CLASSES = len(class_name2id)

Helper function to construct our data `input_fn` functions, which are later passed to the `Experiment`

In [6]:
def data_input_fn(filenames, num_epochs=1, batch_size=64, shuffle=False):
    
    def _input_fn():
        def _parse_record(tf_record):
            features = {
                'image': tf.FixedLenFeature([], dtype=tf.string),
                'label': tf.FixedLenFeature([], dtype=tf.int64)
            }
            record = tf.parse_single_example(tf_record, features)

            image_raw = tf.decode_raw(record['image'], tf.float32)
            image_raw = tf.reshape(image_raw, shape=(224, 224, 3))

            label = tf.cast(record['label'], tf.int32)
            label = tf.one_hot(label, depth=NUM_CLASSES)

            return { 'image': image_raw }, label
        
        # For TF dataset blog post, see https://developers.googleblog.com/2017/09/introducing-tensorflow-datasets.html
        dataset = tf.contrib.data.TFRecordDataset(filenames)
        dataset = dataset.map(_parse_record)

        if shuffle:
            dataset = dataset.shuffle(buffer_size=256)

        dataset = dataset.repeat(num_epochs)
        dataset = dataset.batch(batch_size)

        iterator = dataset.make_one_shot_iterator()
        features, labels = iterator.get_next()

        return features, labels
    
    return _input_fn

## Model

In [7]:
def vgg16_model_fn(features, mode, params):
    """Build the VGG16-based classifier graph and return the class logits.

    Args:
        features: Dict holding an 'image' tensor, used as the input tensor of a
            VGG16 base declared with input_shape=(224, 224, 3).
        mode: A `tf.estimator.ModeKeys` value; dropout is only active in TRAIN.
        params: Hyper-parameter dict (not read here).

    Returns:
        Logits tensor with NUM_CLASSES units per example.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    with tf.variable_scope('vgg_base'):
        # Use a pre-trained VGG16 model and drop off the top layers as we will retrain
        # with our own dense output for our custom classes.
        # pooling='avg' applies global average pooling to the last convolutional block.
        # NOTE(review): confirm the pre-trained weights are still in place once the
        # Estimator initialises variables in its own session.
        vgg16_base = tf.contrib.keras.applications.VGG16(include_top=False,
                                                         input_shape=(224, 224, 3),
                                                         input_tensor=features['image'],
                                                         pooling='avg')

        # Disable training for all layers to increase speed for transfer learning.
        # If the new classes are significantly different from ImageNet, it may be
        # worth leaving the base trainable instead.
        for layer in vgg16_base.layers:
            layer.trainable = False

        # The flags above are set after the layers were built, so the base variables
        # may still be in the trainable-variables collection the optimizer reads.
        # Stopping the gradient at the base output guarantees no update reaches them.
        x = tf.stop_gradient(vgg16_base.output)

    # Add dense and dropout layers on top of the VGG base
    with tf.variable_scope("fc"):
        x = tf.contrib.layers.flatten(x)
        # `trainable` decides whether variables are registered as trainable; it is not
        # a train/eval switch, so the dense layers keep the default (trainable) in
        # every mode. Only dropout depends on the mode.
        x = tf.layers.dense(x, units=4096, activation=tf.nn.relu, name='fc1')
        x = tf.layers.dense(x, units=4096, activation=tf.nn.relu, name='fc2')
        x = tf.layers.dropout(x, rate=0.5, training=is_training)

    # Finally add a dense layer with one output unit per class
    with tf.variable_scope("Prediction"):
        logits = tf.layers.dense(x, units=NUM_CLASSES)
        return logits

In [8]:
def model_fn(features, labels, mode, params):
    """Estimator `model_fn`: build predictions, loss, train op and metrics.

    Args:
        features: Feature dict forwarded to `vgg16_model_fn`.
        labels: One-hot labels (used as `onehot_labels` for the loss); not used
            in PREDICT mode.
        mode: A `tf.estimator.ModeKeys` value selecting which spec to build.
        params: Hyper-parameter dict; 'learning_rate' is read in TRAIN mode.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    logits = vgg16_model_fn(features, mode, params)

    scores = tf.nn.softmax(logits)
    predicted_class = tf.argmax(logits, axis=1, output_type=tf.int32)

    # Dictionary with label as outcome with greatest probability
    # Softmax will provide probabilities of each label
    predictions = {
        'class': predicted_class,
        'probabilities': scores
    }

    # Return our EstimatorSpec for predict mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output': tf.estimator.export.PredictOutput(predictions)
        }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs
        )

    # Softmax loss
    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

    # Return the EstimatorSpec for training mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=params['learning_rate'],
            optimizer=tf.train.AdamOptimizer
        )

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op
        )

    assert mode == tf.estimator.ModeKeys.EVAL

    # Setup evaluation metrics.
    # The labels are one-hot, so the class index must be taken along axis 1 (the class
    # axis). The default axis=0 would reduce over the batch and no longer line up
    # with predicted_class.
    true_class = tf.argmax(labels, axis=1, output_type=tf.int32)
    eval_metrics = {
        'accuracy': tf.metrics.accuracy(
            labels=true_class,
            predictions=predicted_class,
            name='accuracy'
        )
    }

    # Return EstimatorSpec for evaluation mode
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        eval_metric_ops=eval_metrics
    )

## Experiment

In [9]:
# Hyper-parameters handed to model_fn through the Estimator's `params`
params = dict(learning_rate=2e-3)

# Run configuration pointing at the model directory
run_config = tf.contrib.learn.RunConfig().replace(model_dir='/tmp/DogsVsCats')

dog_cat_estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config, params=params)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1216fe4a8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/DogsVsCats'}


In [10]:
# Both input functions pass num_epochs=None through to Dataset.repeat;
# the length of the training run is set by train_steps.
experiment = tf.contrib.learn.Experiment(
    estimator=dog_cat_estimator,
    train_input_fn=data_input_fn(
        filenames=train_record_filenames,
        num_epochs=None,
        batch_size=10,
        shuffle=True,
    ),
    eval_input_fn=data_input_fn(
        filenames=validation_record_filenames,
        num_epochs=None,
        batch_size=10,
    ),
    train_steps=10,
)

In [None]:
# Run training only for now; switch to the commented-out call below to also
# evaluate with the experiment's eval_input_fn.
# experiment.train_and_evaluate()
experiment.train()