**based on: train-vgg-imagenet-template version 1.0, Jonas Rauber, 2017-02-20**

Please let me know of any ideas to improve this template. I recommend not removing the version number and checking for new versions of this template at https://github.com/jonasrauber/tensorflow-imagenet when running into problems or starting a new notebook. VGG19 is just an example that can easily be replaced with other networks. See the comments in the code cells to specify paths, increase data I/O performance, adjust the batch size, control logging or specify the variables to train.

In [None]:
# User configuration: the two 'TODO' placeholders must be replaced before
# running the remaining cells.

# specify the path to your local clone of https://github.com/tensorflow/models,
# which is used to load ImageNet and apply VGG preprocessing; this path is
# appended to sys.path and must make the `datasets` and `preprocessing`
# packages importable (presumably the `slim` subdirectory of the clone)
TENSORFLOW_MODELS_SLIM = 'TODO'

# specify the path of the ImageNet tfrecords files
# (passed to imagenet.get_split when the training data is loaded)
IMAGENET_DATA = '/gpfs01/bethge/data/imagenet'

# specify the path to your initial checkpoint to restore pretrained models, e.g. VGG19
# (the model variables are initialized from this file before training starts)
INITIAL_CHECKPOINT = '/gpfs01/bethge/data/tf-model-checkpoints/vgg_19.ckpt'

# specify the directory where checkpoints and summaries are stored; start TensorBoard with access to this directory
LOGDIR = 'TODO'

In [None]:
# ipython configuration
%load_ext autoreload
%autoreload 2
%matplotlib inline

# standard library
import sys

# third-party packages
import tensorflow as tf
slim = tf.contrib.slim

# local modules
IMPORT_PATHS = [TENSORFLOW_MODELS_SLIM]
sys.path.extend(set(IMPORT_PATHS) - set(sys.path))
from datasets import imagenet
from preprocessing import vgg_preprocessing

In [None]:
def vgg_19_train(inputs,
                 is_training=True,
                 dropout_keep_prob=0.5,
                 scope='vgg_19',
                 reuse=False):
    """Build VGG19 with a fully-connected (dense) classifier head.

    Fully-connected layers are currently faster than 1x1 convolutions
    and should be used when VGG is part of a training pipeline. During
    evaluation, you might want to use the corresponding fully-convolutional
    network to be able to apply it to other image sizes.

    Args:
        inputs: input tensor fed to the first convolution (presumably a
            batch of preprocessed 224x224 RGB images -- confirm with caller).
        is_training: forwarded to both dropout layers.
        dropout_keep_prob: keep probability forwarded to both dropout layers.
        scope: name of the variable scope that wraps all layers.
        reuse: forwarded to tf.variable_scope to control variable reuse.

    Returns:
        The output of the final 'fc8' layer: 1000 units without an
        activation function, i.e. unnormalized class scores.
    """
    # (conv scope, pool scope, number of stacked 3x3 convolutions, channels)
    conv_stages = (
        ('conv1', 'pool1', 2, 64),
        ('conv2', 'pool2', 2, 128),
        ('conv3', 'pool3', 4, 256),
        ('conv4', 'pool4', 4, 512),
        ('conv5', 'pool5', 4, 512),
    )
    # (fully-connected scope, dropout scope) of the two hidden dense layers
    dense_stages = (
        ('fc6', 'dropout6'),
        ('fc7', 'dropout7'),
    )

    with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse):
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d]):
            net = inputs

            # convolutional feature extractor: each stage is a stack of 3x3
            # convolutions followed by a 2x2 max pooling
            for conv_scope, pool_scope, depth, channels in conv_stages:
                net = slim.repeat(net, depth, slim.conv2d, channels, [3, 3],
                                  scope=conv_scope)
                net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

            # dense classifier head operating on the flattened feature map
            net = slim.flatten(net)
            for fc_scope, dropout_scope in dense_stages:
                net = slim.fully_connected(net, 4096, scope=fc_scope)
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope=dropout_scope)

            logits = slim.fully_connected(net, 1000,
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          scope='fc8')
    return logits

In [None]:
def get_training_data():
    """Create the input pipeline for the ImageNet training split.

    Reads single examples from the tfrecords files under IMAGENET_DATA,
    applies the VGG training preprocessing (output size 224x224, resize side
    sampled between 256 and 512) and groups the results into batches of 32.

    Returns:
        A tuple (images, labels) of batched tensors as produced by
        tf.train.batch. Labels are shifted from 1..1000 to 0..999 and every
        label is reshaped to shape (1,) before batching.
    """
    # data loading and preprocessing are pinned to the CPU
    with tf.device('/cpu:0'):
        dataset = imagenet.get_split('train', IMAGENET_DATA)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=2, # controls the speed at which data is loaded
            shuffle=True,
            common_queue_capacity=256,
            common_queue_min=128)
        image, label = provider.get(['image', 'label'])
        
        # preprocess the image
        image = vgg_preprocessing.preprocess_image(
            image,
            224,
            224,
            is_training=True,
            resize_side_min=256,
            resize_side_max=512)
        
        # preprocess the label
        # tf.sub was removed in TensorFlow 1.0; tf.subtract is its replacement
        # and is also available in TensorFlow 0.12
        label = tf.subtract(label, 1) # 1..1000 to 0..999
        label = tf.reshape(label, (1,))

    images, labels = tf.train.batch(
        [image, label],
        batch_size=32, # specify the batch size here
        num_threads=8, # controls the speed at which images are preprocessed
        capacity=128)
    return images, labels

In [None]:
# Build the complete training graph inside a dedicated tf.Graph. The names
# g, train_op, summary_op and init_fn defined here are passed to
# slim.learning.train in the training cell.
g = tf.Graph()
with g.as_default():
    # load the data: batched, preprocessed training images and their labels
    images, labels = get_training_data()
    
    # apply the model; is_training is forwarded to the dropout layers
    predictions = vgg_19_train(images, is_training=True)
    
    # define the loss
    # `loss` is not referenced again below; the value that is optimized is the
    # one returned by slim.losses.get_total_loss() (presumably the sum of all
    # losses registered with slim -- confirm against the slim documentation)
    loss = slim.losses.sparse_softmax_cross_entropy(predictions, labels)
    total_loss = slim.losses.get_total_loss()
    
    # define the optimizer: plain gradient descent with a fixed learning rate
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    
    # create the train op
    train_op = slim.learning.create_train_op(total_loss, optimizer) # specify variables to train here
    
    # summaries: define them first, then merge them into a single op
    tf.summary.histogram('predictions', predictions)
    tf.summary.scalar('total_loss', total_loss)
    summary_op = tf.summary.merge_all()
    
    # define an init function that restores the pretrained VGG
    # (the values of slim.get_model_variables() are read from INITIAL_CHECKPOINT)
    init_fn = slim.assign_from_checkpoint_fn(
        INITIAL_CHECKPOINT,
        slim.get_model_variables(),
        reshape_variables=True) # reshape variables because the checkpoint is for a fully-convolutional network

In [None]:
# Start the training loop on graph g, using the train op, init function and
# summary op built in the graph-construction cell.
# this will run forever: stop it using Kernel -> Interrupt
# the first few steps will take longer, until the queues are filled
slim.learning.train(
    train_op,
    LOGDIR, # checkpoints and summaries are written to this directory
    graph=g,
    init_fn=init_fn, # restores the pretrained weights from INITIAL_CHECKPOINT
    log_every_n_steps=1, # increase to avoid too many log statements
    summary_op=summary_op,
    save_summaries_secs=60) # interval between summary writes, in seconds