In [1]:
%matplotlib inline

In [2]:
import tensorflow as tf
import numpy as np
import os

In [3]:
# Directory that is scanned for the training images.
images_dir = '/hdd/datasets/mscoco/dataset/mscoco/train2014/'
# os.listdir returns entries in arbitrary order and may include stray non-image
# files. Sort for a reproducible ordering and keep only JPEG files, because the
# input pipeline in this notebook decodes every path with tf.image.decode_jpeg.
images = [os.path.join(images_dir, f)
          for f in sorted(os.listdir(images_dir))
          if f.lower().endswith(('.jpg', '.jpeg'))]

# Target height/width used when resizing images in the input pipeline.
image_height = 299
image_width = 299


In [4]:
def _depthwise_separable_conv(inputs,
                              num_pwc_filters,
                              width_multiplier,
                              sc,
                              stride=1):
    """Build a depthwise-separable convolution block.

    The block chains, in order: a 3x3 depthwise convolution, a batch norm,
    a 1x1 pointwise convolution and a second batch norm. Every layer is
    created under a scope name prefixed with `sc`.

    Args:
        inputs: tensor fed to `slim.separable_convolution2d`
            (presumably a 4-D image feature map -- confirm against callers).
        num_pwc_filters: base number of filters of the pointwise convolution.
        width_multiplier: factor applied to `num_pwc_filters`; the product is
            rounded to the nearest integer.
        sc: scope-name prefix shared by the four layers.
        stride: stride of the depthwise convolution (the pointwise
            convolution uses its default stride).

    Returns:
        The output tensor of the final batch-norm layer.
    """
    # round() returns a float on Python 2; a filter count must be an integer.
    num_pwc_filters = int(round(num_pwc_filters * width_multiplier))

    slim = tf.contrib.slim

    # Passing num_outputs=None skips the pointwise stage of
    # separable_convolution2d, leaving a depthwise-only convolution.
    depthwise_conv = slim.separable_convolution2d(inputs,
                                                  num_outputs=None,
                                                  stride=stride,
                                                  depth_multiplier=1,
                                                  kernel_size=[3, 3],
                                                  scope=sc + '/depthwise_conv')

    bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
    pointwise_conv = slim.convolution2d(bn,
                                        num_pwc_filters,
                                        kernel_size=[1, 1],
                                        scope=sc + '/pointwise_conv')
    return slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')

In [5]:
# Path of the SSD MobileNet graph file; downloaded and unpacked below when
# it is not present yet.
ssd_model_file = 'ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
if not os.path.isfile(ssd_model_file):
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # failed download or extraction stops here instead of surfacing later as a
    # confusing error when the .pb file is read.
    from subprocess import check_call
    url = 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz'
    tar = 'ssd_mobilenet_v1_coco_2017_11_17.tar.gz'
    # Skip the download when the archive is already on disk (the original relied
    # on `wget -nc` for this); the explicit check keeps the exit status of wget
    # meaningful for check_call.
    if not os.path.isfile(tar):
        check_call(['wget', '-nc', url])
    check_call(['tar', '-xf', tar, '-C', './'])

In [6]:
# Name of the node whose output is used as the reference image embedding;
# only the sub-graph required to compute it is kept.
orig_image_embedding_node_name = 'image_embedding/image_embedding/MatMul'

# Deserialize the graph from its binary protobuf file.
orig_graph_def = tf.GraphDef()
with open('/hdd/models/im2txt_2016_10_05.1000000/const_model.ckpt-1000000.pb', 'rb') as f:
    orig_graph_def.ParseFromString(f.read())

# Discard every node the embedding output does not depend on.
orig_graph_def = tf.graph_util.extract_sub_graph(
    orig_graph_def,
    [orig_image_embedding_node_name],
)

In [7]:
# Read the serialized SSD graph from disk.
with tf.gfile.GFile(ssd_model_file, 'rb') as fid:
    serialized_graph = fid.read()

# Parse it into a GraphDef. ParseFromString is intentionally the last
# expression of the cell so its return value is displayed as the cell output.
ssd_graph_def = tf.GraphDef()
ssd_graph_def.ParseFromString(serialized_graph)

29112121

In [8]:

def map_to_image(p):
    """Load one image file and build the (features, labels) pair for it.

    Args:
        p: scalar string tensor holding the path of a JPEG file.

    Returns:
        A `(features, labels)` tuple of dicts:
          * features['images']: the decoded image resized to
            (image_height, image_width) and cast to uint8.
          * features['keepThis']: boolean tensor, true when the last
            dimension of the resized image equals 3.
          * labels['images']: the same resized image rescaled with
            `x * 2/255 - 1` (maps 0..255 onto -1..1).
    """
    pc = tf.read_file(p)
    # Second positional argument is `channels`: always decode to 3 channels.
    img = tf.image.decode_jpeg(pc, 3)
    img = tf.image.resize_images(img, (image_height, image_width))
    features = {
        'images': tf.cast(img, dtype=tf.uint8),
        # In TF 1.x `tensor == 3` is a Python identity comparison that always
        # yields the constant False; tf.equal builds the intended element-wise
        # comparison op.
        'keepThis': tf.equal(tf.shape(img)[-1], 3),
    }
    labels = {'images': img * (2.0/255) - 1.0}
    return features, labels


dataset = tf.data.Dataset.from_tensor_slices(images)
# Decode and resize with 8 parallel calls.
dataset = dataset.map(map_to_image, num_parallel_calls=8)
# dataset = dataset.filter(lambda f, l: f['keepThis'])
# dataset = dataset.cache('/hdd/tmp/only-ssd-cache')
dataset = dataset.repeat(100000000)


In [15]:
def model_fn(features, labels, mode, params):
    """Estimator model_fn: regress new image embeddings onto the original ones.

    Two pre-built graphs are imported into the current graph:
      * `orig_graph_def` (under the name scope 'orig'), fed with
        `labels['images']`; its `orig_image_embedding_node_name` output is the
        regression target.
      * `ssd_graph_def`, fed with `features['images']`; the activation of
        'Conv2d_10_pointwise/Relu6' is the input of the new trainable head.

    The head consists of three depthwise-separable convolution blocks, a
    flatten, a 1024-unit sigmoid dense layer and a 512-unit linear dense layer.
    It is trained with a mean-squared-error loss using Adagrad.

    Args:
        features: dict with key 'images' (input of the SSD graph).
        labels: dict with key 'images' (input of the original graph).
        mode: a `tf.estimator.ModeKeys` value; forwarded to the EstimatorSpec.
            NOTE(review): the graph always reads `labels` and always builds a
            train op, so modes without labels (PREDICT) are not supported.
        params: dict with 'learning_rate' and, optionally,
            'initial_accumulator_value' for Adagrad (defaults to the learning
            rate, which is what the original positional call passed).

    Returns:
        A `tf.estimator.EstimatorSpec` carrying the loss and the train op.
    """
    tf.import_graph_def(graph_def=orig_graph_def, input_map={'ExpandDims_4': labels['images']}, name='orig')
    orig_image_embedding = tf.get_default_graph().get_tensor_by_name("orig/" + orig_image_embedding_node_name + ':0')

    feature_layers = ['FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6:0']

    res = tf.import_graph_def(ssd_graph_def, name='', input_map={'image_tensor:0': features['images']}, return_elements=feature_layers)

    net = res[0]

    # Trainable head stacked on top of the imported feature layer.
    width_multiplier = 1
    net = _depthwise_separable_conv(net, 1024, width_multiplier, stride=2, sc='x_conv_ds_11')
    net = _depthwise_separable_conv(net, 1024, width_multiplier, stride=3, sc='x_conv_ds_12')
    net = _depthwise_separable_conv(net, 512, width_multiplier, stride=2, sc='x_conv_ds_13')

    net = tf.layers.flatten(net)

    net = tf.layers.dense(net, 1024, activation=tf.nn.sigmoid)

    image_embeddings = tf.layers.dense(net, 512)

    tf.summary.histogram('image_embedding/orig', orig_image_embedding)
    tf.summary.histogram('image_embedding/self', image_embeddings)
    tf.summary.histogram('image_embedding/diff', orig_image_embedding - image_embeddings)

    # tf.losses.mean_squared_error registers the loss in the losses collection,
    # which get_total_loss() below sums (together with any regularization
    # losses).
    total_loss = tf.losses.mean_squared_error(orig_image_embedding, image_embeddings)
    tf.summary.scalar("losses/copy", total_loss)

    global_step = tf.train.get_global_step()
    learning_rate = params['learning_rate']
    loss = tf.losses.get_total_loss()
    tf.summary.scalar('learning_rate', learning_rate)
    tf.summary.scalar('batch_size', tf.shape(image_embeddings)[0])

    # The second positional argument of AdagradOptimizer is
    # `initial_accumulator_value`; the original code passed the learning rate
    # there. Keep that value as the default but name it explicitly and allow it
    # to be overridden through `params`.
    optimizer = tf.train.AdagradOptimizer(
        learning_rate,
        initial_accumulator_value=params.get('initial_accumulator_value', learning_rate))

    # slim.batch_norm places its moving-average updates in the UPDATE_OPS
    # collection; they only run if the train op depends on them.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(
        mode,
#         predictions=predictions,
        loss=loss,
        train_op=train_op
    )

In [16]:
# Root directory under which every run gets its own numbered sub-directory.
model_root = '/hdd/train/only-ssd/'
# os.listdir raises if the root does not exist yet (e.g. on a fresh machine),
# so create it before counting its entries.
if not os.path.isdir(model_root):
    os.makedirs(model_root)
# The run directory is named after the number of entries already in the root.
model_dir = model_root + str(len(os.listdir(model_root)))
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir=model_dir,
    params={
        'learning_rate': 0.05,
    },
    # Initialize variables from the checkpoint of an earlier run.
    warm_start_from=tf.estimator.WarmStartSettings('/hdd/train/only-ssd/32'),
    config=tf.estimator.RunConfig(log_step_count_steps=100, keep_checkpoint_max=30))
estimator

INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 30, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f5e4d7e0cd0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_device_fn': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/hdd/train/only-ssd/34', '_train_distribute': None, '_save_summary_steps': 100}


<tensorflow.python.estimator.estimator.Estimator at 0x7f5e4d7e0c50>

In [None]:
# Train with a growing batch size; each iteration runs as many steps as there
# are full batches in one pass over `images`.
for batch_size in range(2, 1000, 2):
    # Floor division keeps the step count an integer on Python 3 as well, and
    # matches drop_remainder=True (only full batches are produced).
    step_count = len(images) // batch_size
    if step_count == 0:
        # Estimator.train rejects steps <= 0, and every larger batch size
        # would give zero steps too.
        break
    estimator.train(input_fn=lambda: dataset.batch(batch_size, drop_remainder=True).make_one_shot_iterator().get_next(), steps=step_count)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='/hdd/train/only-ssd/32', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: ('/hdd/train/only-ssd/32',)
INFO:tensorflow:Warm-starting variable: x_conv_ds_11/dw_batch_norm/beta; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: x_conv_ds_11/pointwise_conv/biases; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: x_conv_ds_13/depthwise_conv/biases; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dense_1/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: x_conv_ds_11/depthwise_conv/depthwise_weights; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dense/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: x_conv_ds_12/pointwise_conv/biases; prev_var_name