In [1]:
import tensorflow as tf
import numpy as np
import pickle

from utils.Param import get_default_param
from utils.eval import fpr, retrieval_recall_K

from network.model_fn import triplet_model_fn
from network.dataset.sem_patchdata import input_fn
from network.train import TripletEstimator

  from ._conv import register_converters as _register_converters


In [2]:
# Seed NumPy and TensorFlow with the same value so runs can be reproduced.
SEED = 2019
np.random.seed(SEED)
tf.set_random_seed(SEED)

In [3]:
# Start from the project's default 'AUSTIN' parameter set; individual fields
# can be adjusted afterwards as needed.
log_dir = './log/campus'
param = get_default_param(log_dir=log_dir, mode='AUSTIN')

In [4]:
# Notebook-specific overrides of the default parameters, applied in order.
param_overrides = (
    ('data_dir', './Data/austin'),
    ('train_datasets', 'campus_patch'),
    ('test_datasets', 'scene_patch'),
    ('batch_size', 128),
    ('n_epoch', 100),
    ('train_log_every', 1000),
)
for attr_name, attr_value in param_overrides:
    setattr(param, attr_name, attr_value)

### data pipeline

In [5]:
tf.logging.info("Preparing data pipeline ...")
with tf.device('/cpu:0'):
    with tf.name_scope('input'):
        # The train and test pipelines are built from identical arguments;
        # each call returns its own (dataset, sampler) pair.
        pipeline_args = dict(
            data_dir=param.data_dir,
            base_patch_size=param.base_patch_size,
            patches_per_row=param.patches_per_row,
            patches_per_col=param.patches_per_col,
            batch_size=param.batch_size,
            patch_size=param.patch_size,
            n_channels=param.n_channels
        )
        train_dataset, train_data_sampler = input_fn(**pipeline_args)
        test_dataset, test_data_sampler = input_fn(**pipeline_args)

        # A single iterator defined by the training dataset's structure;
        # running one of the two initializers selects which dataset feeds it.
        data_iterator = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        train_dataset_init = data_iterator.make_initializer(train_dataset)
        test_dataset_init = data_iterator.make_initializer(test_dataset)
        batch_data = data_iterator.get_next()

INFO:tensorflow:Preparing data pipeline ...


### load data

In [6]:
# Load the training dataset (bmp files) into the training sampler.
train_load_options = {
    'dir_name': param.train_datasets,
    'ext': 'bmp',
    'patch_size': param.patch_size,
    'n_channels': param.n_channels,
    'debug': True,
}
train_data_sampler.load_dataset(**train_load_options)

Loading dataset campus_patch: 100%|██████████| 4500/4500 [00:33<00:00, 135.96it/s]
Load triplet samples: 1000000it [00:01, 540000.25it/s]
Load matched pairs: 100000it [00:00, 411713.25it/s]
Load retrieval set: 27000it [00:00, 706067.76it/s]

-- Dataset loaded   : campus_patch
-- # patches        : 351000
-- # triplet samples: 1000000
-- # matched pairs  : 50000
-- # retrieval set  : 27000





In [7]:
# Load the test dataset (bmp files) into the test sampler.
test_load_options = {
    'dir_name': param.test_datasets,
    'ext': 'bmp',
    'patch_size': param.patch_size,
    'n_channels': param.n_channels,
    'debug': True,
}
test_data_sampler.load_dataset(**test_load_options)

Loading dataset scene_patch: 100%|██████████| 2550/2550 [00:18<00:00, 139.32it/s]
Load triplet samples: 1000000it [00:01, 651593.59it/s]
Load matched pairs: 100000it [00:00, 465318.74it/s]
Load retrieval set: 15300it [00:00, 730149.63it/s]

-- Dataset loaded   : scene_patch
-- # patches        : 198900
-- # triplet samples: 1000000
-- # matched pairs  : 50000
-- # retrieval set  : 15300





### compute data statistics

In [8]:
# Load the cached (mean, std) of the training set if a cache file exists;
# otherwise compute them from the training sampler and write the cache.
# The same training statistics are then used to normalize both samplers.
tf.logging.info('Loading training stats: %s' % param.train_datasets)
stats_path = './data/stats_%s.pkl' % param.train_datasets
try:
    # Pickle data is binary: on Python 3 the file must be opened with 'rb'.
    # Opening it in text mode made every load fail, and the former bare
    # `except:` hid that failure, so the cache was never actually used.
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # stats_path at files written by this notebook.
    with open(stats_path, 'rb') as stats_file:
        mean, std = pickle.load(stats_file)
except (OSError, EOFError, pickle.UnpicklingError) as err:
    # Missing, unreadable or truncated cache: recompute and rewrite it.
    tf.logging.info('Stats cache unavailable (%s); recomputing ...' % err)
    mean, std = train_data_sampler.generate_stats()
    with open(stats_path, 'wb') as stats_file:
        pickle.dump([mean, std], stats_file)
tf.logging.info('Mean: {:.5f}'.format(mean))
tf.logging.info('Std : {:.5f}'.format(std))
train_data_sampler.normalize_data(mean, std)
test_data_sampler.normalize_data(mean, std)

INFO:tensorflow:Loading training stats: campus_patch
INFO:tensorflow:Mean: 0.36552
INFO:tensorflow:Std : 0.23587


Normalizing data: 100%|██████████| 351000/351000 [00:10<00:00, 33738.77it/s]
Normalizing data: 100%|██████████| 198900/198900 [00:06<00:00, 32880.80it/s]


### build model

In [9]:
tf.logging.info("Creating the model ...")
anchors, positives, negatives = batch_data

# Keyword configuration for the triplet network, taken from `param`.
model_config = dict(
    n_feats=param.n_features,
    mode='TRAIN',
    cnn_name=param.cnn_name,
    loss_name=param.loss_name,
    optimizer_name=param.optimizer_name,
    margin=param.margin,
    use_regularization_loss=param.use_regularization,
    learning_rate=param.learning_rate,
    shared_batch_layers=True,
    name='triplet-net'
)
spec = triplet_model_fn(anchors, positives, negatives, **model_config)

# The estimator wraps the model spec and saves under param.log_dir.
estimator = TripletEstimator(spec, save_dir=param.log_dir)

INFO:tensorflow:Creating the model ...
INFO:tensorflow:CNN: ftfy
INFO:tensorflow:LOSS: spreadout
INFO:tensorflow:OPTIMIZER: Momentum


### Training

In [10]:
# Separator lines for the log, and the K values used for retrieval recall.
heavy_rule = '=' * 80
light_rule = '-' * 80
recall_ranks = (1, 5, 10, 20, 30, 40, 50)

tf.logging.info(heavy_rule)
tf.logging.info('Start training ...')
tf.logging.info(heavy_rule)
for epoch in range(param.n_epoch):
    # ---- training pass (sampler mode 0) ----
    tf.logging.info(light_rule)
    train_banner = 'TRAIN {:d}, {:s} start ...'.format(epoch, param.train_datasets)
    tf.logging.info(train_banner)
    train_data_sampler.set_mode(0)
    # Debugging setting: caps the number of triplet samples per pass.
    train_data_sampler.set_n_triplet_samples(10000)
    loss = estimator.train(
        dataset_initializer=train_dataset_init,
        log_every=param.train_log_every
    )
    tf.logging.info(light_rule)

    # ---- evaluation on the *training* dataset ----
    tf.logging.info(light_rule)
    test_banner = 'TEST {:d}, {:s} start ...'.format(epoch, param.train_datasets)
    tf.logging.info(test_banner)

    # Matching (sampler mode 1): false-positive rate at 95% recall.
    train_data_sampler.set_mode(1)
    train_data_sampler.set_n_matched_pairs(1000)
    test_match = estimator.run_match(train_dataset_init)
    fpr95 = fpr(test_match.labels, test_match.scores, recall_rate=0.95)
    tf.logging.info('FPR95: {:.5f}'.format(fpr95))

    # Retrieval (sampler mode 2): recall at each K in recall_ranks.
    train_data_sampler.set_mode(2)
    test_retrieval = estimator.run_retrieval(train_dataset_init)
    retrieval_labels = train_data_sampler.get_labels(test_retrieval.index)
    rrr = retrieval_recall_K(
        features=test_retrieval.features,
        labels=retrieval_labels,
        is_query=test_retrieval.scores,
        K=list(recall_ranks)
    )
    tf.logging.info('Retrieval: {}'.format(rrr))
    tf.logging.info(light_rule)

    # NOTE(review): stops after the first epoch -- looks like a debugging
    # leftover; remove to train for all param.n_epoch epochs.
    break

INFO:tensorflow:Start training ...
INFO:tensorflow:--------------------------------------------------------------------------------
INFO:tensorflow:TRAIN 0, campus_patch start ...


UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node triplet-net_1/conv1/Conv2D (defined at /ftfy-official/models/network/model/ftfy_cnn.py:40)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/triplet-net_1/conv1/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, triplet-net/conv1/kernel/read)]]
	 [[{{node triplet-loss/add_4/_89}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_7685_triplet-loss/add_4", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'triplet-net_1/conv1/Conv2D', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 832, in start
    self._run_callback(self._callbacks.popleft())
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 605, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-190cc457dcb0>", line 11, in <module>
    name='triplet-net'
  File "/ftfy-official/models/network/model_fn.py", line 81, in triplet_model_fn
    bn_prefix="" if shared_batch_layers else "p_")
  File "/ftfy-official/models/network/model/base.py", line 21, in __call__
    return self.call(images, is_training, **kwargs)
  File "/ftfy-official/models/network/model/ftfy_cnn.py", line 40, in call
    net = tf.layers.conv2d(images,  32, 3, 1, name='conv1', **conv_args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/convolutional.py", line 417, in conv2d
    return layer.apply(inputs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 817, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/layers/base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/engine/base_layer.py", line 757, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/layers/convolutional.py", line 194, in call
    outputs = self._convolution_op(inputs, self.kernel)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py", line 868, in __call__
    return self.conv_op(inp, filter)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py", line 520, in __call__
    return self.call(inp, filter)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/nn_ops.py", line 204, in __call__
    name=self.name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 957, in conv2d
    data_format=data_format, dilations=dilations, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

UnknownError (see above for traceback): Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node triplet-net_1/conv1/Conv2D (defined at /ftfy-official/models/network/model/ftfy_cnn.py:40)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/triplet-net_1/conv1/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, triplet-net/conv1/kernel/read)]]
	 [[{{node triplet-loss/add_4/_89}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_7685_triplet-loss/add_4", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
