# TensorTools Runtime Demo

Demonstrates the usage of the runtime using a simple autoencoder model.

In [None]:
# Force matplotlib to use inline rendering
%matplotlib inline

import os
import sys

# add path to libraries for ipython
sys.path.append(os.path.expanduser("~/libs"))

import numpy as np
import tensorflow as tf
import tensortools as tt

In [None]:
# Directory handed to the runtime as `train_dir`.
TRAIN_DIR = "/work/sauterme/train-examples/runtime-demo"

# Batch size passed to runtime.train().
BATCH_SIZE = 24
# Passed to the registered model as `reg_lambda` (scale of its L2 regularizers).
REG_LAMBDA = 5e-4
# Learning-rate settings, passed in this order to tt.training.Optimizer.
INITIAL_LR = 0.001
LR_DECAY_STEP_INTERVAL = 10000
LR_DECAY_RATE = 0.5
# Only referenced by the commented-out MultiGpuRuntime line further down.
NUM_GPUS = 2

In [None]:
# Root directory passed to each of the three MNIST dataset constructors.
DATA_ROOT = "/work/sauterme/data"
# Train / validation / test splits; all three are registered on the runtime later.
dataset_train = tt.datasets.mnist.MNISTTrainDataset(DATA_ROOT)
dataset_valid = tt.datasets.mnist.MNISTValidDataset(DATA_ROOT)
dataset_test = tt.datasets.mnist.MNISTTestDataset(DATA_ROOT)

In [None]:
class SimpleAutoencoderModel(tt.model.AbstractModel):
    """Autoencoder built from two fully-connected layers.

    The input is flattened, passed through a ReLU layer ("FC_Enc") and then
    through a sigmoid layer ("FC_Dec") whose output size equals the flattened
    input size.
    """

    def __init__(self, reg_lambda=0.0):
        """Create the model.

        Args:
            reg_lambda: Forwarded to the base class; read back as
                ``self.reg_lambda`` for the L2 regularizer of both layers
                (presumably stored there by ``AbstractModel`` - confirm).
        """
        super(SimpleAutoencoderModel, self).__init__(reg_lambda)

    @tt.utils.attr.override
    def inference(self, inputs, targets, feeds,
                  is_training, device_scope, memory_device):
        """Build the encoder/decoder graph.

        Args:
            inputs: Input tensor; flattened before encoding.
            targets: Tensor whose static shape (without the first dimension)
                determines the shape of the returned tensor.
            feeds: Not used by this model.
            is_training: Not used by this model.
            device_scope: Not used by this model.
            memory_device: Passed as ``device`` to both ``tt.network.fc`` calls.

        Returns:
            The decoder output reshaped to ``[-1] + targets.shape[1:]``.
        """
        x = tf.contrib.layers.flatten(inputs)
        # Use the documented tf.contrib.layers.l2_regularizer entry point, the
        # same one the convolutional model in this notebook uses.
        encoded = tt.network.fc("FC_Enc", x, 64,
                                weight_init=tf.contrib.layers.xavier_initializer(),
                                bias_init=0.0,
                                regularizer=tf.contrib.layers.l2_regularizer(self.reg_lambda),
                                activation=tf.nn.relu,
                                device=memory_device)
        # The decoder maps back to the flattened input size.
        decoded = tt.network.fc("FC_Dec", encoded, x.get_shape()[1],
                                weight_init=tf.contrib.layers.xavier_initializer(),
                                bias_init=0.0,
                                regularizer=tf.contrib.layers.l2_regularizer(self.reg_lambda),
                                activation=tf.nn.sigmoid,
                                device=memory_device)

        return tf.reshape(decoded, [-1] + targets.get_shape().as_list()[1:])

    @tt.utils.attr.override
    def loss(self, predictions, targets, device_scope):
        """Return the equally weighted sum of the ``tt.loss.mse`` and ``tt.loss.bce`` terms.

        Both individual terms are also added to the ``tt.core.LOG_LOSSES``
        collection.

        Args:
            predictions: Output of ``inference``.
            targets: Tensor the predictions are compared against.
            device_scope: Not used by this model.

        Returns:
            The combined loss tensor.
        """
        loss1 = tt.loss.mse(predictions, targets)
        loss2 = tt.loss.bce(predictions, targets)

        tf.add_to_collection(tt.core.LOG_LOSSES, loss1)
        tf.add_to_collection(tt.core.LOG_LOSSES, loss2)

        # The op name states the actual 50/50 weighting; the previous name
        # "25mse_75bce" did not match the 0.5/0.5 factors used here.
        return tf.add(0.5 * loss1, 0.5 * loss2, name="50mse_50bce")

    @tt.utils.attr.override
    def evaluation(self, predictions, targets, device_scope=None):
        """Compute the image metrics reported for this model.

        Args:
            predictions: Output of ``inference``.
            targets: Tensor the predictions are compared against.
            device_scope: Not used by this model.

        Returns:
            Dict with the keys ``"psnr"``, ``"sharpdiff"`` and ``"ssim"``.
        """
        psnr = tt.image.psnr(predictions, targets)
        sharpdiff = tt.image.sharp_diff(predictions, targets)
        # L=1.0: presumably the dynamic range of the sigmoid output - confirm.
        ssim = tt.image.ssim(predictions, targets, L=1.0)

        return {"psnr": psnr, "sharpdiff": sharpdiff, "ssim": ssim}

In [None]:
class SimpleFullyConvolutionalAutoencoderModel(tt.model.AbstractModel):
    """Autoencoder made of two convolutions followed by two transposed convolutions."""

    def __init__(self, reg_lambda=0.0):
        """Forward ``reg_lambda`` to the base class."""
        super(SimpleFullyConvolutionalAutoencoderModel, self).__init__(reg_lambda)

    @tt.utils.attr.override
    def inference(self, inputs, targets, feeds,
                  is_training, device_scope, memory_device):
        """Build the encoder ("Conv1", "Conv2") and decoder ("Deconv1", "Deconv2").

        Only ``inputs`` is used; the remaining arguments are accepted to match
        the overridden signature.

        Returns:
            The output of the last transposed convolution (sigmoid activation).
        """

        def layer_options(weight_init, activation):
            # Keyword arguments shared by all four layers; a fresh L2
            # regularizer is created for every layer, as before.
            return {
                "weight_init": weight_init,
                "bias_init": 0.01,
                "regularizer": tf.contrib.layers.l2_regularizer(self.reg_lambda),
                "activation": activation,
            }

        with tf.variable_scope("Encoder"):
            # Layer 1: convolution
            enc1 = tt.network.conv2d(
                "Conv1", inputs, 8, (5, 5), (2, 2),
                **layer_options(tf.contrib.layers.xavier_initializer_conv2d(),
                                tf.nn.relu))

            # Test summaries for the first conv layer.
            tt.board.activation_summary(enc1, True, scope="Conv1")
            tt.board.conv_image_summary("conv1_out", enc1)

            # Hack: re-enter the layer's scope with reuse=True to fetch its
            # kernel weights for the filter summary.
            with tf.variable_scope("Conv1", reuse=True):
                tt.board.conv_filter_image_summary("conv1_filters",
                                                   tf.get_variable("W"))

            # Layer 2: convolution
            enc2 = tt.network.conv2d(
                "Conv2", enc1, 16, (3, 3), (2, 2),
                **layer_options(tf.contrib.layers.xavier_initializer_conv2d(),
                                tf.nn.relu))

        with tf.variable_scope("Decoder"):
            # Layer 3: transposed convolution
            dec1 = tt.network.conv2d_transpose(
                "Deconv1", enc2, 8, (3, 3), (2, 2),
                **layer_options(tt.init.bilinear_initializer(), tf.nn.relu))
            # Layer 4: transposed convolution with sigmoid output
            dec2 = tt.network.conv2d_transpose(
                "Deconv2", dec1, 1, (5, 5), (2, 2),
                **layer_options(tt.init.bilinear_initializer(), tf.nn.sigmoid))

        return dec2

    @tt.utils.attr.override
    def loss(self, predictions, targets, device_scope):
        """Return the sum of the ``tt.loss.bce`` and ``tt.loss.mgdl`` terms."""
        bce_term = tt.loss.bce(predictions, targets)
        mgdl_term = tt.loss.mgdl(predictions, targets)
        return bce_term + mgdl_term

    @tt.utils.attr.override
    def evaluation(self, predictions, targets, device_scope=None):
        """Return a dict with the ``psnr``, ``sharpdiff`` and ``ssim`` metrics."""
        return {
            "psnr": tt.image.psnr(predictions, targets),
            "sharpdiff": tt.image.sharp_diff(predictions, targets),
            "ssim": tt.image.ssim(predictions, targets, L=1.0),
        }

In [None]:
# Select CUDA device 2 (presumably restricts which GPUs are used; confirm
# against tt.hardware).
tt.hardware.set_cuda_devices([2])
runtime = tt.core.DefaultRuntime(train_dir=TRAIN_DIR)
# Alternatives kept for quick switching: multi-GPU runtime / fully-connected model.
#runtime = tt.core.MultiGpuRuntime(NUM_GPUS, train_dir=TRAIN_DIR)
#runtime.register_model(SimpleAutoencoderModel(reg_lambda=REG_LAMBDA))
runtime.register_model(SimpleFullyConvolutionalAutoencoderModel(reg_lambda=REG_LAMBDA))
# Adam optimizer configured with the learning-rate constants defined above.
optimizer = tt.training.Optimizer('adam', INITIAL_LR,
                                  LR_DECAY_STEP_INTERVAL, LR_DECAY_RATE)
runtime.register_optimizer(optimizer)

In [None]:
# Attach the three dataset splits, then build the runtime in autoencoder mode.
runtime.register_datasets(dataset_train, dataset_valid, dataset_test)
runtime.build(is_autoencoder=True)

# Report the runtime's current global step.
print("Global step: {}".format(runtime.gstep))

In [None]:
def on_valid(rt, gstep):
    """Validation hook passed to ``runtime.train``; it only announces itself.

    Both arguments are accepted to match the hook's call signature and are
    not used.
    """
    message = "On-Validate Hook..."
    print(message)

In [None]:
# Train for 1000 steps with BATCH_SIZE; on_valid is passed as the validation
# hook. Summaries are switched off, checkpoints are switched on.
runtime.train(BATCH_SIZE, steps=1000, on_validate=on_valid,
              display_steps=25, do_summary=False, do_checkpoints=True)

In [None]:
# Run validation with a batch size of 50.
runtime.validate(batch_size=50)

In [None]:
# Run the test pass with a batch size of 50.
runtime.test(batch_size=50)

## Visualization / Prediction

In [None]:
def show(inputs, predictions):
    """Print the value ranges of a batch and its predictions, then display both.

    Args:
        inputs: Batch that was fed to the model; must provide ``min()`` and
            ``max()``.
        predictions: Model output for ``inputs``; must provide ``min()`` and
            ``max()``.
    """
    print("Inputs-Range : [{}, {}]".format(inputs.min(), inputs.max()))
    # Labelled "Predictions" rather than "Targets": the printed values come
    # from the model output, not from any target batch.
    print("Predictions-Range: [{}, {}]".format(predictions.min(), predictions.max()))

    tt.visualization.display_batch(inputs, title="Inputs")
    tt.visualization.display_batch(predictions, title="Predictions")

#### Use random input of same size as in training:

In [None]:
# Random batch of 4 samples whose last three dimensions match the training inputs.
image_shape = dataset_train.input_shape
fake_inputs = np.random.rand(4, *(image_shape[axis] for axis in (-3, -2, -1)))

predictions = runtime.predict(fake_inputs)
show(fake_inputs, predictions)

In [None]:
# Predict on a batch of 4 samples from the training set; the second element
# returned by get_batch is discarded.
inputs, _ = dataset_train.get_batch(4)

predictions = runtime.predict(inputs)

show(inputs, predictions)

#### Use random image bigger than in training:

In [None]:
# Scale the two spatial dimensions of the training input shape by SIZE_FACTOR;
# the channel count (last dimension) is kept as it is.
SIZE_FACTOR = 2.0
image_shape = dataset_train.input_shape
changed_height, changed_width = (
    int(image_shape[axis] * SIZE_FACTOR) for axis in (-3, -2)
)
channels = image_shape[-1]
print("Changed-Shape: [{}, {}, {}]".format(changed_height, changed_width, channels))

In [None]:
# Detach the datasets and rebuild the runtime with an explicit input shape so
# that the enlarged images of the following cells can be fed.
# NOTE(review): restore_ema_variables=True presumably loads the averaged
# weights of the previous training run - confirm against runtime.build.
runtime.unregister_datasets()
runtime.build(is_autoencoder=True, track_ema_variables=False, restore_ema_variables=True,
              input_shape=[changed_height, changed_width, channels])

In [None]:
# Random batch of 4 samples in the enlarged shape.
fake_inputs = np.random.rand(4, changed_height, changed_width, channels)

predictions = runtime.predict(fake_inputs)

show(fake_inputs, predictions)

In [None]:
# Training batch of 4 samples, brought to the enlarged shape with pad_or_crop.
inputs, _ = dataset_train.get_batch(4)
# pad to have full shape
inputs = tt.utils.image.pad_or_crop(inputs, [changed_height, changed_width, channels])

# Show the resulting batch shape.
print(inputs.shape)

predictions = runtime.predict(inputs)

show(inputs, predictions)

In [None]:
# Single training sample, resized (not padded) with scale=SIZE_FACTOR.
inputs, _ = dataset_train.get_batch(1)
# resize the first (only) image of the batch
scaled = tt.utils.image.resize(inputs[0], scale=SIZE_FACTOR)
# re-add the leading batch axis that indexing with inputs[0] removed
scaled = np.expand_dims(scaled, 0)

print(scaled.shape)

predictions = runtime.predict(scaled)

show(scaled, predictions)

In [None]:
# Close the runtime (presumably releases the underlying TensorFlow session; confirm).
runtime.close()