In [2]:
import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras import Model, Sequential
from tensorflow.keras.losses import Loss
from tensorflow.keras.layers import Layer, Conv2D, Input, Conv2DTranspose, MaxPooling2D, Flatten, Dense
import numpy as np
import matplotlib.pyplot as plt

In [16]:
def getwhere(x):
    '''Build the 'where' mask marking which pre-pooling positions held the max.

    Args:
        x: a pair ``(y_prepool, y_postpool)``: the tensor that was fed to a
            max-pooling layer and the tensor that layer produced.  Both are
            assumed to be 4-D channels-last (batch, height, width, channels),
            with the pre-pool height/width an exact multiple of the post-pool
            height/width (non-overlapping windows, nothing dropped).

    Returns:
        A one-element list ``[mask]`` (callers index it with ``[0]``).
        ``mask`` has the shape and dtype of ``y_prepool`` and holds 1 where
        the pre-pool value equals the maximum of its pooling window and 0
        elsewhere.  If several positions in a window tie for the maximum,
        all of them are marked.
    '''
    y_prepool, y_postpool = x

    # A GradientTape cannot be used here: the pooling op was executed before
    # this function is called, so a tape opened now has nothing recorded and
    # would yield no gradient.  The mask is instead recovered by comparison.

    # Prefer the static spatial size so the result keeps a known shape; fall
    # back to the dynamic shape when height/width are not known statically.
    target_hw = y_prepool.shape[1:3]
    if target_hw.is_fully_defined():
        target_hw = target_hw.as_list()
    else:
        target_hw = tf.shape(y_prepool)[1:3]

    # Nearest-neighbour upsampling copies each pooled maximum over the whole
    # window it was taken from (values and dtype are preserved exactly).
    window_max = tf.image.resize(
        y_postpool, target_hw,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # A position is a 'where' switch iff it carries its window's maximum.
    mask = tf.cast(tf.equal(y_prepool, window_max), y_prepool.dtype)
    return [mask]

In [17]:
def convresblock(x, nfeats=8, ksize=3, deconv=False):
    '''Apply a single ELU-activated (de)convolution stage to ``x``.

    Args:
        x: input feature-map tensor.
        nfeats: number of output feature maps (filters).
        ksize: side length of the square convolution kernel.
        deconv: when False (default) a ``Conv2D`` layer is used; when True a
            ``Conv2DTranspose`` layer is used instead.

    Returns:
        The output tensor of the layer.  With 'SAME' padding and the default
        stride the spatial size of ``x`` is kept.
    '''
    # Previously both branches built Conv2DTranspose, so `deconv` was ignored.
    layer_cls = Conv2DTranspose if deconv else Conv2D
    # The ELU is applied once, through the layer's own activation; the extra
    # tf.nn.elu that used to follow applied the non-linearity a second time.
    return layer_cls(nfeats, [ksize, ksize], padding='SAME', activation='elu')(x)

In [19]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train = x_train.astype("float32") / 255
y_train = y_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
y_test = y_test.astype("float32") / 255

# The size of the kernel used for the MaxPooling2D
pool_size = 2
# The total number of feature maps at each layer
nfeats = [8, 16, 32, 64, 128]
# The sizes of the pooling kernel at each layer
pool_sizes = np.array([1, 1, 1, 1, 1]) * pool_size
# The convolution kernel size
ksize = 3
# Number of epochs to train for
epochs = 10
# Batch size during training
batch_size = 128

if pool_size == 2:
    # if using a 5 layer net of pool_size = 2
#     x_train = tf.pad(x_train, [[0, 0], [2, 2], [2, 2], [0, 0]],
#                      mode='constant')
#     x_test = tf.pad(x_test, [[0, 0], [2, 2], [2, 2], [0, 0]], mode='constant')
    nlayers = 5
elif pool_size == 3:
    # if using a 3 layer net of pool_size = 3
#     x_train = x_train[:, :, :-1, :-1]
#     x_test = x_test[:, :, :-1, :-1]
    nlayers = 3
else:
    print('Script supports pool_size of 2 and 3.')

In [20]:
# Shape of input to train on (note that model is fully convolutional however)
input_shape = (None, x_train.shape[1], x_train.shape[2])
# The final list of the size of axis=1 for all layers, including input
nfeats_all = [input_shape[-1]] + nfeats

# First build the encoder, all the while keeping track of the 'where' masks

# We push the 'where' masks to the following list
wheres = [None] * nlayers
poolingOutputs = [None] * nlayers
y = Input(shape=input_shape)
for i in range(nlayers):
    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
    poolingOutputs[i] = MaxPooling2D([pool_sizes[i], pool_sizes[i]], [pool_sizes[i], pool_sizes[i]])(y_prepool)
    wheres[i] = getwhere([y_prepool, poolingOutputs[i]])
    y = poolingOutputs[i]

ValueError: Exception encountered when calling layer "max_pooling2d_5" (type MaxPooling2D).

Negative dimension size caused by subtracting 2 from 1 for '{{node max_pooling2d_5/MaxPool}} = MaxPool[T=DT_FLOAT, data_format="NHWC", explicit_paddings=[], ksize=[1, 2, 2, 1], padding="VALID", strides=[1, 2, 2, 1]](Placeholder)' with input shapes: [?,?,1,128].

Call arguments received by layer "max_pooling2d_5" (type MaxPooling2D):
  • inputs=tf.Tensor(shape=(None, None, 1, 128), dtype=float32)

In [None]:
# Decoder: revisit the encoder stages deepest-first.  Each step enlarges the
# current tensor by that stage's pooling factor (nearest neighbour), gates it
# with the first element of the stage's stored 'where' entry, and runs the
# result through a transposed-convolution block.
unpoolingOutputs = [None for _ in range(nlayers)]
for i, ind in enumerate(reversed(range(nlayers))):
    scale = pool_sizes[ind]
    in_shape = y.get_shape().as_list()
    # Axes 1 and 2 are the two dimensions being resized.
    out_shape = [dim * scale for dim in in_shape[1:3]]
    upsampled = tf.image.resize(
        y, out_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    gated = tf.math.multiply(upsampled, wheres[ind][0])
    # Store the stage output and carry it into the next iteration.
    y = unpoolingOutputs[ind] = convresblock(
        gated, nfeats=nfeats_all[ind], ksize=ksize, deconv=True)
outputs = y

In [None]:
# Define the training loss: half the summed squared error between the input
# image and its reconstruction, plus the same term for every intermediate
# encoder/decoder stage pair.
def loss(img_input, y, poolingOutputs, unpoolingOutputs):
    '''Total reconstruction loss over the image and the intermediate stages.

    Args:
        img_input: the original input image batch.
        y: the reconstruction of ``img_input``.
        poolingOutputs: list of encoder stage outputs.
        unpoolingOutputs: list of decoder stage outputs; entry ``k + 1`` is
            compared against ``poolingOutputs[k]``.

    Returns:
        A scalar tensor: the sum of ``tf.nn.l2_loss`` (i.e. sum(d**2) / 2)
        of the image difference and of each stage-pair difference.
    '''
    def _half_sse(target, reconstruction):
        # Flatten must be instantiated and then *called* on the tensor;
        # passing the tensor to the constructor only builds a layer object.
        flatten = tf.keras.layers.Flatten()
        return tf.nn.l2_loss(flatten(reconstruction) - flatten(target))

    total = _half_sse(img_input, y)
    # Pair poolingOutputs[k] with unpoolingOutputs[k + 1] for every available
    # stage, so the loss works for any encoder depth (not only 5 layers).
    for pooled, unpooled in zip(poolingOutputs, unpoolingOutputs[1:]):
        total += _half_sse(pooled, unpooled)
    return total

In [None]:
# By this point `y` has been reassigned to the decoder output, so it cannot be
# used as the model input.  Recover the graph's real Input tensor(s) by walking
# back from `outputs` instead.
model = Model(inputs=tf.keras.utils.get_source_inputs(outputs), outputs=outputs)