In [1]:
import sys
sys.path.append('../../NeuralTuringMachine')


import tensorflow as tf
from ntm import NTMCell


# Ask TensorFlow for the default GPU device name once; a falsy (empty) name
# is treated as "no GPU available".
gpu_device = tf.test.gpu_device_name()
if not gpu_device:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device:{}'.format(gpu_device))

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Default GPU Device:/device:GPU:0


In [2]:
def conv_to_fc(input_tensor):
    """Flatten every non-batch dimension of ``input_tensor`` into a single axis.

    The flattened width is computed from the tensor's static shape as a plain
    Python integer, so no extra graph op is created just to obtain a constant
    and the target shape passed to ``tf.reshape`` is fully concrete.

    Args:
        input_tensor: Tensor whose dimensions after the first one are
            statically known.

    Returns:
        ``input_tensor`` reshaped to ``[-1, product of non-batch dimensions]``.

    Raises:
        ValueError: If any non-batch dimension is not statically known.
    """
    feature_dims = input_tensor.get_shape().as_list()[1:]
    if any(dim is None for dim in feature_dims):
        raise ValueError(
            "conv_to_fc needs statically known non-batch dimensions, got {}".format(feature_dims))

    # Plain integer product; avoids pulling in numpy, which this cell does not import.
    n_hidden = 1
    for dim in feature_dims:
        n_hidden *= dim

    return tf.reshape(input_tensor, [-1, n_hidden])

def linear(input_tensor, num_hidden, scope):
    """Apply a fully connected (affine) layer to a 2-D tensor.

    Args:
        input_tensor: Tensor whose second dimension is statically known; that
            dimension is used as the number of input features.
        num_hidden: Number of output units.
        scope: Variable scope in which the weight ``w`` (orthogonal init) and
            bias ``b`` (zero init) are created.

    Returns:
        ``matmul(input_tensor, w) + b``.
    """
    in_features = input_tensor.get_shape()[1].value
    with tf.variable_scope(scope):
        kernel = tf.get_variable(
            "w",
            [in_features, num_hidden],
            initializer=tf.initializers.orthogonal(),
        )
        offset = tf.get_variable(
            "b",
            [num_hidden],
            initializer=tf.constant_initializer(0.0),
        )
        projected = tf.matmul(input_tensor, kernel)
        return projected + offset
    
def linear2d(input_tensor, num_hidden, scope):
    """Apply a separate affine layer to every slice along axis 1 of a 3-D tensor.

    Slice ``i`` (``input_tensor[:, i, :]``) gets its own weight ``w<i>``
    (orthogonal init) and bias ``b<i>`` (zero init); the per-slice results are
    stacked back along axis 1.

    Args:
        input_tensor: Rank-3 tensor; its shape is unpacked into three
            dimensions, and the second one is iterated with ``range``.
        num_hidden: Number of output units per slice.
        scope: Variable scope holding all per-slice variables.

    Returns:
        Tensor produced by stacking the per-slice outputs on axis 1.
    """
    _, num_rows, row_width = input_tensor.shape
    with tf.variable_scope(scope):
        projected_rows = []
        for row in range(num_rows):
            suffix = str(row)
            kernel = tf.get_variable(
                "w" + suffix, [row_width, num_hidden],
                initializer=tf.initializers.orthogonal())
            offset = tf.get_variable(
                "b" + suffix, [num_hidden],
                initializer=tf.constant_initializer(0.0))
            row_slice = input_tensor[:, row, :]
            projected_rows.append(tf.matmul(row_slice, kernel) + offset)
        return tf.stack(projected_rows, axis=1)
    
    
def conv2d(input_tensor, num_filters, filter_size, stride, scope, **kwargs):
    """Square-kernel 2-D convolution with bias, VALID padding, NHWC layout.

    Args:
        input_tensor: Input tensor; its last dimension is taken as the number
            of input channels.
        num_filters: Number of output channels.
        filter_size: Height and width of the (square) kernel.
        stride: Stride used for both spatial dimensions.
        scope: Variable scope holding the kernel ``w`` (orthogonal init) and
            the bias ``b`` (zero init, shape ``[1, 1, 1, num_filters]``).
        **kwargs: Forwarded unchanged to ``tf.nn.conv2d``.

    Returns:
        The convolution output with the bias added.
    """
    in_channels = input_tensor.get_shape()[-1].value
    kernel_shape = [filter_size, filter_size, in_channels, num_filters]
    bias_shape = [1, 1, 1, num_filters]
    with tf.variable_scope(scope):
        kernel = tf.get_variable(
            "w", kernel_shape, initializer=tf.initializers.orthogonal())
        bias = tf.get_variable(
            "b", bias_shape, initializer=tf.constant_initializer(0.0))
        convolved = tf.nn.conv2d(
            input_tensor,
            kernel,
            strides=[1, stride, stride, 1],
            padding='VALID',
            data_format='NHWC',
            **kwargs)
        return bias + convolved

def softmax_2d(tensor):
    """Softmax over all spatial positions of a rank-4 tensor, per channel.

    The two middle dimensions are merged, softmax is taken along that merged
    axis, and the original four-dimensional layout is restored.

    Args:
        tensor: Rank-4 tensor; its shape is unpacked into four dimensions.

    Returns:
        Tensor reshaped back to ``(-1, h, w, c)`` after the softmax.
    """
    _, height, width, channels = tensor.shape
    flat = tf.reshape(tensor, (-1, height * width, channels))
    normalised = tf.nn.softmax(flat, axis=1)
    return tf.reshape(normalised, (-1, height, width, channels))
    
def attention_block(tensor, g, scope):
    """Weight the spatial positions of ``tensor`` by their compatibility with ``g``.

    Each spatial position is projected to the width of ``g`` (via ``linear2d``
    followed by ReLU), compared with ``g`` through a dot product, and the
    resulting scores are normalised with a softmax over the positions.
    Intermediate shapes are printed for debugging.

    Args:
        tensor: Rank-4 feature map; its shape is unpacked as ``(b, h, w, f)``.
        g: Tensor whose last dimension gives the projection width; it is
            reshaped to ``(-1, 1, g_size)`` before being tiled over positions.
        scope: Variable scope for the projection variables.

    Returns:
        Tuple ``(weighted_ls, attention)``: the flattened feature map
        ``(-1, h*w, f)`` multiplied by the attention weights, and the
        attention weights themselves ``(-1, h*w, 1)``.
    """
    _, height, width, num_features = tensor.shape
    num_positions = height * width

    # Flatten the spatial grid: (-1, h*w, f)
    ls = tf.reshape(tensor, (-1, num_positions, num_features))
    print("ls", ls.get_shape())
    g_size = g.get_shape()[-1].value
    print("g", g.get_shape())

    with tf.variable_scope(scope):
        # Project every position to the width of g: (-1, h*w, g_size)
        projected = linear2d(ls, num_hidden=g_size, scope='lsat')
        projected = tf.nn.relu(projected)
        print("lsat", projected.get_shape())

        ### TODO is including also the batch dimension correct? ###
        g_rows = tf.reshape(g, (-1, 1, g_size))
        g_tiled = tf.tile(g_rows, [1, num_positions, 1])

        # Dot product between each projected position and g: (-1, h*w, 1)
        products = tf.multiply(projected, g_tiled)
        compatibility = tf.reduce_sum(products, axis=-1, keepdims=True)
        print("compatibility", compatibility.get_shape())

        # Normalise the scores across positions.
        attention = tf.nn.softmax(compatibility, axis=1)

        # Repeat the weights over the feature axis: (-1, h*w, f)
        attention_tiled = tf.tile(attention, [1, 1, num_features])
        print("attention", attention_tiled.get_shape())

        weighted_ls = attention_tiled * ls
        return weighted_ls, attention
    
def memory_module(inputs):
    """Run a 2-D batch through an ``NTMCell`` as a sequence of length one.

    The input is reshaped from ``(batch, features)`` to
    ``(batch, 1, features)``, fed through ``tf.nn.dynamic_rnn`` with a freshly
    constructed ``NTMCell`` (fixed hyper-parameters below), and the RNN output
    is squashed with a sigmoid. Input and output shapes are printed for
    debugging.

    NOTE(review): the recorded run of this notebook stops with
    ``TypeError: 'Tensor' object cannot be interpreted as an integer`` right
    after this function prints its input shape. No traceback is shown, so it
    is unconfirmed whether the cell/RNN call here is the origin (e.g. because
    the batch dimension is not statically known) -- verify against NTMCell.

    Args:
        inputs: Rank-2 tensor; its shape is unpacked into two dimensions.

    Returns:
        Sigmoid of the output sequence returned by ``tf.nn.dynamic_rnn``.
    """
    _, feature_dim = inputs.get_shape()
    inputs = tf.reshape(inputs, (-1, 1, feature_dim))
    print("inputs", inputs.get_shape())

    cell = NTMCell(
        controller_layers=2,
        controller_units=100,
        memory_size=128,
        memory_vector_dim=12,
        read_head_num=1,
        write_head_num=1,
        addressing_mode='content_and_location',
        shift_range=1,
        reuse=False,
        output_dim=None,
        clip_value=20,
        init_mode='constant',
    )

    # Batch-major input (time_major=False); the final RNN state is not needed.
    output_sequence, _final_state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=inputs,
        time_major=False,
        dtype=tf.float32,
        initial_state=None,
    )
    print("output_sequence", output_sequence.get_shape())

    return tf.sigmoid(output_sequence)


def cnn(X, num_classes, **kwargs):
    """Build the classifier graph: two conv layers, a dense layer, the memory module.

    Pipeline: reshape the flat input to ``[-1, 28, 28, 1]`` -> two stride-2
    2x2 convolutions with ELU -> 256-unit dense layer with ReLU ->
    ``memory_module`` -> dense layer with ``num_classes`` outputs -> ReLU.

    The attention branches (``attention_block`` applied to ``conv1`` and
    ``conv2``) are currently disabled, so only a single tensor is returned
    and no attention layers are produced.

    Args:
        X: Input tensor that can be reshaped to ``[-1, 28, 28, 1]``.
        num_classes: Number of output units of the final dense layer.
        **kwargs: Accepted for call compatibility; not used.

    Returns:
        The ReLU-activated output of the final dense layer.
    """
    scaled_images = tf.reshape(X, shape=[-1, 28, 28, 1])

    conv1 = conv2d(input_tensor=scaled_images, num_filters=32, filter_size=2, stride=2, scope="conv1")
    conv1 = tf.nn.elu(conv1)
    conv2 = conv2d(input_tensor=conv1, num_filters=64, filter_size=2, stride=2, scope="conv2")
    conv2 = tf.nn.elu(conv2)

    g = tf.nn.relu(linear(conv_to_fc(conv2), num_hidden=256, scope="ln1"))

    memory_out = memory_module(g)
    # `last` used to be built from the (now disabled) attention branches, which
    # left it undefined and made this function raise NameError. Feed the memory
    # module's output to the classifier instead. memory_module keeps a
    # length-1 time axis, so flatten it back to 2-D for `linear`.
    # NOTE(review): assumes the memory output's feature dimension is statically
    # known -- confirm against NTMCell with output_dim=None.
    last = conv_to_fc(memory_out)

    ln1 = linear(last, num_hidden=num_classes, scope="lnlast")

    # NOTE(review): the caller treats this value as logits for a softmax
    # cross-entropy loss; the ReLU here clamps negative logits to zero. Kept
    # as in the original -- confirm it is intentional.
    return tf.nn.relu(ln1)


# Close the session left over from a previous run of this cell, if any.
if 'sess' in locals():
    sess.close()

# Start from an empty default graph so that re-running this cell does not
# collide with the variables (conv1/w, ln1/w, ...) created by the previous run.
tf.reset_default_graph()

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Training Parameters
learning_rate = 0.0001
num_steps = 1000
batch_size = 128
display_step = 10

# Network Parameters
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.75 # Dropout, probability to keep units (not used by the graph below)

# tf Graph input
X = tf.placeholder(tf.float32, [None, num_input])
Y = tf.placeholder(tf.float32, [None, num_classes])
# Fed in every sess.run below, but no op in the graph built here consumes it.
keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)

# cnn() returns a single tensor (its attention branches are disabled), so it
# must not be unpacked into three values.
logits = cnn(X, num_classes)
# Kept defined so later cells can detect that no attention layers exist.
attn1_layer = attn2_layer = None
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
sess = tf.Session()

# Run the initializer
sess.run(init)

for step in range(1, num_steps+1):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Run optimization op (backprop)
    sess.run(train_op, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.8})
    if step % display_step == 0 or step == 1:
        # Calculate batch loss and accuracy
        loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                             Y: batch_y,
                                                             keep_prob: 1.0})
        print("Step " + str(step) + ", Minibatch Loss= " +
              "{:.4f}".format(loss) + ", Training Accuracy= " +
              "{:.3f}".format(acc))

print("Optimization Finished!")

# Calculate accuracy for 256 MNIST test images
print("Testing Accuracy:",
      sess.run(accuracy, feed_dict={X: mnist.test.images[:256],
                                    Y: mnist.test.labels[:256],
                                    keep_prob: 1.0}))

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Colocations handled automatically by placer.
inputs (?, 1, 256)
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCell

TypeError: 'Tensor' object cannot be interpreted as an integer

In [None]:
import numpy as np

# Classify test images 270..299 and print the predictions next to the
# true labels for a quick visual comparison.
test_img = mnist.test.images[270:300]
test_y = mnist.test.labels[270:300]

res_op = tf.argmax(prediction, 1)

# One flattened row per image.
flat_images = np.reshape(test_img, (test_img.shape[0], -1))
result = sess.run(res_op, feed_dict={X: flat_images})

true_labels = np.argmax(test_y, 1)
print(result, "==\n", true_labels, "?")

In [None]:
### net visualization
import matplotlib as mp
%matplotlib inline
import matplotlib.pyplot as plt
import math


extent = 0, 28, 0, 28

def plotNNFilter(units):
    """Draw every channel of the first sample in ``units`` on a subplot grid.

    Args:
        units: Array indexed as ``units[0, :, :, i]``; ``units.shape[3]`` is
            the number of channels to draw.
    """
    n_columns = 6
    num_filters = units.shape[3]
    # One more row than strictly required by ceil(num_filters / n_columns).
    n_rows = math.ceil(num_filters / n_columns) + 1

    plt.figure(1, figsize=(10, 10))
    for index in range(num_filters):
        plt.subplot(n_rows, n_columns, index + 1)
        plt.title('Filter ' + str(index))
        channel = units[0, :, :, index]
        plt.imshow(channel, interpolation="nearest")
        
def plotAttention(units):
    """Overlay an attention map as a semi-transparent square image.

    Prints the maximum and minimum of ``units`` and the derived side length,
    then reshapes ``units`` to a square grid and draws it with the
    module-level ``extent``.

    Args:
        units: Rank-3 array; its second dimension is the number of positions,
            and the whole array must hold exactly ``side * side`` values to
            be reshaped into the square grid.
    """
    _, num_positions, _ = units.shape
    print(np.amax(units), np.amin(units))

    # Side length of the square grid the positions are laid out on.
    side = int(math.sqrt(num_positions))
    print(side, side)

    grid = np.reshape(units, (side, side))
    plt.imshow(grid, interpolation="nearest", cmap="viridis", alpha=0.5, extent=extent)

def getActivations(layer, stimuli):
    """Evaluate ``layer`` for one image and plot the result as an attention map.

    Args:
        layer: Fetch passed to ``sess.run``.
        stimuli: Image data reshaped to ``[1, 784]`` before being fed to ``X``.
    """
    flat_input = np.reshape(stimuli, [1, 784], order='F')
    feed = {X: flat_input, keep_prob: 1.0}
    units = sess.run(layer, feed_dict=feed)
    plotAttention(units)

    
fig = plt.figure(frameon=True)

# Pick a random test digit and draw it as the grayscale background.
imageToUse = mnist.test.images[np.random.randint(low=0, high=len(mnist.test.images))]
plt.imshow(np.reshape(imageToUse,[28,28]), interpolation="nearest", cmap="gray", extent=extent)

# The attention branches in cnn() are commented out, so an attention layer
# may be missing (undefined or None). Only draw the overlay when one exists
# instead of failing inside sess.run.
attention_layer = globals().get("attn1_layer")
if attention_layer is not None:
    getActivations(attention_layer, imageToUse)
else:
    print("attn1_layer is not available; showing the input image without an attention overlay")

plt.show()


In [None]:
# Scratch cell: list the attributes exposed by the tensorflow module.
dir(tf)
