In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

# Load training and eval data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

  from ._conv import register_converters as _register_converters


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
# import matplotlib.pyplot as plt
# %matplotlib inline
# import matplotlib.cm as cm
# fig = plt.figure(figsize=(20,20))
# for i in range(6):
#     ax = fig.add_subplot(1, 6, i+1, xticks=[], yticks=[])
#     ax.imshow(train_data[i].reshape(28,-1), cmap='gray')
#     ax.set_title(str(train_labels[i]))

In [3]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

def extend_tensor(tensor1):
    tfmax = tf.reshape(tf.reduce_max(tensor1,axis=1),(-1,1))
    tfmin = tf.reshape(tf.reduce_max(tensor1,axis=1),(-1,1))
    return tf.concat([tensor1, tfmax,tfmin], axis = 1)

def onehot(x,classes = None):
    if classes is None:
        classes = len(np.unique(x))
    onehotencoded = np.zeros((x.shape[0],classes))
    onehotencoded[np.arange(x.shape[0]), x] = 1
    return onehotencoded
# train_labels_onehot = onehot(train_labels)

In [4]:
input_images = tf.placeholder(tf.float32, [None, 784], name='input_images')
input_labels = tf.placeholder(tf.float32, [None, 10], name = 'input_labels')

In [5]:
def basicnet(input_images):
    W = tf.Variable(tf.zeros([784,10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.nn.softmax(tf.matmul(input_images,W)+b)
    return y

def create_mlp_net(input_images=input_images, reuse=False, istraining=True):
    with tf.variable_scope('mlp', reuse = reuse):
        l1 = tf.layers.dense(input_images, 512, activation=tf.nn.relu)
        print('First dense layer shape:',l1.shape)
#         ld1 = tf.layers.dropout(l1,rate=.2, training = istraining)
        l2 = tf.layers.dense(l1, 512, activation=tf.nn.relu)
        print('Second dense layer shape:',l2.shape)
#         ld2 = tf.layers.dropout(l2,rate=.2, training = istraining)
        y = tf.layers.dense(l2, 10, activation=tf.nn.softmax)
        print('Output shape:',y.shape)
    return y

def manual_create_mlp_net(input_images=input_images, reuse=False, istraining=True):
    with tf.variable_scope('mlp', reuse = reuse):
        W1 = tf.get_variable('w1',shape=[784,512])
        b1 = tf.Variable(tf.zeros([512]))
        l1 = tf.nn.relu(tf.matmul(input_images,W1) + b1)
        print('First dense layer shape:',l1.shape)
        W2 = tf.get_variable('w2',shape=[512,256])
        b2 = tf.Variable(tf.zeros([256]))
        l2 = tf.nn.relu(tf.matmul(l1,W2) + b2)
        print('Second dense layer shape:',l2.shape)
        W3 = tf.get_variable('w3',shape=[256,10])
        b3 = tf.Variable(tf.zeros([10]))
        y = tf.nn.softmax(tf.matmul(l2,W3) + b3)
        print('Output shape:',y.shape)
    return y

def create_complex_functional_net(input_images=input_images, reuse=False, istraining=True):
    with tf.variable_scope('complex_mlp', reuse = reuse):
        W1 = tf.get_variable('w1',shape=[786,510])
        b1 = tf.Variable(tf.zeros([510]))
        l1 = tf.nn.relu(tf.matmul(extend_tensor(input_images),W1) + b1)
        print(l1.shape)
        W2 = tf.get_variable('w2',shape=[512,255])
        b2 = tf.Variable(tf.zeros([255]))
        l2 = tf.nn.relu(tf.matmul(extend_tensor(l1),W2) + b2)
        print(l2.shape)
        W3 = tf.get_variable('w3',shape=[257,10])
        b3 = tf.Variable(tf.zeros([10]))
        y = tf.nn.softmax(tf.matmul(extend_tensor(l2),W3) + b3)
        print(y.shape)
    return y

In [6]:
#y_train = create_mlp_net(input_images,reuse=False,istraining=False)
y_regular = manual_create_mlp_net(input_images,reuse=tf.AUTO_REUSE,istraining=False)
#y_train = basicnet(input_images)
y_complex = create_complex_functional_net(input_images,reuse=tf.AUTO_REUSE,istraining=False)
#y_predict = create_mlp_net(reuse=True,istraining=False)

First dense layer shape: (?, 512)
Second dense layer shape: (?, 256)
Output shape: (?, 10)
(?, 510)
(?, 255)
(?, 10)


In [7]:
#cross_entropy = tf.reduce_mean(-tf.reduce_sum(input_labels * tf.log(y_train), axis=[1]))
cross_entropy_regular = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels = input_labels,logits = y_regular)) #Same as above but numerically stable
cross_entropy_complex = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels = input_labels,logits = y_complex)) #Same as above but numerically stable
#train_step = tf.train.GradientDescentOptimizer(0.3).minimize(cross_entropy)
train_step_regular = tf.train.RMSPropOptimizer(0.001).minimize(cross_entropy_regular) #Better optimizer than above
train_step_complex = tf.train.RMSPropOptimizer(0.001).minimize(cross_entropy_complex) #Better optimizer than above

In [10]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for _ in range(10000):
        batch_xs, batch_ys = mnist.train.next_batch(1000)
        sess.run(train_step_regular, feed_dict={input_images: batch_xs, input_labels: batch_ys})
        sess.run(train_step_complex, feed_dict={input_images: batch_xs, input_labels: batch_ys})
    correct_prediction_regular = tf.equal(tf.argmax(y_regular,1), tf.argmax(input_labels,1))
    correct_prediction_complex = tf.equal(tf.argmax(y_complex,1), tf.argmax(input_labels,1))
    accuracy_regular = tf.reduce_mean(tf.cast(correct_prediction_regular, tf.float32))
    accuracy_complex = tf.reduce_mean(tf.cast(correct_prediction_complex, tf.float32))
    print('Regular:', sess.run(accuracy_regular, feed_dict={input_images: mnist.test.images, input_labels: mnist.test.labels}))
    print('Complex:', sess.run(accuracy_complex, feed_dict={input_images: mnist.test.images, input_labels: mnist.test.labels}))

Regular: 0.9829
Complex: 0.9833


In [5]:
def create_complex_functional_net(input_images=input_images, reuse=False, istraining=True):
    with tf.variable_scope('mlp', reuse = reuse):
        W1 = tf.get_variable('w1',shape=[786,510])
        b1 = tf.get_variable('b1',shape=[510])
        l1 = tf.nn.relu(tf.matmul(extend_tensor(input_images),W1) + b1)
        print(l1.shape)
        W2 = tf.get_variable('w2',shape=[512,510])
        b2 = tf.get_variable('b2',shape=[510])
        l2 = tf.nn.relu(tf.matmul(extend_tensor(l1),W2) + b2)
        print(l2.shape)
        W3 = tf.get_variable('w3',shape=[512,10])
        b3 = tf.get_variable('b3',shape=[10])
        y = tf.nn.softmax(tf.matmul(extend_tensor(l2),W3) + b3)
        print(y.shape)
    return y

In [52]:
sess = tf.InteractiveSession()
tf0 = tf.constant(train_data[0:3])
tfn = extend_tensor(tf0)
print(tf0.shape,tfn.shape)
sess.close()

(3, 784) (3, 786)


In [27]:
sess = tf.InteractiveSession()
x = tf.constant([[1, 4],[4,5]])
y = tf.constant([[3], [6]])
print(x.shape, y.shape)
b = tf.concat([x, y], axis=1)  # [[1, 2, 3], [4, 5, 6]]
print(b.shape, b.eval())
sess.close()

(2, 2) (2, 1)
(2, 3) [[1 4 3]
 [4 5 6]]


In [5]:
def manual_create_mlp_net(input_images=input_images, reuse=False, istraining=False):
    with tf.variable_scope('mlp', reuse = reuse):
        W1 = tf.get_variable('w1',shape=[784,512])
        b1 = tf.get_variable('b1',shape=[512])
        l1 = tf.nn.relu(tf.matmul(input_images,W1) + b1)
        W2 = tf.get_variable('w2',shape=[512,512])
        b2 = tf.get_variable('b2',shape=[512])
        l2 = tf.nn.relu(tf.matmul(l1,W2) + b2)
        W3 = tf.get_variable('w3',shape=[512,10])
        b3 = tf.get_variable('b3',shape=[10])
        y = tf.nn.softmax(tf.matmul(l2,W3) + b3)
    return y