In [1]:
# GPUs or CPU
import tensorflow as tf

# Check TensorFlow Version
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.4.1
Default GPU Device: /device:GPU:0


In [6]:
import tensorflow as tf
import numpy as np
import sys
from tensorflow.examples.tutorials.mnist import input_data

def convolution(X, W, b, padding, stride):
    n, h, w, c = map(lambda d: d.value, X.get_shape())
    filter_h, filter_w, filter_c, filter_n = [d.value for d in W.get_shape()]
    
    out_h = (h + 2*padding - filter_h)//stride + 1
    out_w = (w + 2*padding - filter_w)//stride + 1

    X_flat = flatten(X, filter_h, filter_w, filter_c, out_h, out_w, stride, padding)
    W_flat = tf.reshape(W, [filter_h*filter_w*filter_c, filter_n])
    
    z = tf.matmul(X_flat, W_flat) + b     # b: 1 X filter_n
    
    return tf.transpose(tf.reshape(z, [out_h, out_w, n, filter_n]), [2, 0, 1, 3])

# Question: Is this the same img2col in NumPy and Cython implementation.
# To compute convolution easily, we do a simple trick called flattening. 
# After flattening, input data will be transformed into a 2D matrix, 
# which allows for matrix multiplication with a filter (which is also flattened into 2D).
def flatten(X, window_h, window_w, window_c, out_h, out_w, stride=1, padding=0):
    
    X_padded = tf.pad(X, [[0,0], [padding, padding], [padding, padding], [0,0]])

    windows = []
    for y in range(out_h):
        for x in range(out_w):
            window = tf.slice(X_padded, [0, y*stride, x*stride, 0], [-1, window_h, window_w, -1])
            windows.append(window)
    stacked = tf.stack(windows) # shape : [out_h, out_w, n, filter_h, filter_w, c]

    return tf.reshape(stacked, [-1, window_c*window_w*window_h])

# Max pooling is not neccessary at all.
def max_pool(X, pool_h, pool_w, padding, stride):
    n, h, w, c = [d.value for d in X.get_shape()]
    
    out_h = (h + 2*padding - pool_h)//stride + 1
    out_w = (w + 2*padding - pool_w)//stride + 1

    X_flat = flatten(X, pool_h, pool_w, c, out_h, out_w, stride, padding)

    pool = tf.reduce_max(tf.reshape(X_flat, [out_h, out_w, n, pool_h*pool_w, c]), axis=3)
    return tf.transpose(pool, [2, 0, 1, 3])

# The simplest possible non-linearity
# ReLU or Leaky ReLU
# Gated Linear unit is also another one of these but with more computational complexity.
# Sigmoid is a gate and resemble the soma non-linearity better.
def relu(X):
    return tf.maximum(X, tf.zeros_like(X))

def softmax(X):
    X_centered = X - tf.reduce_max(X) # to avoid overflow
    X_exp = tf.exp(X_centered)
    exp_sum = tf.reduce_sum(X_exp, axis=1)
    return tf.transpose(tf.transpose(X_exp) / exp_sum)

# def cross_entropy_error(y, t):
#     return -tf.reduce_mean(tf.log(tf.reduce_sum(y * t, axis=1)))

# def accuracy(network, t):
#     t_predict = tf.argmax(network, axis=1)
#     t_actual = tf.argmax(t, axis=1)
#     return tf.reduce_mean(tf.cast(tf.equal(t_predict, t_actual), tf.float32))

In [17]:
def xavier_init(size):
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)

def feedforward_net(D, H, C):
    X = tf.placeholder(tf.float32, shape=[None, D])
    y = tf.placeholder(tf.float32, shape=[None, C])

    Wxh = tf.Variable(xavier_init([D, H]))
    bxh = tf.Variable(tf.zeros(shape=[H]))

    Whh = tf.Variable(xavier_init([H, H]))
    bhh = tf.Variable(tf.zeros(shape=[H]))

    Why = tf.Variable(xavier_init([H, C]))
    bhy = tf.Variable(tf.zeros(shape=[C]))

    h1 = relu(tf.matmul(X, Wxh) + bxh)
    h2 = relu(tf.matmul(h1, Whh) + bhh)
    prob = softmax(tf.matmul(h2, Why) + bhy)

    loss = -tf.reduce_mean(y * tf.log(prob))

    return X, y, prob, loss

def convnet(D, H, C):
    X = tf.placeholder(tf.float32, shape=[None, *D])
    y = tf.placeholder(tf.float32, shape=[None, C])

    Wconv1 = tf.Variable(xavier_init([3, 3, 1, 10]))
    bconv1 = tf.Variable(tf.zeros(shape=[10]))
#     n, h, w, c = map(lambda d: d.value, X.get_shape())
#     filter_h, filter_w, filter_c, filter_n = [d.value for d in W.get_shape()]

    Wfc1 = tf.Variable(xavier_init([14 * 14 * 10, H]))
    bfc1 = tf.Variable(tf.zeros(shape=[H]))

    Wfc2 = tf.Variable(xavier_init([H, C]))
    bfc2 = tf.Variable(tf.zeros(shape=[C]))

#     hconv1 = relu(tf.nn.conv2d(X, Wconv1, [1, 1, 1, 1], padding='SAME') + bconv1)
    hconv1 = relu(convolution(X=X, W=Wconv1, b=bconv1, stride=1, padding=1) + bconv1)
#     hpool1 = tf.nn.max_pool(hconv1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    hpool1 = max_pool(X=hconv1, pool_h=2, pool_w=2, padding=1, stride=2)
    
    hpool1 = tf.reshape(hpool1, shape=[-1, 14 * 14 * 10])
    h = relu(tf.matmul(hpool1, Wfc1) + bfc1)
    prob = softmax(tf.matmul(h, Wfc2) + bfc2)

    loss = -tf.reduce_mean(y * tf.log(prob))

    return X, y, prob, loss

In [18]:
def accuracy(y_true, y_pred):
    return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

In [19]:
if __name__ == '__main__':
    alpha = 1e-3

    mnist = input_data.read_data_sets('/home/arasdar/datasets/MNIST_data', one_hot=True)

    X_train, y_train = mnist.train.images, mnist.train.labels
    X_val, y_val = mnist.validation.images, mnist.validation.labels
    X_test, y_test = mnist.test.images, mnist.test.labels

    D, C = X_train.shape[1], y_train.shape[1]
    H = 64
    M = 128

    #     if net_type == 'cnn':
    D = [28, 28, 1]
    X, y, forward_step, loss = convnet(D, H, C)
    X_val = X_val.reshape([-1, 28, 28, 1])
    #     n, h, w, c = map(lambda d: d.value, X.get_shape())
    #     filter_h, filter_w, filter_c, filter_n = [d.value for d in W.get_shape()]
    
    #     #     elif net_type == 'ff':
    #     X, y, forward_step, loss = feedforward_net(D, H, C)

    solver = tf.train.RMSPropOptimizer(alpha)
    train_step = solver.minimize(loss)

    sess = tf.Session()
    #     sess.run(tf.initialize_all_variables())

    #     WARNING:tensorflow:From 
    #     Instructions for updating:
    #     Use `tf.global_variables_initializer` instead.
    sess.run(tf.global_variables_initializer())

    for i in range(5000):
        X_mb, y_mb = mnist.train.next_batch(M)

        #         if net_type == 'cnn':
        X_mb = X_mb.reshape([-1, 28, 28, 1])

        _, loss_val = sess.run([train_step, loss], feed_dict={X: X_mb, y: y_mb})

        if i % 100 == 0:
            y_pred = sess.run(forward_step, feed_dict={X: X_val})
            acc = accuracy(y_val, y_pred)

            print('Iter: {} Loss: {:.4f} Validation: {}'.format(i, loss_val, acc))

Extracting /home/arasdar/datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting /home/arasdar/datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting /home/arasdar/datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting /home/arasdar/datasets/MNIST_data/t10k-labels-idx1-ubyte.gz


TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [28, 28, None, 10]. Consider casting elements to a supported type.