In [1]:
import numpy as np
import tensorflow as tf
from datetime import date

In [2]:
# Load MNIST Data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
valX, valY = mnist.validation.images, mnist.validation.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Model HyperParameters
num_visible = 28*28
num_classes = 10
num_hidden = 500
batch_size = 1
num_epochs = 100
num_batches = np.shape(trX)[0] / batch_size

if np.shape(trX)[0] % batch_size > 0:
    num_batches += 1

In [4]:
# with tf.device('/gpu:0'):
# DRMB Model
m = np.max([num_hidden, num_classes])
m_sqrt = np.sqrt(m)

# Model inputs
X = tf.placeholder(tf.float32, [None, num_visible])

Y = tf.placeholder(tf.float32, [None, num_classes])

learning_rate = tf.placeholder(tf.float32)

# Model internal parameters
U = tf.Variable(tf.random_uniform([num_hidden, num_classes], minval=-m_sqrt, maxval=m_sqrt), name='hlayer-classes-connections')

W = tf.Variable(tf.random_uniform([num_hidden, num_visible], minval=-m_sqrt, maxval=m_sqrt), name='hlayer-input-connections')

# b = tf.Variable(tf.zeros([num_visible, 1]), name='input-bias')

c = tf.Variable(tf.zeros([num_hidden, 1]), name='hlayer-bias')

d = tf.Variable(tf.zeros([num_classes, 1]), name='classes-bias')

# Batch dimension needed for the equations
#batch_dimension = tf.shape(X)[0]
#batch_normalization = 1.0 / tf.cast(batch_dimension, tf.float32)

batch_normalization = 1.0 / batch_size

# All the classes
Y_all_classes = tf.diag(tf.ones(num_classes, 1))

# U [num_hiden x num_classes] * Y_all_classes [num_classes x num_classes]
# Result : U_all_classes : [batch_dimension x num_hidden x num_classes]
U_all_y = tf.reshape(tf.matmul(U, Y_all_classes), [1, num_hidden, num_classes])

def give_p_all_y_given_x(U_all_y, W, c, d, X):
    # Tensor of shape (None x num_hidden)
    WX = tf.transpose(tf.matmul(W, tf.transpose(X)))
    
#     print S.get_shape()

    # WX + c for all batches
    # Result : O (batch_dimension x num_hidden)
    O = WX + tf.reshape(c, [1, num_hidden])
#     O = WX + tf.tile(tf.reshape(c, [1, num_hidden]), [-1, 1])

    # Extend O on all the classes:
    # Result : O (batch_dimension x num_hidden x num_classes)    
#     O = 
#     O = tf.tile(tf.reshape(O, [-1, num_hidden, 1]), [1, 1, num_classes])

    # Extend U_all_classes for all batches
    # Result : U_all_y_extended (batch_dimension x num_hidden x num_classes)
#     U_all_y_extended
#     U_all_y_extended = tf.tile(tf.reshape(U_all_y, [1, num_hidden, num_classes]), [-1, 1, 1])
    
    # Resulted O:
    # Result: O (batch_dimension x num_hidden x num_classes)
    O = tf.reshape(O, [-1, num_hidden, 1]) + tf.reshape(U_all_y, [1, num_hidden, num_classes])

    # First term in log p(y|x) which is calculated for each x in the batch
    # Result : first_term (batch_dimension x num_classes)
    first_term = tf.transpose(tf.matmul(Y_all_classes, d))
#     first_term = tf.tile(tf.reshape(tf.matmul(Y_all_classes, d), [1, num_classes]), [-1, 1])

    # Second term in log p(y|x) which is calculated for each x in the batch
    # Result : second_term (batch_dimension x num_classes)            
    second_term = tf.reduce_sum(tf.nn.softplus(O), 1)
#     second_term = tf.reduce_sum(tf.nn.softplus(O), 1)

    # Positive part of log p(y|x)  
    # Result: positive_part (batch_dimension x num_classes)
    positive_part = first_term + second_term

    # Use the softmax to calculate the probabilities:
    # Result: p_y_all_given_x (batch_dimension x num_classes)
    p_y_all_given_x = tf.nn.softmax(positive_part)
    
    return WX, O, positive_part, p_y_all_given_x

WX, O_all, positive_part, p_y_all_given_x = give_p_all_y_given_x(U_all_y, W, c, d, X)

# Calculate p(y|x) for concrete x
# Result : p_y_given_x (batch_dimension x 1)
p_y_given_x = tf.reshape(tf.reduce_sum(tf.mul(p_y_all_given_x, Y),1), [-1, 1])

# Training part

# Calculate UY
# Result : U (batch_dimension x num_hidden)
UY = tf.transpose(tf.matmul(U, tf.transpose(Y)))

O = WX + UY

# O_sigma: (batch_dimension x num_hidden)
O_sigma = tf.sigmoid(O)

# O_sigma_all_Y : (batch_dimension x num_hidden x num_classes)
O_sigma_all_Y = tf.sigmoid(O_all)

# O_sigma_all_Y_p : (batch_dimension x num_hidden x num_classes)
# O_sigma_all_Y_p = tf.batch_mul(O_sigma_all_Y, tf.reshape(p_y_all_given_x, 
O_sigma_all_Y_p = tf.mul(O_sigma_all_Y, tf.tile(tf.reshape(p_y_all_given_x, [-1, 1, num_classes]), [1, num_hidden, 1]))                         

# # d_U: (num_hidden x num_classes)
dU_left = tf.matmul(tf.transpose(O_sigma), Y) #tf.matmul(O_sigma, Y)
dU_right = tf.matmul(tf.transpose(tf.reduce_sum(O_sigma_all_Y_p, 2)), Y)# O_sigma_all_Y_p
# dU_left = tf.matmul(tf.transpose(O_sigma), tf.ones([-1, num_classes]))
# dU_right = tf.reduce_sum(O_sigma_all_Y_p, 0)
d_U = batch_normalization * (dU_left - dU_right)
d_U = tf.reshape(d_U, [num_hidden, num_classes])

# d_W : (num_hidden x num_visible)
dW_left = tf.matmul(tf.transpose(O_sigma), X)
dW_right = tf.matmul(tf.transpose(tf.reduce_sum(O_sigma_all_Y_p, 2)), X) 
# dW_right = tf.reduce_sum(tf.batch_matmul(O_sigma_all_Y_p, tf.tile(tf.reshape(X, [-1, num_visible, 1]), [1, 1, num_classes]), adj_y=True), 0)
d_W = batch_normalization * (dW_left - dW_right)
d_W = tf.reshape(d_W, [num_hidden, num_visible])

# d_c : (num_hidden x 1)
dc_left = tf.reduce_sum(O_sigma, 0) #tf.matmul(tf.transpose(O_sigma)) #, tf.ones([-1, 1]))
dc_right = tf.reduce_sum(tf.reduce_sum(O_sigma_all_Y_p, 2), 0)
# dc_right = tf.reduce_sum(tf.reduce_sum(O_sigma_all_Y_p, 2), 
#tf.reshape(tf.reduce_sum(tf.reduce_sum(O_sigma_all_Y_p,0),1), [num_hidden, 1])
d_c = batch_normalization * (dc_left - dc_right)
d_c = tf.reshape(d_c, [num_hidden, 1])

# d_d : (num_classes x 1)
d_d = batch_normalization * (tf.reduce_sum(Y - p_y_all_given_x, 0))
d_d = tf.reshape(d_d, [num_classes, 1])

# updates:
updates = [U.assign_add(learning_rate * d_U), W.assign_add(learning_rate * d_W), c.assign_add(learning_rate * d_c), d.assign_add(learning_rate * d_d)]

# Error classification
# _, _, _, _, predicted_all_y = give_p_all_y_given_x(tf.matmul(U, Y_all_classes), W, c, d, X)

predicted_y = tf.argmax(p_y_all_given_x, 1)
# predicted_y = tf.argmax(predicted_all_y, 1)
ground_truth = tf.argmax(Y, 1)

correct_prediction = tf.equal(predicted_y, ground_truth)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# da = tf.log(p_y_all_given_x)
# loss = -tf.reduce_mean(tf.log(tf.reduce_max(tf.mul(p_y_all_given_x, Y), 1)))

In [5]:
# Running session
config = tf.ConfigProto(allow_soft_placement = True)
sess = tf.Session(config = config)
# sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
init = tf.initialize_all_variables()
sess.run(init)

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

In [None]:
train_accuracies = np.zeros([num_epochs, num_batches + 1])
validation_accuracies = np.zeros([num_epochs, num_batches + 1])
test_accuracies = np.zeros([num_epochs, 1])
today = str(date.today())
print today

2016-12-09


In [None]:
# Debugging
for epoch in range(1, num_epochs + 1):
#     sess.run(init)
    j = 1
    for i in range(1, num_batches + 1):
        with tf.device('/gpu:0'):
            tr_x, tr_y = mnist.train.next_batch(batch_size)
            alpha = 0.05
#             alpha = np.exp(0.05)
#             if epoch != 1:
#                 alpha = 0.00005
#             else :
#                 alpha = min(0.00005, 100.0 / i) #0.001 #min(0.01, 20000/i)
            sess.run([updates], feed_dict={X: tr_x, Y: tr_y, learning_rate: alpha})            
            
        if i % 5000 == 0:
            with tf.device('/cpu:0'):                
                train_accuracy = sess.run([accuracy], feed_dict={X: trX, Y: trY})                            
                val_accuracy = sess.run([accuracy], feed_dict={X: valX, Y: valY})            
                
                print 'epoch : ', epoch
                print 'i : ', i
                print 'train_accuracy : ', train_accuracy[0]
                print 'validation_accuracy : ', val_accuracy[0]
                
                save_path = saver.save(sess, "./models/"+today+"_"+str(epoch)+"_"+str(j)+"_model.ckpt")
                print("Model saved in file: %s" % save_path)                
                train_accuracies[epoch - 1][j] = train_accuracy[0]
                validation_accuracies[epoch - 1][j] = val_accuracy[0]
                j = j+1           
    train_acc = sess.run([accuracy], feed_dict={X: trX, Y: trY})      
    val_acc = sess.run([accuracy], feed_dict={X: valX, Y: valY})  
    test_acc = sess.run([accuracy], feed_dict={X: teX, Y: teY})  
    train_accuracies[epoch - 1, 0] = train_acc[0]
    validation_accuracies[epoch - 1, 0] = val_acc[0]
    test_accuracies[epoch - 1, 0] = test_acc[0]
    
    print 'One epoch finished'
    print 'epoch : ', epoch
    print 'train_accuracy : ', train_acc[0]
    print 'validation_accuracy : ', val_acc[0]
    print 'test_accuracy : ', test_acc[0]
    
    save_path = saver.save(sess, "./models/"+today+"_"+str(epoch)+"_"+str(0)+"_model.ckpt")
    print("Model saved in file: %s" % save_path)             

epoch :  1
i :  5000
train_accuracy :  0.573491
validation_accuracy :  0.5812
Model saved in file: ./models/2016-12-09_1_1_model.ckpt
epoch :  1
i :  10000
train_accuracy :  0.689218
validation_accuracy :  0.6986
Model saved in file: ./models/2016-12-09_1_2_model.ckpt
epoch :  1
i :  15000
train_accuracy :  0.737018
validation_accuracy :  0.7426
Model saved in file: ./models/2016-12-09_1_3_model.ckpt
epoch :  1
i :  20000
train_accuracy :  0.761291
validation_accuracy :  0.7642
Model saved in file: ./models/2016-12-09_1_4_model.ckpt
epoch :  1
i :  25000
train_accuracy :  0.785491
validation_accuracy :  0.7914
Model saved in file: ./models/2016-12-09_1_5_model.ckpt
epoch :  1
i :  30000
train_accuracy :  0.802346
validation_accuracy :  0.803
Model saved in file: ./models/2016-12-09_1_6_model.ckpt
epoch :  1
i :  35000
train_accuracy :  0.821037
validation_accuracy :  0.8202
Model saved in file: ./models/2016-12-09_1_7_model.ckpt
epoch :  1
i :  40000
train_accuracy :  0.825837
validati

In [None]:
# trX = trX * 255
# Batch Normalization
# mean_image = np.reshape(np.mean(trX,axis=0),[1,-1])
# trX = (trX - mean_image) # / np.reshape(np.std(trX, axis=0), [1,-1])