In [1]:
import numpy as np
import cPickle
import os, sys
import tensorflow as tf
import matplotlib.pyplot as plt
import random
from mnist_helper import *

In [2]:
NUM_CLASSES = 10
NUM_SAMPLES_PER_DIGIT = 60
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
BATCH_SIZE = 40
NUM_BATCHES = 4000

In [3]:
# Load data
data_path = 'mnist.pkl'
_, (valid_imgs, valid_labels), (test_imgs, test_labels) = cPickle.load(file(data_path, 'rb'))
train_imgs, train_labels = gen_small_mnist(600,data_path)

# Load DBM pretrained data

# 3 Layers
pretrain_path = 'fc_3L/test/rbm_pretrain.pkl'
[W_1, vb_1, hb_1, W_2, vb_2, hb_2, W_3, vb_3, hb_3] = cPickle.load(file(pretrain_path, 'rb'))
'''
# 1 Layers
pretrain_path = 'fc_1L/test/rbm_pretrain.pkl'
[W_1, vb_1, hb_1] = cPickle.load(file(pretrain_path, 'rb'))
'''


"\n# 1 Layers\npretrain_path = 'fc_1L/test/rbm_pretrain.pkl'\n[W_1, vb_1, hb_1] = cPickle.load(file(pretrain_path, 'rb'))\n"

In [4]:
# Defined function for calculating losses
def vhv(images):
    dbm_h1 = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    dbm_h2 = tf.nn.sigmoid(tf.matmul(dbm_h1,W_2)+hb_2)
    dbm_h3 = tf.nn.sigmoid(tf.matmul(dbm_h2,W_3)+hb_3)
    dbm_v3 = tf.nn.sigmoid(tf.matmul(dbm_h3,np.transpose(W_3)+vb_3))
    dbm_v2 = tf.nn.sigmoid(tf.matmul(dbm_v3,np.transpose(W_2)+vb_2))
    dbm_v1 = tf.nn.sigmoid(tf.matmul(dbm_v2,np.transpose(W_1)+vb_1))
    return dbm_v1
    
def vhv_get_loss(images,y_):
    dbm_output = vhv(images)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [784, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count
    
                           
# Build computation graph
# Input: images (batch_size * 784)
def get_loss_plain(images,y_):
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [784, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(images, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def get_loss_1L(images,y_):
    # Fixed dbm part - as constant
    dbm_output = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [1000, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def get_loss_2L(images,y_):
    # Fixed dbm part - as constant
    dbm_h1 = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    dbm_output = tf.nn.sigmoid(tf.matmul(dbm_h1,W_2)+hb_2)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [500, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def get_loss_3L(images,y_):
    # Fixed dbm part - as constant
    dbm_h1 = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    dbm_h2 = tf.nn.sigmoid(tf.matmul(dbm_h1,W_2)+hb_2)
    dbm_output = tf.nn.sigmoid(tf.matmul(dbm_h2,W_3)+hb_3)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [1000, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def get_loss_dropout(images,y_,train=False):
    # Fixed dbm part - as constant
    dbm_h1 = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    dbm_h2 = tf.nn.sigmoid(tf.matmul(dbm_h1,W_2)+hb_2)
    dbm_output = tf.nn.sigmoid(tf.matmul(dbm_h2,W_3)+hb_3)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [1000, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        if train is not None:
          dbm_output = tf.nn.dropout(dbm_output, 0.5)
        
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def get_loss_last_two_layers(images,y_):
    # Fixed dbm part - as constant
    dbm_h1 = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    dbm_h2 = tf.nn.sigmoid(tf.matmul(dbm_h1,W_2)+hb_2)
    dbm_h3 = tf.nn.sigmoid(tf.matmul(dbm_h2,W_3)+hb_3)
    dbm_output = tf.concat(1, [dbm_h2, dbm_h3])
    # Hidden 1
    with tf.variable_scope('hidden1'):
        weights = tf.get_variable('weights',
                                  shape = [1500, 10],
                                  initializer=tf.truncated_normal_initializer(0, 0.01))
        biases = tf.get_variable('biases',
                                 shape = [10],
                                 initializer=tf.constant_initializer(0.0))
        y = tf.nn.softmax(tf.matmul(dbm_output, weights) + biases)
        if y_ != None:
            cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y,y_))
        else:
            cross_entropy = None
        
        # Check training accuracy
        correct_count = evaluation(y,y_)
        
    return cross_entropy,correct_count

def evaluation(logits, labels):
    correct = tf.nn.in_top_k(logits, labels, 1)
    # Return the number of true entries.
    return tf.reduce_sum(tf.cast(correct, tf.int32))

In [5]:
# Parameters
steps_per_decay = 300
steps_per_output = 50

# Create test samples
batch_test_imgs = test_imgs
batch_test_labels = test_labels

batch_images = np.array(batch_test_imgs)
batch_labels = np.array(batch_test_labels)
    
# Finish computation graph
batch_x = tf.placeholder(tf.float32, shape=(None, 784))
batch_y = tf.placeholder(tf.int32, shape=(None))
train = tf.placeholder(tf.bool)
lr_holder = tf.placeholder(tf.float32)
loss, correct_count = get_loss_3L(batch_x,batch_y)
lr = 0.5
#train_step = tf.train.GradientDescentOptimizer(lr_holder).minimize(loss)
train_step = tf.train.AdamOptimizer().minimize(loss)

# Start a session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

with sess.as_default():
    tf.initialize_all_variables().run()
    avg_loss = 0.0
    avg_accuracy = 0.0
    for i in xrange(NUM_BATCHES):
        # Generate data batch
        batch_images = []
        batch_labels = []
        
        for j in xrange(BATCH_SIZE):
            img,l = random.choice(zip(train_imgs,train_labels))
            batch_images.append(img)
            #l_a = np.zeros(10)
            #l_a[l] = 1.0
            #batch_labels.append(l_a)
            batch_labels.append(l)
        
        batch_images = np.array(batch_images)
        batch_labels = np.array(batch_labels)
        
        # Train model
        loss_val, correct_num, _ = sess.run(
            [loss, correct_count, train_step],
            feed_dict={batch_x:batch_images, batch_y:batch_labels})
        
        avg_loss += loss_val/50
        avg_accuracy += correct_num/float(BATCH_SIZE*50)
        
        # Learning rate decay
        if i!=0 and i%steps_per_decay == 0:
            lr/=2.0
            
        if i!=0 and i%steps_per_output == 0:
            # Testing
            loss_val, correct_num = sess.run(
                [loss, correct_count],
                feed_dict={batch_x:batch_test_imgs, batch_y:batch_test_labels})
            # Print out result
            print "Result@Batch %d:"%i
            print "Average Training Loss: %f"%avg_loss
            print "Average Training Accuracy: %f"%avg_accuracy
            print "Validation Loss: %f"%loss_val
            print "Validation Accuracy: %f"%(correct_num/float(10000))
            avg_loss = 0.0
            avg_accuracy = 0.0

Result@Batch 50:
Average Training Loss: 2.122103
Average Training Accuracy: 0.551500
Validation Loss: 1.851574
Validation Accuracy: 0.788100
Result@Batch 100:
Average Training Loss: 1.775086
Average Training Accuracy: 0.826500
Validation Loss: 1.685846
Validation Accuracy: 0.895300
Result@Batch 150:
Average Training Loss: 1.669995
Average Training Accuracy: 0.891500
Validation Loss: 1.635653
Validation Accuracy: 0.907300
Result@Batch 200:
Average Training Loss: 1.631470
Average Training Accuracy: 0.907000
Validation Loss: 1.609655
Validation Accuracy: 0.917300
Result@Batch 250:
Average Training Loss: 1.592457
Average Training Accuracy: 0.933500
Validation Loss: 1.590941
Validation Accuracy: 0.926100
Result@Batch 300:
Average Training Loss: 1.584694
Average Training Accuracy: 0.935500
Validation Loss: 1.581855
Validation Accuracy: 0.927800
Result@Batch 350:
Average Training Loss: 1.576952
Average Training Accuracy: 0.929000
Validation Loss: 1.574225
Validation Accuracy: 0.929000
Result@