In [1]:
import numpy as np
import cPickle
import os, sys
import tensorflow as tf
import matplotlib.pyplot as plt
import random
from mnist_helper import *

In [2]:
NUM_CLASSES = 10
NUM_SAMPLES_PER_DIGIT = 60
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
BATCH_SIZE = 40
NUM_BATCHES = 8000

In [3]:
# Load data
data_path = 'mnist.pkl'
_, (valid_imgs, valid_labels), (test_imgs, test_labels) = cPickle.load(file(data_path, 'rb'))
train_imgs, train_labels = gen_small_mnist(60,data_path)

# Load DBM pretrained data
'''
# 1 Layer
pretrain_path = 'fc_1L/test/rbm_pretrain.pkl'
[W_1, vb_1, hb_1] = cPickle.load(file(pretrain_path, 'rb'))
'''
# 3 Layers
pretrain_path = 'fc_3L/test/rbm_pretrain.pkl'
[W_1, vb_1, hb_1, W_2, vb_2, hb_2, W_3, vb_3, hb_3] = cPickle.load(file(pretrain_path, 'rb'))

In [4]:
def get_loss_1L(images,y_):
    # Fixed dbm part - as constant
    dbm_output = tf.nn.sigmoid(tf.matmul(images,W_1)+hb_1)
    # Hidden 1
    with tf.variable_scope('hidden1'):
        w1_init = tf.constant(W_1)
        w1 = tf.get_variable('weights',initializer=w1_init)
        b1_init = tf.constant(hb_1)
        b1 = tf.get_variable('biases',initializer=b1_init)
        # Calculation
        y1 = tf.nn.sigmoid(tf.matmul(images,w1)+b1)
    # Hidden 2
    with tf.variable_scope('hidden2'):
        w2 = tf.get_variable('weights',
                              shape = [1000, 10],
                              initializer=tf.truncated_normal_initializer(0, 0.01))
        b2 = tf.get_variable('biases',
                              shape = [10],
                              initializer=tf.constant_initializer(0.0))
        y2 = tf.nn.softmax(tf.matmul(y1, w2) + b2)
        
    if y_ != None:
        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y2,y_))
    else:
        cross_entropy = None

    # Check training accuracy
    correct_count = evaluation(y2,y_)
        
    return cross_entropy,correct_count

def get_loss_3L(images,y_):
    # Hidden 1
    with tf.variable_scope('hidden1'):
        w1_init = tf.constant(W_1)
        w1 = tf.get_variable('weights',initializer=w1_init)
        b1_init = tf.constant(hb_1)
        b1 = tf.get_variable('biases',initializer=b1_init)
        # Calculation
        y1 = tf.nn.sigmoid(tf.matmul(images,w1)+b1)
    # Hidden 2
    with tf.variable_scope('hidden2'):
        w2_init = tf.constant(W_2)
        w2 = tf.get_variable('weights',initializer=w2_init)
        b2_init = tf.constant(hb_2)
        b2 = tf.get_variable('biases',initializer=b2_init)
        y2 = tf.nn.sigmoid(tf.matmul(y1, w2) + b2)
        
    # Hidden 3
    with tf.variable_scope('hidden3'):
        w3_init = tf.constant(W_3)
        w3 = tf.get_variable('weights',initializer=w3_init)
        b3_init = tf.constant(hb_3)
        b3 = tf.get_variable('biases',initializer=b3_init)
        y3 = tf.nn.sigmoid(tf.matmul(y2, w3) + b3)
        
    # Hidden 4
    with tf.variable_scope('hidden4'):
        w4 = tf.get_variable('weights',
                              shape = [1000, 10],
                              initializer=tf.truncated_normal_initializer(0, 0.01))
        b4 = tf.get_variable('biases',
                              shape = [10],
                              initializer=tf.constant_initializer(0.0))
        y4 = tf.nn.softmax(tf.matmul(y3, w4) + b4)
        
    if y_ != None:
        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y4,y_))
    else:
        cross_entropy = None
        
    # Check training accuracy
    correct_count = evaluation(y4,y_)
        
    return cross_entropy,correct_count

def evaluation(logits, labels):
    correct = tf.nn.in_top_k(logits, labels, 1)
    # Return the number of true entries.
    return tf.reduce_sum(tf.cast(correct, tf.int32))

In [5]:
# Parameters
steps_per_decay = 300
steps_per_output = 50
partial_train_steps = 2000

# Create test samples
batch_test_imgs = test_imgs
batch_test_labels = test_labels

batch_images = np.array(batch_test_imgs)
batch_labels = np.array(batch_test_labels)
    
# Finish computation graph
batch_x = tf.placeholder(tf.float32, shape=(None, 784))
batch_y = tf.placeholder(tf.int32, shape=(None))
train = tf.placeholder(tf.bool)
lr_holder = tf.placeholder(tf.float32)
loss, correct_count = get_loss_3L(batch_x,batch_y)
lr = 0.0001

last_layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='hidden4')
opt1 = tf.train.AdamOptimizer()
train_step1 = opt1.minimize(loss, var_list=last_layer_vars)
opt2 = tf.train.GradientDescentOptimizer(lr_holder)
train_step2 = opt1.minimize(loss)
# Start a session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

with sess.as_default():
    tf.initialize_all_variables().run()
    avg_loss = 0.0
    avg_accuracy = 0.0
    for i in xrange(NUM_BATCHES):
        # Generate data batch
        batch_images = []
        batch_labels = []
        
        for j in xrange(BATCH_SIZE):
            img,l = random.choice(zip(train_imgs,train_labels))
            batch_images.append(img)
            batch_labels.append(l)
        
        batch_images = np.array(batch_images)
        batch_labels = np.array(batch_labels)
        
        # Train model
        if i<=partial_train_steps:
            loss_val, correct_num, _ = sess.run(
                [loss, correct_count, train_step1],
                feed_dict={batch_x:batch_images, batch_y:batch_labels})
        else:
            loss_val, correct_num, _ = sess.run(
                [loss, correct_count, train_step2],
                feed_dict={batch_x:batch_images, batch_y:batch_labels, lr_holder:lr})
        
        avg_loss += loss_val/50
        avg_accuracy += correct_num/float(BATCH_SIZE*50)
        
        # Learning rate decay
        if i>partial_train_steps and i%steps_per_decay == 0:
            lr/=2.0
            
        if i!=0 and i%steps_per_output == 0:
            # Testing
            loss_val, correct_num = sess.run(
                [loss, correct_count],
                feed_dict={batch_x:batch_test_imgs, batch_y:batch_test_labels})
            # Print out result
            print "Result@Batch %d:"%i
            print "Average Training Loss: %f"%avg_loss
            print "Average Training Accuracy: %f"%avg_accuracy
            print "Validation Loss: %f"%loss_val
            print "Validation Accuracy: %f"%(correct_num/float(10000))
            avg_loss = 0.0
            avg_accuracy = 0.0
        
        if i==partial_train_steps:
            print "Finish pretrain."

ResourceExhaustedError: OOM when allocating tensor with shape[10000,500]
	 [[Node: hidden1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_recv_Placeholder_0/_78, hidden1/weights/read)]]
	 [[Node: Sum/_90 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_15_Sum", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'hidden1/MatMul', defined at:
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 160, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 433, in _handle_events
    self._handle_recv()
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 465, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 407, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-244d87ee3e6c>", line 18, in <module>
    loss, correct_count = get_loss_3L(batch_x,batch_y)
  File "<ipython-input-4-58e604acf9eb>", line 40, in get_loss_3L
    y1 = tf.nn.sigmoid(tf.matmul(images,w1)+b1)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1352, in matmul
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1296, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2317, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
    self._traceback = _extract_stack()
