In [1]:
import os
import urllib
import numpy as np
from os import listdir
from os.path import isfile, join

In [2]:
class DataSet(object):
    """In-memory collection of images and labels served in mini-batches.

    At construction time every image is flattened to a single row and its
    values are rescaled from [0, 255] to [0.0, 1.0] as float32.
    """

    def __init__(self, images, labels):
        """Store `images` (flattened and rescaled) together with `labels`.

        Args:
            images: array whose first axis indexes examples; all remaining
                axes are flattened into one feature axis.
            labels: array whose first axis has the same length as `images`.

        Raises:
            AssertionError: if `images` and `labels` disagree on the number
                of examples.
        """
        assert images.shape[0] == labels.shape[0], (
          "images.shape: %s labels.shape: %s" % (images.shape, labels.shape))
        self._num_examples = images.shape[0]

        # Flatten every axis after the example axis. Computing the product
        # explicitly (instead of hard-coding shape[1] * shape[2]) also accepts
        # images that are already flat or that carry a trailing channel axis.
        pixels_per_image = int(np.prod(images.shape[1:]))
        images = images.reshape(images.shape[0], pixels_per_image)

        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(np.float32)
        images = np.multiply(images, 1.0 / 255.0)

        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        """Flattened, rescaled images in their current (possibly shuffled) order."""
        return self._images

    @property
    def labels(self):
        """Labels, kept in the same order as `images`."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples held by this data set."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """Number of times `next_batch` has wrapped around the data."""
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set.

        The first pass serves examples in their stored order. When a request
        would run past the end, the leftover tail is skipped, images and
        labels are reshuffled together, and the batch is taken from the start
        of the new ordering.

        Args:
            batch_size: number of examples to return; must not exceed
                `num_examples`.
            fake_data: accepted for interface compatibility; not used.

        Returns:
            Tuple `(images, labels)` with `batch_size` rows each.

        Raises:
            AssertionError: if `batch_size` exceeds `num_examples`.
        """
        # Validate before touching any state, so a rejected request does not
        # leave the epoch counter, ordering or cursor modified.
        assert batch_size <= self._num_examples, (
            "batch_size %s exceeds num_examples %s"
            % (batch_size, self._num_examples))

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle images and labels with the same permutation
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]

In [3]:
from os import listdir
from os.path import isfile, join
from scipy import misc

In [4]:
# Number of images assigned to each split.
tr_size = 512  # training
te_size = 128  # test
va_size = 128  # validation
# Upper bound on how many images are loaded: exactly enough for all three splits.
max_up = tr_size + te_size + va_size

In [5]:
import random as rand

def extract_images_labels(folder_name):
    """Load PNG images from `folder_name` along with one-hot labels.

    At most `max_up` files whose name ends in 'png' are taken from the
    directory listing, and that selection is then shuffled. The label of
    each image is the digit given by the first character of its file name.

    Args:
        folder_name: directory containing the image files.

    Returns:
        Tuple `(images, labels)`: `images` has shape (count, 512, 512) and
        `labels` has shape (count, 10) with a single 1 per row.
    """
    png_names = [
        entry for entry in listdir(folder_name)
        if isfile(join(folder_name, entry)) and entry.endswith('png')
    ][:max_up]
    rand.shuffle(png_names)

    pixels = np.zeros((len(png_names), 512, 512))
    one_hot = np.zeros((len(png_names), 10))
    for row, name in enumerate(png_names):
        pixels[row, :] = misc.imread(join(folder_name, name))
        # First character of the file name is the class digit.
        one_hot[row, int(name[0])] = 1
    return pixels, one_hot

In [6]:
def read_data_sets(train_dir):
    """Load the images under `train_dir` and split them into three DataSets.

    The first `tr_size` examples become the training set, the following
    `te_size` the test set, and everything after that the validation set.

    Args:
        train_dir: directory handed to `extract_images_labels`.

    Returns:
        An object with `train`, `validation` and `test` attributes, each a
        `DataSet`.
    """
    class DataSets(object):
        pass

    images, labels = extract_images_labels(train_dir)

    train_end = tr_size
    test_end = tr_size + te_size

    bundle = DataSets()
    bundle.train = DataSet(images[:train_end], labels[:train_end])
    bundle.validation = DataSet(images[test_end:], labels[test_end:])
    bundle.test = DataSet(images[train_end:test_end], labels[train_end:test_end])
    return bundle

In [7]:
# Load the image folder and split it into train / validation / test sets.
# The location can be overridden with the SPOKEN_NUMBERS_DIR environment
# variable so the notebook is not tied to one machine's directory layout;
# when the variable is unset the original path is used.
mnist = read_data_sets(
    os.environ.get("SPOKEN_NUMBERS_DIR",
                   "/home/agah/TF/CSE253Project/spoken_numbers"))

print("mnist returned")

mnist returned


In [8]:
import tensorflow as tf
from tensorflow.python.ops.constant_op import constant
from tensorflow.models.rnn import rnn, rnn_cell
import numpy as np

In [9]:
'''
To classify images using a bidirectional recurrent neural network, we consider every image row as a sequence of pixels.
Because each input image here is 512*512px, every sample is handled as a sequence of 512 steps with 512 inputs per step.
'''

# Training hyper-parameters
batch_size = 128        # examples per optimisation step
learning_rate = 0.001   # step size handed to the optimizer
display_step = 10       # report loss/accuracy every this many steps
training_iters = 12800  # training stops once step * batch_size reaches this

In [10]:
# Network dimensions
n_steps = 512    # timesteps per sample
n_input = 512    # input values per timestep
n_hidden = 256   # hidden layer num of features
n_classes = 10   # number of output classes (digits 0-9)

In [11]:
# Graph inputs: one (n_steps, n_input) sequence per example.
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_input])
# Initial LSTM states for the forward and backward cells; each is
# 2 * n_hidden wide because the cell keeps both a state and a cell vector.
istate_fw = tf.placeholder(tf.float32, shape=[None, 2 * n_hidden])
istate_bw = tf.placeholder(tf.float32, shape=[None, 2 * n_hidden])
# One-hot target labels.
y = tf.placeholder(tf.float32, shape=[None, n_classes])

In [12]:
# Trainable parameters, randomly initialised from a normal distribution.
# The hidden projection is 2*n_hidden wide because of the forward + backward cells.
weights = dict(
    hidden=tf.Variable(tf.random_normal([n_input, 2 * n_hidden])),
    out=tf.Variable(tf.random_normal([2 * n_hidden, n_classes])),
)
biases = dict(
    hidden=tf.Variable(tf.random_normal([2 * n_hidden])),
    out=tf.Variable(tf.random_normal([n_classes])),
)

In [13]:
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):
    """Build the bidirectional LSTM graph and return its output logits.

    Args:
        _X: input tensor, expected as (batch_size, n_steps, n_input).
        _istate_fw: initial state for the forward LSTM cell.
        _istate_bw: initial state for the backward LSTM cell.
        _weights: dict with 'hidden' and 'out' weight matrices.
        _biases: dict with 'hidden' and 'out' bias vectors.
        _batch_size: number of entries in the generated sequence-length vector.
        _seq_len: sequence length reported for every example.

    Returns:
        The last RNN output multiplied by the 'out' weights plus the 'out' bias.
    """
    # Tensorflow 0.6.0 requires the sequence_length argument: an int64 vector
    # with one entry per example, here the same length for all of them.
    seq_lengths = tf.fill([_batch_size], constant(_seq_len, dtype=tf.int64))

    print("before transpose")
    # (batch_size, n_steps, n_input) -> (n_steps, batch_size, n_input)
    time_major = tf.transpose(_X, [1, 0, 2])
    # Collapse steps and batch into rows: (n_steps*batch_size, n_input)
    flat_inputs = tf.reshape(time_major, [-1, n_input])
    # Linear projection applied before the recurrent cells
    projected = tf.matmul(flat_inputs, _weights['hidden']) + _biases['hidden']

    print("Done matmul")
    # One LSTM cell per direction
    forward_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    backward_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # The RNN inner loop consumes a list with one tensor per timestep
    step_inputs = tf.split(0, n_steps, projected)

    print("before outputs")

    outputs = rnn.bidirectional_rnn(forward_cell, backward_cell, step_inputs,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=seq_lengths)
    print("before return")
    # Linear read-out of the last element of the RNN outputs
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']

In [14]:
print("Calling BiRNN")
# Build the prediction (logits) graph from the placeholders and parameters.
pred = BiRNN(
    _X=x,
    _istate_fw=istate_fw,
    _istate_bw=istate_bw,
    _weights=weights,
    _biases=biases,
    _batch_size=batch_size,
    _seq_len=n_steps,
)
print("Calling BiRNN Done")

Calling BiRNN
before transpose
Done matmul
before outputs
before return
Calling BiRNN Done


In [15]:
# NOTE: The following code is working with current master version of tensorflow
#       BiRNN sequence_length parameter isn't required, so we don't define it
#
# def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):
#
#     # input shape: (batch_size, n_steps, n_input)
#     _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
#     # Reshape to prepare input to hidden activation
#     _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
#     # Linear activation
#     _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
#
#     # Define lstm cells with tensorflow
#     # Forward direction cell
#     lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
#     # Backward direction cell
#     lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
#     # Split data because rnn cell needs a list of inputs for the RNN inner loop
#     _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)
#
#     # Get lstm cell output
#     outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
#                                             initial_state_fw=_istate_fw,
#                                             initial_state_bw=_istate_bw)
#
#     # Linear activation
#     # Get inner loop last output
#     return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
#
# pred = BiRNN(x, istate_fw, istate_bw, weights, biases)

In [16]:
# Loss: softmax cross-entropy between logits and labels, averaged over the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# Training op: one Adam update minimising the loss.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [17]:
# Evaluate model
# A prediction counts as correct when the index of the largest logit equals
# the index of the largest entry in the label row.
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
# Accuracy is the mean of the correctness flags after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [None]:
# Initializing the variables
# This only builds the initialisation op; it takes effect once it is run
# inside a session.
init = tf.initialize_all_variables()

In [None]:
print("Launching the graph")
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    def feed_for(batch_x, batch_y):
        """Build the feed dict for one batch with all-zero initial LSTM states."""
        # The initial states need one row per example in the batch.
        zero_state = np.zeros((batch_x.shape[0], 2*n_hidden))
        return {x: batch_x, y: batch_y,
                istate_fw: zero_state, istate_bw: zero_state}

    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Restore the (n_steps, n_input) layout of every flattened image
        batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        train_feed = feed_for(batch_xs, batch_ys)
        # Fit training using batch data
        sess.run(optimizer, feed_dict=train_feed)
        if step % display_step == 0:
            # Batch accuracy and batch loss, fetched in a single run
            acc, loss = sess.run([accuracy, cost], feed_dict=train_feed)
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    # Calculate accuracy on the first `test_len` test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    test_acc = sess.run(accuracy, feed_dict=feed_for(test_data, test_label))
    # Build a single string so the output is the same whether print is a
    # statement or a function.
    print("Testing Accuracy: " + str(test_acc))

Launching the graph
