In [1]:
from __future__ import division

from skdata.mnist.views import OfficialVectorClassification
from tqdm import tqdm
import numpy as np
import tensorflow as tf
import pandas as pd

In [2]:
# Load the MNIST dataset through skdata's OfficialVectorClassification view.
data = OfficialVectorClassification()

In [3]:
# Scale the raw image vectors by 1/255 (true division, via the __future__
# import) and store the result as float32.
# NOTE(review): presumably the raw pixels are 0-255, which would map them
# to [0, 1] -- confirm against the skdata view.
df_images = (data.all_vectors / 255).astype('float32')

In [4]:
# Full slice of the dataset's selection indices; the export loop iterates
# over these.
# NOTE(review): presumably these are the training-split indices -- confirm
# against skdata.
trIdx = data.sel_idxs[:]

In [5]:
# One-hot encode the labels: get_dummies yields one indicator column per
# distinct label value.
df_labels = pd.get_dummies(data.all_labels)

In [6]:
# Cast the indicator columns to float32; the labels are stored as float
# lists in the TFRecord file.
df_labels = df_labels.astype('float32')

In [7]:
# Open a TFRecord writer on "mnist.tfrecords" in the working directory.
# NOTE(review): the writer should be closed after the last write so the
# file on disk is complete.
writer = tf.python_io.TFRecordWriter("mnist.tfrecords")

In [11]:
# Serialize every selected example as a tf.train.Example and append it to the
# TFRecord file. The try/finally guarantees the writer is closed -- and its
# buffered records flushed to disk -- even if the loop fails part-way.
# NOTE: a closed writer cannot be reused; re-create `writer` before re-running
# this cell.
try:
    for example_idx in tqdm(trIdx):
        features = df_images[example_idx]
        label = df_labels.iloc[example_idx]
        # Construct the Example proto object.
        example = tf.train.Example(
            # An Example contains a Features proto object ...
            features=tf.train.Features(
                # ... which holds a map of string to Feature proto objects.
                feature={
                    # A Feature contains one of an int64_list, a float_list
                    # or a bytes_list; both fields here are float lists.
                    'label': tf.train.Feature(
                        float_list=tf.train.FloatList(value=label.astype("float"))),
                    'image': tf.train.Feature(
                        float_list=tf.train.FloatList(value=features.astype("float"))),
                }))

        # Serialize the proto object to a string and write it to disk.
        serialized = example.SerializeToString()
        writer.write(serialized)
finally:
    writer.close()

100%|██████████| 60000/60000 [01:36<00:00, 624.96it/s]


In [12]:
# Optimizer setting
learning_rate = 0.001  # step size handed to the optimizer

# Data / network dimensions
num_classes = 10  # MNIST digit classes (0-9)
num_input = 28    # side length of a square MNIST image (28 * 28 pixels)
num_hidden = 3    # number of units in the LSTM cell
num_batch = 128   # examples per batch

In [13]:
# Queue of input file names for the reader. With num_epochs=None no epoch
# limit is set, so the file can be cycled through repeatedly.
filename_queue = tf.train.string_input_producer(["mnist.tfrecords"], num_epochs=None)

In [14]:
# Reader that produces records from TFRecord files.
reader = tf.TFRecordReader()

In [15]:
# Read one record from the filename queue. read() returns a pair; only the
# second element (the serialized example) is kept.
_, serialized_example = reader.read(filename_queue)

In [16]:
# Describe the fixed-length float fields stored in each record. Both lengths
# are known up front; tf.VarLenFeature would be the alternative if they were
# not.
example_spec = {
    'label': tf.FixedLenFeature([num_classes], tf.float32),
    'image': tf.FixedLenFeature([num_input*num_input], tf.float32),
}

# Decode one serialized Example into a dict of tensors keyed by field name.
features = tf.parse_single_example(serialized_example, features=example_spec)

In [17]:
# Pull the two parsed tensors out of the feature dict.
image, label = features['image'], features['label']

In [18]:
# Randomly group single examples into batches of num_batch.
# NOTE(review): no seed is passed, so the batch order is not reproducible
# between runs.
images_batch, labels_batch = tf.train.shuffle_batch(
    [image, label],
    min_after_dequeue=1000,
    capacity=2000,
    batch_size=num_batch)

In [19]:
# Show the static shapes of the batched image and label tensors.
for batch_tensor in (images_batch, labels_batch):
    print(batch_tensor.get_shape())

(128, 784)
(128, 10)


In [20]:
# Treat each image as a sequence of single-pixel steps: (batch, pixels, 1).
# The pixel count is derived from num_input so it stays in step with the
# 'image' feature spec instead of repeating the literal 784.
images_batch = tf.reshape(images_batch, [num_batch, num_input * num_input, 1])

In [21]:
# Reshape the labels to (num_batch, num_classes). Per the shapes printed
# earlier this is already their shape, so the layout does not change.
labels_batch = tf.reshape(labels_batch, [num_batch, num_classes])

In [22]:
# Show the static shapes of both batch tensors at this point.
for batch_tensor in (images_batch, labels_batch):
    print(batch_tensor.get_shape())

(128, 784, 1)
(128, 10)


In [23]:
# Single LSTM layer with num_hidden units, run over the image batch.
# `val` is the RNN output tensor and `state` the final state returned by
# dynamic_rnn.
cell = tf.nn.rnn_cell.LSTMCell(num_units=num_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell=cell, inputs=images_batch, dtype=tf.float32)

In [24]:
# Swap the first two axes of the RNN output so the final time step can be
# picked with a single gather along axis 0. A separate name is used rather
# than rebinding `val`, so re-running this cell does not transpose the
# tensor back and select the wrong slice.
val_time_major = tf.transpose(val, [1, 0, 2])
last = tf.gather(val_time_major, int(val_time_major.get_shape()[0]) - 1)

# Linear read-out from the last RNN output to one score per class.
weight = tf.Variable(tf.truncated_normal([num_hidden, num_classes]))
bias = tf.Variable(tf.truncated_normal([num_classes]))

# Raw (un-normalized) class scores.
pred = tf.matmul(last, weight) + bias

# Loss and optimizer: root-mean-square error between the labels and the raw
# scores, minimized with Adam.
# NOTE(review): RMSE is a regression loss; a softmax cross-entropy loss is
# the usual choice for a classifier -- consider switching.
cost = tf.sqrt(tf.reduce_mean(tf.square(labels_batch - pred)), name='RMSE')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of the batch whose highest score matches the label's
# argmax.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(labels_batch, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [25]:
# Build the variable initializer, then open a session and run it.
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
# Start the queue-runner threads that feed the input pipeline; the returned
# thread list is this cell's displayed output.
tf.train.start_queue_runners(sess=sess)

[<Thread(Thread-5, started daemon 123145334517760)>,
 <Thread(Thread-6, started daemon 123145338724352)>]

In [26]:
# Run ten optimizer steps and print the loss after each one. No feed_dict is
# passed: the batch tensors are produced by the input queue.
for step in range(10):
    loss_val = sess.run([optimizer, cost])[1]
    print(loss_val)

1.35295
1.36025
1.33954
1.34952
1.35219
1.33809
1.35225
1.34652
1.3378
1.35596


In [43]:
# Read the TFRecord file back in plain Python, outside the graph.
# NOTE: this rebinds `serialized_example`, `image` and `label`, which earlier
# cells used for graph tensors.
filename = "mnist.tfrecords"
record_iterator = tf.python_io.tf_record_iterator(filename)
for serialized_example in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(serialized_example)

    # Pull the float lists out of the parsed Example's feature map.
    feature_map = example.features.feature
    label = feature_map['label'].float_list.value
    image = feature_map['image'].float_list.value
    # per-record processing would go here
# After the loop, `image` and `label` hold the values of the last record.

In [51]:
# Index of the largest entry in the label read back from the file, i.e. the
# class encoded by the one-hot vector.
np.argmax(label)

5

In [45]:
# Copy the image's float values into a NumPy array.
image_input = np.array(image)

In [46]:
# Make it a single-row matrix: 1 example x 784 values.
image_input = image_input.reshape(1, 784)

In [47]:
# Pad the single image with 127 all-zero rows to make a 128-row batch; the
# image stays in row 0.
zeros = np.zeros(shape=(127, 784))
test = np.vstack((image_input, zeros))

In [48]:
# Display the padded batch's shape as a sanity check.
test.shape

(128, 784)

In [50]:
# Feed the padded batch in place of the images_batch tensor and print the
# predicted class for row 0 (the real image; the remaining rows are zero
# padding).
feed_dict = {images_batch: test.reshape((128, 784, 1))}
softmax_op = tf.nn.softmax(pred)
classification = sess.run(softmax_op, feed_dict)
print(np.argmax(classification[0]))

5
