### MNIST data set learning using TensorFlow

Guide link: https://www.tensorflow.org/get_started/mnist/beginners

In [6]:
# Load the MNIST dataset through TensorFlow's tutorial helper, using the local "MNIST_data/" directory.
# one_hot=True requests one-hot encoded labels (later cells feed them to a [None, 10] placeholder).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [5]:
import tensorflow as tf

# Implementing the model

# Input placeholder for flattened MNIST images: 784 = 28x28 pixels per image.
# A placeholder is a value that we supply when we ask TensorFlow to run a computation;
# the leading None leaves the number of images per batch unspecified.
x = tf.placeholder(tf.float32, [None, 784]) 

# Weights, initialized to zeros: for each of the 784 pixels there is one weight per digit class (0-9),
# hence the [784, 10] shape.
W = tf.Variable(tf.zeros([784, 10])) 

b = tf.Variable(tf.zeros([10])) # Bias added to the output, one entry per digit class

# Create the linear regression model using the values above.
# y holds the raw class scores (logits); the softmax is applied later by the loss function.
y = tf.matmul(x, W) + b


In [6]:
# Placeholder for the true labels (not the model's predictions).
# Each row is a one-hot 10-dimensional vector indicating which digit class 
# (zero through nine) the corresponding MNIST image belongs to.
y_ = tf.placeholder(tf.float32, [None, 10])


# Cross entropy - the average length of communicating an event from one distribution 
# with the optimal code for another distribution. Ref.: http://colah.github.io/posts/2015-09-Visual-Information/

# In this case, it measures how close the model's predicted digit is to the actual correct answer.
# The loss function indicates how bad the model's prediction was on a single example; 
# reduce_mean averages it over the batch, and we try to minimize that value while training.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# Pick optimization algorithm.
# GradientDescentOptimizer is given 0.5 as its learning rate, i.e. the size of each step taken
# while descending the cross entropy. 0.5 is a starting point and should be tuned.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# Create the TF session. The session connects to the TF backend written in C++ that does the heavy computation.
# InteractiveSession is convenient in a notebook: it installs itself as the default session,
# so calls such as the initializer's .run() below work without passing a session explicitly.
sess = tf.InteractiveSession()
# Initialize variables for TF
tf.global_variables_initializer().run()

# We are going to run the training step 1000 times. Again, it should be adjusted for optimal learning time.
for _ in range(1000):
  batch_xs, batch_ys = mnist.train.next_batch(50) # Fetches a batch of 50 training examples (images and labels).
                                                   # Generally, the more data the model trains on, the better its accuracy.
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) # Run one training step on this batch

In [7]:
# Evaluating the model
# Compare the predicted digit with the true digit for every example.
# tf.argmax(y,1) is the label our model thinks is most likely for each input, 
# while tf.argmax(y_,1) is the true label.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

# Accuracy is the fraction of correct predictions: cast the booleans to floats and take their mean
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Print the accuracy measured on the test data
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

0.9122


### Improving accuracy with a convolutional neural network

Tutorial ref.: https://www.tensorflow.org/get_started/mnist/pros

In [8]:
# Functions to generate weight and bias nodes used for Neural Network model.

def weight_variable(shape):
  """Create a trainable weight tensor of the given shape.

  The starting values are drawn from a truncated normal distribution with a
  standard deviation of 0.1, so the weights begin as small non-zero noise
  instead of zeros (which helps to prevent 0 gradients).

  Args:
    shape: shape of the weight tensor to create.

  Returns:
    A tf.Variable holding the randomly initialized weights.
  """
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  """Create a trainable bias tensor of the given shape, filled with 0.1.

  A slightly positive starting bias is used as a precaution against
  "dead neurons".

  Args:
    shape: shape of the bias tensor to create.

  Returns:
    A tf.Variable whose every element starts at 0.1.
  """
  return tf.Variable(tf.constant(0.1, shape=shape))

In [9]:
# Convolution and Pooling functions
def conv2d(x, W):
  """Convolve input `x` with the filter tensor `W`.

  The convolution uses a stride of 1 in every dimension and 'SAME' padding.

  Args:
    x: input tensor handed to tf.nn.conv2d.
    W: filter (weight) tensor handed to tf.nn.conv2d.

  Returns:
    The result of tf.nn.conv2d.
  """
  unit_strides = [1, 1, 1, 1]
  return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
  """Apply max pooling over 2x2 windows with a stride of 2 and 'SAME' padding.

  Args:
    x: input tensor handed to tf.nn.max_pool.

  Returns:
    The result of tf.nn.max_pool.
  """
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [10]:
# First layer - consists of a convolution followed by max pooling.

# Shape of the convolution weights: the first two dimensions are the patch size (5x5),
# the next is the number of input channels (1), and the last is the number of output channels (32).
W_conv1 = weight_variable([5, 5, 1, 32]) 

# A bias vector with one component for each output channel
b_conv1 = bias_variable([32])

# Reshape x to a 4-D tensor: the second and third dimensions are the image height and width (both 28),
# and the last is the number of color channels (1). The -1 lets the batch size be inferred.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# We then convolve x_image with the weight tensor, add the bias, apply the ReLU function, and finally max pool. 
# The max_pool_2x2 method will reduce the image size to 14x14.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

In [11]:
# Second Convolutional Layer
# In order to build a deep network, we stack several layers of this type. 
# The second layer will have 64 features for each 5x5 patch.

# 32 input channels (the output channels of the first layer) and 64 output channels
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

# Same pattern as the first layer: convolve, add bias, apply ReLU, then max pool
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

In [12]:
# Densely Connected Layer
# Now that the image size has been reduced to 7x7, we add a fully-connected layer with 1024 neurons 
# to allow processing on the entire image. We reshape the tensor from the pooling layer into a batch of vectors, 
# multiply by a weight matrix, add a bias, and apply a ReLU.

# 7 * 7 * 64 inputs: 7x7 image positions times the 64 output channels of the second convolutional layer
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# Flatten each example's pooled feature maps into a single vector; -1 lets the batch size be inferred
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [13]:
# Dropout - to reduce overfitting
# keep_prob is a placeholder so that a different value can be fed at training time
# (0.5 in the training cell) than at evaluation time (1.0).

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [14]:
# Readout Layer
# Maps the 1024 outputs of the dense layer (after dropout) to 10 values, one score per digit class.

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# y_conv holds the raw class scores (logits); they are passed as `logits` to the softmax cross-entropy loss
y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

In [None]:
# Train and evaluate the convolutional model.
# NOTE: cross_entropy, train_step, correct_prediction and accuracy are rebound here to the
# convolutional model's tensors (built on y_conv instead of y).
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Use a dedicated name for this session. Binding it to `sess` would overwrite the handle to the
# InteractiveSession created earlier and leave `sess` pointing at a session that is already
# closed once the `with` block exits.
with tf.Session() as conv_sess:
  conv_sess.run(tf.global_variables_initializer())
  for i in range(500):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
      # Report accuracy on the current batch with dropout disabled (keep_prob 1.0)
      train_accuracy = conv_sess.run(accuracy, feed_dict={
          x: batch[0], y_: batch[1], keep_prob: 1.0})
      print('step %d, training accuracy %g' % (i, train_accuracy))
    # One optimization step with dropout active (keep_prob 0.5)
    conv_sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

  # Final accuracy on the test set, again with dropout disabled
  print('test accuracy %g' % conv_sess.run(accuracy, feed_dict={
      x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

step 0, training accuracy 0.12
step 100, training accuracy 0.9
step 200, training accuracy 0.92
step 300, training accuracy 0.94
step 400, training accuracy 1


# Using the Keras library to simplify the solution above

The code above can be difficult to understand for someone who is not experienced in
Python programming, and data scientists do not necessarily need that level of detail either.
The Keras library provides modules that abstract away the complex TensorFlow code.
This cuts down the time needed to learn how to build models from the data we supply,
set up a neural network for the model, and train and test the model itself.

I've found a handy tutorial on how to build a basic NN with Keras, fit a model and train it
on the MNIST dataset. Tutorial: https://elitedatascience.com/keras-tutorial-deep-learning-in-python#step-1

Keras docs: https://keras.io/

In [184]:
import numpy as np

np.random.seed(123)  # for reproducibility: seeds NumPy's global random number generator so random numbers are predictable

In [185]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

# Import the Keras dataset module under an alias. The bare name `mnist` is already bound to the
# TensorFlow tutorial dataset object used by the TensorFlow cells; importing the Keras module
# under the same name would silently replace it and break those cells when they are re-run.
from keras.datasets import mnist as keras_mnist
 
# Load pre-shuffled MNIST data into train and test sets
(X_train, y_train), (X_test, y_test) = keras_mnist.load_data()

In [186]:
# 4. Sanity check: print the shape of the training image array.
# The commented-out line below is an unused alternative that would take batches
# from the TensorFlow tutorial dataset instead.
#(X_train, Y_train), (X_test, Y_test) = mnist.train.next_batch(7000), mnist.test.next_batch(7000)

print(X_train.shape)

(60000, 28, 28)


In [187]:
# 5. Preprocess input data

# Images: add an explicit channel axis -> (n_images, 1, 28, 28), convert to float32 and
# divide by 255 (assumes the raw pixel values lie in 0..255, which maps them to 0..1).
# NOTE: this rebinds X_train / X_test, so running the cell a second time divides by 255 again.
X_train = X_train.reshape(len(X_train), 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(len(X_test), 1, 28, 28).astype('float32') / 255

print(X_train.shape)

# Labels: convert the 1-dimensional class arrays to 10-dimensional class matrices
Y_train, Y_test = (np_utils.to_categorical(labels, 10) for labels in (y_train, y_test))

(60000, 1, 28, 28)
(60000, 1, 28, 28)


In [174]:
# 7. Define model architecture
# Written in the Keras 2 call style to avoid the "Update to Keras 2" warnings.
# https://github.com/fchollet/keras/wiki/Keras-2.0-release-notes
model = Sequential()

# Convolution docs: https://keras.io/layers/convolutional/
# The input arrays are laid out as (channels, height, width) = (1, 28, 28), so every layer that
# operates on the spatial axes is given data_format='channels_first'. Declaring it only on the
# first layer would let the later layers fall back to the backend default and treat the wrong
# axes as height/width.
model.add(Convolution2D(32, (3,3), activation='relu', input_shape=(1,28,28), data_format='channels_first'))
model.add(Convolution2D(64, (5,5), activation='relu', data_format='channels_first'))
model.add(MaxPooling2D(pool_size=(2,2), data_format='channels_first'))
model.add(Dropout(0.25))
 
# Classifier head: flatten the feature maps, one hidden dense layer, then a 10-way softmax
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

In [176]:
# 8. Compile model
# categorical_crossentropy matches the one-hot label matrices and the softmax output layer
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
 
# 9. Fit model on training data
model.fit(X_train, Y_train, 
          batch_size=25, epochs=25, verbose=1)
 
# 10. Evaluate model on test data
# With one metric configured, evaluate returns [loss, accuracy]; print it so the result is
# visible instead of being stored silently.
score = model.evaluate(X_test, Y_test, verbose=0)
print('test loss %g, test accuracy %g' % (score[0], score[1]))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25

KeyboardInterrupt: 