# MNIST

MNIST is the `Hello World` program of machine learning.

It is used to recognize handwritten digits in images.

For example, each of the following 4 images contains a handwritten digit:
![MNIST](assets/MNIST.png)

And we are going to write a model to recognize them, by using `softmax regression`. 

## Download the data sets

Run the code below to import the functions for data set management

In [3]:
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for downloading and reading MNIST data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gzip
import os
import tensorflow.python.platform
import numpy
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
def maybe_download(filename, work_directory):
  """Download the data from Yann's website, unless it's already here.

  Args:
    filename: Name of the file to fetch; it is appended to `SOURCE_URL`.
    work_directory: Local directory used as the download cache. It is
      created, together with any missing parent directories, when it does
      not exist yet.

  Returns:
    The local path of the file inside `work_directory`.
  """
  if not os.path.exists(work_directory):
    # os.mkdir fails when a parent directory is missing (e.g. "data/mnist"),
    # so create the whole chain of directories instead.
    os.makedirs(work_directory)
  filepath = os.path.join(work_directory, filename)
  if not os.path.exists(filepath):
    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  return filepath
def _read32(bytestream):
  """Read the next 4 bytes of `bytestream` as one big-endian uint32.

  Args:
    bytestream: Binary file-like object positioned at the value to read.

  Returns:
    The decoded value as a numpy unsigned 32-bit scalar.
  """
  big_endian_uint32 = numpy.dtype('>u4')
  raw = bytestream.read(4)
  return numpy.frombuffer(raw, dtype=big_endian_uint32)[0]
def extract_images(filename):
  """Load a gzipped MNIST image file as a 4D uint8 array [index, y, x, depth].

  Args:
    filename: Path of the gzip-compressed image file.

  Returns:
    A uint8 numpy array of shape [num_images, rows, cols, 1].

  Raises:
    ValueError: If the file does not start with the image magic number 2051.
  """
  print('Extracting', filename)
  with gzip.open(filename) as archive:
    header_magic = _read32(archive)
    if header_magic != 2051:
      message = 'Invalid magic number %d in MNIST image file: %s' % (
          header_magic, filename)
      raise ValueError(message)
    # The header continues with three counts: images, rows, columns.
    image_count = _read32(archive)
    height = _read32(archive)
    width = _read32(archive)
    # The remainder is one byte per pixel.
    pixels = numpy.frombuffer(archive.read(height * width * image_count),
                              dtype=numpy.uint8)
    return pixels.reshape(image_count, height, width, 1)
def dense_to_one_hot(labels_dense, num_classes=10):
  """Convert class labels from scalars to one-hot vectors.

  Args:
    labels_dense: Array (or array-like) of integer class ids, one per example.
    num_classes: Number of classes, i.e. the width of every one-hot row.

  Returns:
    A numpy array of shape [num_labels, num_classes] that holds 1 at the
    column of each example's class and 0 everywhere else.

  Raises:
    ValueError: If any label lies outside `[0, num_classes)`.
  """
  labels_dense = numpy.asarray(labels_dense)
  flat_labels = labels_dense.ravel()
  # The flat indexing below would silently set a bit in a neighbouring row
  # for an out-of-range label, so reject such labels up front.
  if flat_labels.size and (flat_labels.min() < 0 or
                           flat_labels.max() >= num_classes):
    raise ValueError(
        'Labels must be in [0, %d), got values from %d to %d' %
        (num_classes, flat_labels.min(), flat_labels.max()))
  num_labels = labels_dense.shape[0]
  # Row i starts at flat position i * num_classes; adding the label gives the
  # flat position of the single 1 in that row.
  index_offset = numpy.arange(num_labels) * num_classes
  labels_one_hot = numpy.zeros((num_labels, num_classes))
  labels_one_hot.flat[index_offset + flat_labels] = 1
  return labels_one_hot
def extract_labels(filename, one_hot=False):
  """Load a gzipped MNIST label file as a 1D uint8 array [index].

  Args:
    filename: Path of the gzip-compressed label file.
    one_hot: When true, return the labels converted by `dense_to_one_hot`
      instead of the raw class ids.

  Returns:
    A 1D uint8 numpy array of class ids, or its one-hot conversion when
    `one_hot` is true.

  Raises:
    ValueError: If the file does not start with the label magic number 2049.
  """
  print('Extracting', filename)
  with gzip.open(filename) as archive:
    header_magic = _read32(archive)
    if header_magic != 2049:
      message = 'Invalid magic number %d in MNIST label file: %s' % (
          header_magic, filename)
      raise ValueError(message)
    # The header ends with the item count; each label is a single byte.
    label_count = _read32(archive)
    class_ids = numpy.frombuffer(archive.read(label_count), dtype=numpy.uint8)
    return dense_to_one_hot(class_ids) if one_hot else class_ids
class DataSet(object):
  """One split of the data (images plus labels) served in mini-batches.

  Real images are stored flattened as [num_examples, rows*columns].
  `next_batch` walks through the examples in order and reshuffles them each
  time an epoch is finished.
  """

  def __init__(self, images, labels, fake_data=False, one_hot=False,
               dtype=tf.float32):
    """Construct a DataSet.

    one_hot arg is used only if fake_data is true.  `dtype` can be either
    `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into
    `[0, 1]`.

    Args:
      images: Array of shape [num_examples, rows, columns, 1]; ignored when
        `fake_data` is true.
      labels: Array with one entry per image; ignored when `fake_data` is
        true.
      fake_data: When true, no real data is stored and the set reports
        10000 examples.
      one_hot: Whether fake labels are produced as one-hot vectors.
      dtype: `uint8` or `float32`, see above.

    Raises:
      TypeError: If `dtype` is neither uint8 nor float32.
    """
    dtype = tf.as_dtype(dtype).base_dtype
    if dtype not in (tf.uint8, tf.float32):
      raise TypeError('Invalid image dtype %r, expected uint8 or float32' %
                      dtype)
    # Always record one_hot: next_batch(fake_data=True) reads it, also on a
    # DataSet that was built from real data.
    self.one_hot = one_hot
    if fake_data:
      self._num_examples = 10000
    else:
      assert images.shape[0] == labels.shape[0], (
          'images.shape: %s labels.shape: %s' % (images.shape,
                                                 labels.shape))
      self._num_examples = images.shape[0]
      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      assert images.shape[3] == 1
      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2])
      if dtype == tf.float32:
        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(numpy.float32)
        images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    self._epochs_completed = 0
    self._index_in_epoch = 0

  @property
  def images(self):
    """The stored images (flattened to 2D for real data)."""
    return self._images

  @property
  def labels(self):
    """The stored labels, in the same order as `images`."""
    return self._labels

  @property
  def num_examples(self):
    """Number of examples in this data set."""
    return self._num_examples

  @property
  def epochs_completed(self):
    """Number of full passes over the data served by `next_batch` so far."""
    return self._epochs_completed

  def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set.

    Args:
      batch_size: Number of examples to return.
      fake_data: When true, return constant placeholder data instead of
        stored examples.

    Returns:
      A pair `(images, labels)` holding `batch_size` entries each.
    """
    if fake_data:
      fake_image = [1] * 784
      if self.one_hot:
        fake_label = [1] + [0] * 9
      else:
        fake_label = 0
      return [fake_image for _ in xrange(batch_size)], [
          fake_label for _ in xrange(batch_size)]
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
      # Finished epoch. The examples left between the old position and the
      # end of the data are skipped; the batch comes from the new epoch.
      self._epochs_completed += 1
      # Shuffle the data, applying the same permutation to images and labels
      # so that they stay aligned.
      perm = numpy.arange(self._num_examples)
      numpy.random.shuffle(perm)
      self._images = self._images[perm]
      self._labels = self._labels[perm]
      # Start next epoch
      start = 0
      self._index_in_epoch = batch_size
      assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end]
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32,
                   validation_size=5000):
  """Download (if needed) and load MNIST as train/validation/test DataSets.

  Args:
    train_dir: Directory where the four MNIST archives are cached.
    fake_data: When true, nothing is downloaded and three fake DataSets are
      returned.
    one_hot: Whether labels are returned as one-hot vectors instead of class
      ids.
    dtype: Image dtype passed on to every `DataSet`.
    validation_size: Number of examples taken from the start of the training
      file to form the validation set; the rest is the training set.

  Returns:
    An object with the attributes `train`, `validation` and `test`, each a
    `DataSet`.

  Raises:
    ValueError: If `validation_size` is negative or larger than the number
      of training images.
  """
  class DataSets(object):
    pass
  data_sets = DataSets()
  if fake_data:
    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
    data_sets.train = fake()
    data_sets.validation = fake()
    data_sets.test = fake()
    return data_sets
  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
  local_file = maybe_download(TRAIN_IMAGES, train_dir)
  train_images = extract_images(local_file)
  local_file = maybe_download(TRAIN_LABELS, train_dir)
  train_labels = extract_labels(local_file, one_hot=one_hot)
  local_file = maybe_download(TEST_IMAGES, train_dir)
  test_images = extract_images(local_file)
  local_file = maybe_download(TEST_LABELS, train_dir)
  test_labels = extract_labels(local_file, one_hot=one_hot)
  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))
  # Split the training file: the first validation_size examples become the
  # validation set, the remainder stays as the training set.
  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]
  data_sets.train = DataSet(train_images, train_labels, dtype=dtype)
  data_sets.validation = DataSet(validation_images, validation_labels,
                                 dtype=dtype)
  data_sets.test = DataSet(test_images, test_labels, dtype=dtype)
  return data_sets

Read data sets into the memory.

In [4]:
# Download the MNIST archives into MNIST_data/ (unless they are already
# there) and load them, with the labels converted to one-hot vectors.
mnist = read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## MNIST Data Set

http://yann.lecun.com/exdb/mnist/

The data set has been downloaded to ./MNIST_data, which has a training set of 60,000 examples, and a test set of 10,000 examples. The digits have been size-normalized and centered in a fixed-size image (28*28):


![MNIST_Matrix](assets/MNIST-Matrix.png)


We can flatten each array into a 28∗28=784 dimensional vector. Each component of the vector is a value between zero and one describing the intensity of the pixel. Thus, we generally think of MNIST as being a collection of 784-dimensional vectors.

(The image loses some structural information when it is flattened, but that is not important for this example.)

### The image set

Since each image is a 784 dimensional vector, the whole image set can be seen as a 2 dimensional tensor:
 - the first dimension is the index of images
 - the second dimension is the index of the pixels
 
Then each element in the tensor represents one pixel of an image.

![MNIST_mnist-train-xs](assets/mnist-train-xs.png)

### The label set

We represent each digit from 0 to 9 as a one-hot vector: an array of 10 elements, indexed [0...9], in which the element at the digit's index is 1 and the other nine are 0. Stacking these vectors gives the label set shown below:

![MNIST_mnist-train-ys](assets/mnist-train-ys.png)

## Softmax Regression

Now we need to create the connection between the image set and the label set. By using a softmax model, we can assign a probability to each of the possible digits for a given image.

More information about the softmax function can be found here: http://neuralnetworksanddeeplearning.com/chap3.html#softmax


## Define the Model


Create an `evidence` for each digit by computing the weighted sum of the pixel values of the image and adding a bias.

Given image `x`, the `evidence` for number `i` is:

\begin{equation}
evidence_i = \sum_j x_j w_{j,i} + b_i
\end{equation}

 - w<sub>j,i</sub>: the weight of pixel `j` for number `i`
 - b<sub>i</sub>: the bias value for number `i`
 - x<sub>j</sub>: the value of pixel `j` of image `x`
 
Then we can use softmax to transform this `evidence` into probabilities.
 
\begin{equation}
y = softmax(evidence)
\end{equation}

Actually we can describe this as a data flow graph:

![MNIST_softmax-regression-scalargraph](assets/softmax-regression-scalargraph.png)

And also in the matrix form:

\begin{equation}
\begin{bmatrix}y_1&y_2&y_3&...&y_{10}\end{bmatrix} = softmax \left( \begin{bmatrix}x_1&x_2&x_3&...&x_{784}\end{bmatrix}\begin{bmatrix}w_{1,1}&w_{1,2}&w_{1,3}&...&w_{1,10}\\w_{2,1}&w_{2,2}&w_{2,3}&...&w_{2,10}\\w_{3,1}&w_{3,2}&w_{3,3}&...&w_{3,10}\\...&...&...&...&...\\w_{784,1}&w_{784,2}&w_{784,3}&...&w_{784,10}\end{bmatrix}
 +
\begin{bmatrix}b_1&b_2&b_3&...&b_{10}\end{bmatrix}
\right)
\end{equation}

Or in a simpler way:

\begin{equation}
y = softmax \left( xW + b \right)
\end{equation}

So `softmax` is the `activation` function for this model.

## Implementing the Regression with Tensorflow

In [None]:
import tensorflow as tf

# Define x as a placeholder because we want to feed in different images.
# Its shape is [None, 784]: any number of images, 784 pixel values each.
x = tf.placeholder(tf.float32, [None, 784])

# Define W and b as variables, their values are what the model needs to learn.
# W holds 784 weights for each of the 10 digits, b one bias per digit.
W = tf.Variable(tf.random_normal([784,10]))
b = tf.Variable(tf.zeros([10]))

# Apply the activation function: softmax of the evidence x*W + b
y = tf.nn.softmax(tf.matmul(x,W) + b)

# And it's done!

## Train the Model

To train the model, we need a `standard` that tells us whether the model is good or bad.

In machine learning we typically define what it means for a model to be bad. We call this the `cost`, or the `loss`, and it represents how far off our model is from our desired outcome. We try to minimize that error, and the smaller the error margin, the better our model is.

One very common, very nice function to determine the loss of a model is called `cross-entropy`. Cross-entropy arises from thinking about information compressing codes in information theory but it winds up being an important idea in lots of areas, from gambling to machine learning. It’s defined as:

\begin{equation}
H_{y'}(y) = -\sum_i y'_i \log(y_i)
\end{equation}

Where `y` is our predicted probability distribution, and `y’` is the true distribution (the one-hot vector with the digit labels). In some rough sense, the cross-entropy is measuring how inefficient our predictions are for describing the truth. Going into more detail about cross-entropy is beyond the scope of this tutorial, but it’s well worth understanding.


In [None]:
# Define y', the placeholder that receives the true (one-hot) labels
y_ = tf.placeholder("float", [None,10])
# Define cross entropy: -sum(y' * log(y)).
# The leading minus sign is essential. Without it the optimizer below would
# minimize sum(y' * log(y)), which drives the probability of the correct
# digit towards zero instead of towards one.
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
# Define train step
# Here we use `gradient descent algorithm` for the regression
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Init variables
init = tf.global_variables_initializer()

# Start session and train.
# The session is deliberately left open: the evaluation cell reuses it.
sess = tf.Session()
sess.run(init)
for i in range(1000):
  # Each step trains on the next 100 images and their labels
  batch_xs, batch_ys = mnist.train.next_batch(100)
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
print('done trainning')

At each step of the loop, we get a `batch` of 100 random data points from our training set. We run train_step, feeding in the batch data to replace the placeholders.

Using small batches of random data is called `stochastic training` – in this case, `stochastic gradient descent`. 

Ideally, we’d like to use all our data for every step of training because that would give us a better sense of what we should be doing, but that’s expensive. So, instead, we use a different subset every time. Doing this is cheap and has much of the same benefit.



## Evaluate the Model

Use the test data set to evaluate the model.

Well, first let’s figure out where we predicted the correct label. `tf.argmax` is an extremely useful function which gives you the index of the highest entry in a tensor along some axis. For example, `tf.argmax(y, 1)` is the label our model thinks is most likely for each input, while `tf.argmax(y_, 1)` is the correct label. We can use `tf.equal` to check if our prediction matches the truth.

In [None]:
# For every image, compare the index of the highest predicted probability
# with the index of the 1 in the one-hot label.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
# Cast the booleans to floats so that their mean is the fraction of correct
# predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Evaluate the accuracy on the whole test set.
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
# Release the resources held by the session opened in the training cell.
sess.close()

## Improve the model

The accuracy is pretty bad, because the raw formulation of cross-entropy

```
cross_entropy = tf.reduce_sum(y_*tf.log(y))
```
can be numerically unstable.

So here we use `tf.losses.sparse_softmax_cross_entropy` on the raw outputs (logits) of the model.


In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with the TensorFlow tutorial helper. one_hot is not requested
# here; presumably the labels are then plain class ids, which is what the
# int64 placeholder y_ below expects (verify against input_data).
mnist = input_data.read_data_sets('./MNIST')

# Create the model
# The names 'x' and 'model' are given explicitly so that the tensors can be
# looked up by name ('x:0', 'model:0') after the model has been reloaded.
x = tf.placeholder(tf.float32, [None, 784], name='x')
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
# y holds the raw outputs x*W + b; no softmax is applied at this point.
y = tf.add(tf.matmul(x, W), b, name='model')

# Define loss and optimizer
# y_ receives one integer label per image.
y_ = tf.placeholder(tf.int64, [None])

# The raw formulation of cross-entropy,
#
#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
#                                 reduction_indices=[1]))
#
# can be numerically unstable.
#
# So here we use tf.losses.sparse_softmax_cross_entropy on the raw
# outputs of 'y', and then average across the batch.
cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

# The session is not closed in this cell; the cell that saves the model
# uses it and closes it.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Train
# 1000 steps, each on the next batch of 100 images and labels.
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

# Test trained model
# A prediction is correct when the index of the largest output equals the
# label; the accuracy is the mean over the test set.
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(
  accuracy, feed_dict={
      x: mnist.test.images,
      y_: mnist.test.labels
  }))

  from ._conv import register_converters as _register_converters


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./MNIST/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ./MNIST/train-labels-idx1-ubyte.gz
Extracting ./MNIST/t10k-images-idx3-ubyte.gz
Extracting ./MNIST/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
0.9188


The accuracy is much better now but still very poor for production usage.

To further improve the accuracy, we need to change the model.

See who is the best in MNIST: http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#4d4e495354

## Save the Model

In [2]:
import tensorflow as tf
from tensorflow.python.util import compat
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.saved_model import tag_constants

# Export the graph and the variables of the open session `sess` as a
# SavedModel under ./models/mnist/1.
builder = saved_model_builder.SavedModelBuilder('./models/mnist/1')
builder.add_meta_graph_and_variables(
      sess, [tag_constants.SERVING],
      # Signature "model": input tensor x -> output tensor y.
      signature_def_map={
        "model": tf.saved_model.signature_def_utils.predict_signature_def(
          inputs= {"x": x },
          outputs= {"y": y }
        )
      })
# NOTE(review): this adds a second meta graph under the same SERVING tag and
# without a signature map -- confirm that it is really needed.
builder.add_meta_graph([tag_constants.SERVING], strip_default_attrs=True)
builder.save()
print('Model saved')
# The session has been exported and can be released now.
sess.close()

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ./models/mnist/1/saved_model.pb
Model saved


## Use the model

### Convert an image



In [3]:
from PIL import Image, ImageFilter


def imageprepare(argv):
    """Load an image file and convert it to MNIST-style pixel values.

    The image is converted to grayscale, scaled so that its longer side is
    20 pixels, sharpened and pasted centered onto a white 28x28 canvas.

    Args:
        argv: Location of the image file to read.

    Returns:
        A list of 784 floats (28 rows of 28 pixels) between 0.0 and 1.0,
        where 0.0 is pure white and 1.0 is pure black.
    """
    # Close the source file as soon as the grayscale copy has been made.
    with Image.open(argv) as source:
        im = source.convert('L')
    width, height = (float(side) for side in im.size)
    canvas = Image.new('L', (28, 28), 255)  # white canvas of 28x28 pixels

    # The longer side becomes 20 pixels and the shorter side keeps the aspect
    # ratio, but is never smaller than 1 pixel.
    if width > height:
        new_width = 20
        new_height = max(1, int(round(20.0 / width * height)))
    else:
        new_width = max(1, int(round(20.0 / height * width)))
        new_height = 20

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    resized = im.resize((new_width, new_height), Image.LANCZOS)
    resized = resized.filter(ImageFilter.SHARPEN)

    # Center the resized image on the canvas (4 pixels of margin on the
    # 20-pixel side).
    left = int(round((28 - new_width) / 2))
    top = int(round((28 - new_height) / 2))
    canvas.paste(resized, (left, top))

    # Normalize pixels to [0, 1] and invert them: 0 is pure white, 1 is pure
    # black.
    return [(255 - pixel) * 1.0 / 255.0 for pixel in canvas.getdata()]

# Sample images to classify, converted to MNIST-style pixel lists.
sample_paths = (
    './assets/5.jpg',  # 28*28
    './assets/4.png',  # screenshot
    './assets/2.png',  # file path here
)
x1, x2, x3 = [imageprepare(path) for path in sample_paths]
print(len(x3))  # mnist IMAGES are 28x28=784 pixels

784


### Load a saved model and test the image

In [6]:
import tensorflow as tf
import numpy as np
from tensorflow.saved_model import tag_constants

# Directory that the SavedModel was written to.
export_dir = './models/mnist/1'
# Load the model into a fresh graph so that it does not mix with the graph
# built in the earlier cells.
with tf.Session(graph=tf.Graph()) as sess:
  tf.saved_model.loader.load(sess, [tag_constants.SERVING], export_dir)
  # NOTE(review): relies on the session's graph being the default graph
  # inside this `with` block -- confirm, or use sess.graph.
  graph = tf.get_default_graph()
  # Look the tensors up by the names they were given when the model was built.
  x = graph.get_tensor_by_name('x:0')
  y = graph.get_tensor_by_name('model:0')
  # The predicted digit is the index of the highest softmax probability.
  result = tf.argmax(tf.nn.softmax(y), 1)
  # Classify the three prepared images in one run.
  print(sess.run(
  result, feed_dict={
      x: [x1, x2, x3],
  }))
  print('done')

INFO:tensorflow:Restoring parameters from ./models/mnist/1/variables/variables
[5 4 2]
done
