Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------------------------------------

## Simple getting started

In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import os
import time

# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

# Define a utility function for generating a new directory in which to save
# model information, so multiple training runs don't stomp on each other.
def get_new_path(name="", base_dir="/tmp/tfmodels/mnist_estimators"):
    """Return a fresh, timestamped model directory path and print it.

    Args:
        name: Prefix for the run directory, e.g. "linear".
        base_dir: Parent directory under which the run directory is placed.
            Defaults to the location used throughout this notebook.

    Returns:
        The absolute path ``<base_dir>/<name>_<unix-seconds>``. Only the
        path string is built here; nothing is created on disk.
    """
    # The timestamp has whole-second resolution, so two calls with the same
    # name inside the same second produce the same path.
    run_dir = name + "_" + str(int(time.time()))
    logpath = os.path.join(os.path.abspath(base_dir), run_dir)
    print("Logging to {}".format(logpath))
    return logpath

In [None]:
# Train

# Load the MNIST data sets from the local data directory.
DATA_SETS = input_data.read_data_sets("/tmp/MNIST_data")

# A single numeric feature, "pixels", of length 784 per example.
feature_columns = [tf.feature_column.numeric_column("pixels", shape=784)]

# Ten-class linear model; each run gets its own timestamped model directory.
m = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    model_dir=get_new_path("linear"),
)

# Feed the training arrays in shuffled batches of 100 for 3 epochs.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'pixels': DATA_SETS.train.images},
    y=DATA_SETS.train.labels.astype(np.int64),
    batch_size=100,
    num_epochs=3,
    shuffle=True,
)
m.train(input_fn=train_input_fn)

In [None]:
# Evaluate
# One unshuffled pass over the test arrays, in batches of 100.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'pixels': DATA_SETS.test.images},
    y=DATA_SETS.test.labels.astype(np.int64),
    batch_size=100,
    num_epochs=1,
    shuffle=False,
)
results = m.evaluate(input_fn=eval_input_fn)

print(results)

In [None]:
# Bonus round 1: predictions

# Predict on five test examples (indices 5000-5004), one example per batch,
# in their original order.
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'pixels': DATA_SETS.test.images[5000:5005]},
    batch_size=1,
    num_epochs=1,
    shuffle=False,
)
predictions = m.predict(input_fn=predict_input_fn)

for prediction in predictions:
    print("Predictions:    {} with probabilities {}\n".format(
        prediction["classes"],
        prediction["probabilities"],
    ))
# The labels for the same five examples, for comparison.
print('Expected answers values: {}'.format(DATA_SETS.test.labels[5000:5005]))

In [None]:
# Optional Bonus round 2: visualizing our predictions.
# This will fail if matplotlib is not installed. You can just skip it if so.

import matplotlib.pyplot as plt
%matplotlib inline

# Draw each of the five examples (indices 5000-5004) as a 28x28 grayscale
# image in its own figure, titled with its label.
for image, label in zip(DATA_SETS.test.images[5000:5005],
                        DATA_SETS.test.labels[5000:5005]):
    plt.figure()
    plt.title("labeled class {}".format(label))
    plt.imshow(np.reshape(image, (28, 28)), 'gray')


## In-depth walk through and exploration

Let's look more closely at how to use TensorFlow's high-level Estimator classes to easily build a classifier with multiple hidden layers.

Optionally [download Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist#get-the-data) if you haven't already.

First, do some imports and set some variables:

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import time

import numpy
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Set TensorFlow's log verbosity to INFO.
# Comment this out for less info during the training runs.
tf.logging.set_verbosity(tf.logging.INFO)

# confirm what version of TensorFlow you are running
print('Running TensorFlow version {}'.format(tf.__version__))

In [None]:
# Set locations of data files
MNIST_DATA_DIR = "/tmp/MNIST_data"
# Edit the following to reflect where you put the Fashion-MNIST local dir,
# if you want to experiment with Fashion-MNIST too. The value below is only a
# placeholder; it is read by the optional Fashion-MNIST cell later on.
FASHION_DATA_DIR = "your-fashion_mnist-dir" 

# read in data, downloading first as necessary
# DATA_SETS is a global that the training/evaluation functions below read.
DATA_SETS = input_data.read_data_sets(MNIST_DATA_DIR)

In [None]:
BATCH_SIZE = 40
# call with generate_input_fn(DATA_SETS.train) or generate_input_fn(DATA_SETS.test)

# With the default arguments, the returned input function yields samples in
# the order of the file, forever.
def generate_input_fn(dataset,
                      epochs=None,
                      shuffle=False,
                      batch_size=BATCH_SIZE):
    """Build an Estimator input function over the arrays held by `dataset`.

    Args:
        dataset: Object exposing `images` and `labels` arrays, e.g.
            DATA_SETS.train or DATA_SETS.test.
        epochs: Forwarded as `num_epochs`.
        shuffle: Forwarded as `shuffle`.
        batch_size: Number of examples per batch.

    Returns:
        The input function produced by `tf.estimator.inputs.numpy_input_fn`.
    """
    features = {'pixels': dataset.images}
    # Labels are cast to int64 before being handed to the input function.
    labels = dataset.labels.astype(numpy.int64)
    return tf.estimator.inputs.numpy_input_fn(
        x=features,
        y=labels,
        batch_size=batch_size,
        num_epochs=epochs,
        shuffle=shuffle,
    )


We'll first define a function that adds a LinearClassifier and runs its `train()` method, which will train the model. Note that we didn't need to explicitly define a model graph or a training loop ourselves.  

Once we've trained the model, we run the `evaluate()` method, which uses the trained model. To do this, it loads the most recent checkpointed model info available.  The model checkpoint(s) will be generated during the training process.

In [None]:
def define_and_run_linear_classifier(num_steps,
                                     logdir,
                                     batch_size=BATCH_SIZE):
    """Train a LinearClassifier on DATA_SETS.train and evaluate it on DATA_SETS.test.

    Args:
        num_steps: Passed as `steps` to `train()`. The training input
            function is also limited to 3 epochs.
        logdir: Model directory for checkpoints and summaries.
        batch_size: Batch size used for both training and evaluation.

    Returns:
        The trained classifier, so callers can reuse it (e.g. for
        `predict()`), matching `define_and_run_dnn_classifier`.
    """
    feature_columns = [tf.feature_column.numeric_column(
        "pixels", shape=784)]

    classifier = tf.estimator.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=10,
        model_dir=logdir,
    )

    # Shuffled training batches.
    classifier.train(
        input_fn=generate_input_fn(
            DATA_SETS.train,
            epochs=3,
            shuffle=True,
            batch_size=batch_size),
        steps=num_steps,
    )

    print("Finished training.")

    # Evaluate accuracy with a single, unshuffled pass over the test set.
    accuracy_score = classifier.evaluate(
        input_fn=generate_input_fn(
            DATA_SETS.test,
            batch_size=batch_size,
            shuffle=False,
            epochs=1))['accuracy']

    print('Linear Classifier Accuracy: {0:f}'.format(accuracy_score))
    return classifier

Next, add a function that defines a `DNNClassifier`, and runs its `train()` method, which will train the model. Again note that we didn't need to explicitly define a model graph or a training loop ourselves.  

Then after we've trained the model, we run the classifier's `evaluate()` method, which uses the trained model. 

In [None]:
def define_and_run_dnn_classifier(num_steps, logdir, lr=.1,
                                  batch_size=BATCH_SIZE):
    """Train a DNNClassifier on DATA_SETS.train and evaluate it on DATA_SETS.test.

    Args:
        num_steps: Passed as `steps` to `train()`. The training input
            function is also limited to 3 epochs.
        logdir: Model directory for checkpoints and summaries.
        lr: Learning rate given to the ProximalAdagradOptimizer.
        batch_size: Batch size used for both training and evaluation.
            Defaults to BATCH_SIZE, like the other helpers in this notebook.

    Returns:
        The trained classifier.
    """
    feature_columns = [tf.feature_column.numeric_column(
        "pixels", shape=784)]

    # After you've done a training run with optimizer learning rate 0.1,
    # change it to 0.5 and run the training again.  Use TensorBoard to take
    # a look at the difference.  You can see both runs by pointing it to the
    # parent model directory, which by default is:
    #
    #   tensorboard --logdir=/tmp/tfmodels/mnist_estimators
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        n_classes=10,
        hidden_units=[200, 100, 50],
        optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=lr),
        model_dir=logdir,
    )

    # Shuffled training batches.
    classifier.train(
        input_fn=generate_input_fn(
            DATA_SETS.train,
            epochs=3,
            shuffle=True,
            batch_size=batch_size),
        steps=num_steps,
    )

    print("Finished running the deep training via the train() method")

    # Evaluate accuracy with a single, unshuffled pass over the test set.
    accuracy_score = classifier.evaluate(
        input_fn=generate_input_fn(
            DATA_SETS.test,
            batch_size=batch_size,
            shuffle=False,
            epochs=1))['accuracy']

    print('DNN Classifier Accuracy: {0:f}'.format(accuracy_score))
    return classifier

Now we can call the functions that define and train our classifiers. (It takes a moment to set up the input data queue before the training starts).

Let's start with the LinearClassifier, which won't be very accurate. 


In [None]:
print("Running Linear classifier ...")
define_and_run_linear_classifier(num_steps=500, 
                                 logdir=get_new_path("linear"), 
                                 batch_size=40)
# With 1000 steps and a batch size of 40, we see accuracy of approx 91% for MNIST
# NOTE: the call above passes num_steps=500, not 1000; set num_steps=1000 to
# match the configuration this accuracy figure refers to.

Now, let's run the DNN Classifier.  First, let's try it with a .1 learning rate.

In [None]:
print("Running DNN classifier with .1 learning rate...")
# Keep the trained model in `classifier` so it can be reused later.
classifier = define_and_run_dnn_classifier(
    num_steps=2000, logdir=get_new_path("deep01"), lr=0.1)
# With 2000 steps and a batch size of 40, we see accuracy of approx 95% on MNIST

Optional: If you downloaded Fashion-MNIST, let's see what MNIST and Fashion-MNIST results look like side by side. Set `FASHION_DATA_DIR` (defined above) to point to your Fashion-MNIST directory, and run the training again. Note that we're changing the model path (via the `get_new_path()` call) so that we don't write these results into the same directory as above.
(If you didn't yet download Fashion-MNIST, this cell will give an error.)

In [None]:
# Rebind the global DATA_SETS to Fashion-MNIST; the training function reads it.
DATA_SETS = input_data.read_data_sets(FASHION_DATA_DIR)
print("Running DNN classifier with Fashion-MNIST data and a .1 learning rate...")
fclassifier = define_and_run_dnn_classifier(
    num_steps=2000, logdir=get_new_path("deep01f"), lr=0.1)

If you ran the Fashion-MNIST training, you can see that the accuracy is significantly worse. This dataset is harder! 

Now, let's run training with a .5 learning rate. This will use Fashion-MNIST if you reset the `DATA_SETS` variable above; otherwise, it will use regular MNIST.

In [None]:
print("Running DNN classifier with .5 learning rate...")
# Same run as before, but with a 0.5 learning rate and its own model directory.
classifier5 = define_and_run_dnn_classifier(
    num_steps=2000,
    logdir=get_new_path("deep05"),
    lr=0.5,
)


Did this training run do better or worse than the .1 learning rate?

To compare your results, start up TensorBoard as follows in a new terminal window. (If you get a 'not found' error, make sure you've activated your virtual environment in that new window):

```sh
$ tensorboard --logdir=/tmp/tfmodels/mnist_estimators
```
Look for it at localhost:6006

Or run the following (select Kernel --> Interrupt from the menu when you're done):

In [None]:
# Launch TensorBoard on the parent model directory from inside the notebook.
# Interrupt the kernel when you're done with it.
!tensorboard --logdir=/tmp/tfmodels/mnist_estimators

We can again make some predictions using our trained models.  Assuming you ran the Fashion-MNIST training, `DATA_SETS` now points to that dataset.

In [None]:
# Labels of the five test examples (indices 5000-5004) in the current DATA_SETS.
print(DATA_SETS.test.labels[5000:5005])

In [None]:
# Five test examples (indices 5000-5004), fed one per batch in original order.
X = DATA_SETS.test.images[5000:5005]
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'pixels': X},
    batch_size=1,
    num_epochs=1,
    shuffle=False,
)

# if you did not run the Fashion-MNIST training, edit the following to point
# to 'classifier' (the 'regular' MNIST model) instead of 'fclassifier'.
predictions = fclassifier.predict(input_fn=predict_input_fn)

for prediction in predictions:
    print("Predictions:    {} with probabilities {}\n".format(
        prediction["classes"],
        prediction["probabilities"],
    ))

In [None]:
# Let's take a look at the images we're predicting on. Again, skip these two cells if
# matplotlib is not installed.
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# One figure per example: a 28x28 grayscale image titled with its label.
for i in range(5000, 5005):
    plt.figure()
    plt.title("labeled class {}".format(DATA_SETS.test.labels[i]))
    plt.imshow(numpy.reshape(DATA_SETS.test.images[i], (28, 28)), 'gray')
