# TensorFlow Tutorial #01

# Simple Linear Model (Softmax Regression)

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import confusion_matrix 

In [5]:
# Load data
# MNIST comes from a local `mnist` module; the data directory is passed as
# `data_dir`.
from mnist import MNIST
data = MNIST(data_dir='data/MNIST/')
# Report how many examples each of the three splits contains.
print("Size of:")
print("- Training-set:\t\t{}".format(data.num_train))
print("- Validation-set:\t{}".format(data.num_val))
print("- Test-set:\t\t{}".format(data.num_test))

Size of:
- Training-set:		55000
- Validation-set:	5000
- Test-set:		10000


In [6]:
# Store some data information
# Every value below is currently None and must be assigned before the later
# cells that read it can run (e.g. plot_weights reshapes with img_shape and
# print_confusion_matrix builds its ticks from num_classes).

# The number of pixels in each dimension of an image.
img_size = None

# The images are stored in one-dimensional arrays of this length.
img_size_flat = None

# Tuple with height and width of the images, used to reshape the flat arrays.
img_shape = None

# Number of classes, one class for each of the 10 digits.
num_classes = None

# Number of colour channels for the images: 1 channel for gray-scale.
num_channels = None

## Helper function for plotting images

Function used to plot 9 images in a 3x3 grid, writing the true and predicted classes below each image.

In [None]:
def plot_images(images, cls_true, cls_pred=None):
    """Check the inputs for a 3x3 image plot (the plotting itself is not implemented yet).

    Args:
        images: Sized collection that must contain exactly 9 images.
        cls_true: Sized collection that must contain exactly 9 true classes.
        cls_pred: Optional predicted classes; not read by the current body.

    Raises:
        AssertionError: If `images` or `cls_true` does not have length 9.
    """
    # The 3x3 grid needs exactly nine images, each with a matching true class.
    assert (len(images), len(cls_true)) == (9, 9)

In [None]:
# Testcase for plot_images function
# Both inputs are still None here; plot_images calls len() on them, so this
# cell raises TypeError until they are filled in.

# Get the first 9 images from the test-set.
images = None

# Get the true classes for those images.
cls_true = None

# Plot the images and labels using our helper-function above.
plot_images(images=images, cls_true=cls_true)

## TensorFlow graph

Why use TensorFlow:

- TensorFlow can be more efficient than NumPy because TensorFlow knows the entire computation graph, while NumPy only knows the computation of a single mathematical operation at a time.

- TensorFlow can also automatically calculate the gradients needed to optimize the variables of the graph so as to make the model perform better (the gradient of the entire graph can be calculated using the chain rule for derivatives).

- TensorFlow takes advantage of multi-core CPUs as well as GPUs.

A TensorFlow graph consists of the following parts which will be detailed below:

- **Placeholder variables** used to feed input into the graph.
- **Model variables** that are going to be optimized so as to make the model perform better.
- **Model** which is essentially just a mathematical function that calculates some output given the input in the **placeholder variables** and the **model variables**.
- **Cost measure** that can be used to guide the optimization of the variables.
- **An optimization method** which updates the variables of the model.



In [None]:
# Placeholder variables (each is still None and has to be defined)
# Input image: size=img_size_flat
x = None
# Label: size=num_classes (one-hot encoded)
y_true = None
# True class of each image
y_true_cls = None

In [None]:
# Variables to be optimized (each is still None and has to be defined)
# Weights: initialized with zeros, with shape (img_size_flat, num_classes)
weights = None
# Biases: 1-dimensional tensor of length num_classes
biases = None

In [None]:
# Model: Y = XW + B, i.e. the input x multiplied by weights, plus biases
# (still None and has to be defined)
logits = None

In [None]:
# Softmax: activation function applied to the logits
y_pred = None
# Predicted class of each image: index of the max value of y_pred (argmax)
y_pred_cls = None

### Cost-function to be optimized

To make the model better at classifying the input images, we must somehow change the variables for `weights` and `biases`. To do this we first need to know how well the model currently performs by comparing the predicted output of the model `y_pred` to the desired output `y_true`.

The cross-entropy is a performance measure used in classification. The cross-entropy is a continuous function that is never negative, and if the predicted output of the model exactly matches the desired output then the cross-entropy equals zero. The goal of optimization is to minimize the cross-entropy so it gets as close to zero as possible by changing the `weights` and `biases` of the model.

In [None]:
# Cost function: softmax_cross_entropy_with_logits_v2
# (gives one cross-entropy value per image classification)
cross_entropy = None
# Take the average of the cross-entropy over all image classifications
# to obtain a single scalar cost.
cost = None

In [None]:
# Optimization method: GradientDescentOptimizer
# (still None and has to be defined; optimize() passes this to session.run)
optimizer = None

### Performance measures

We need a few more performance measures to display the progress to the user. The first is a vector of booleans indicating whether the predicted class equals the true class of each image.

In [None]:
# Compare y_pred_cls with y_true_cls, giving one boolean per image
correct_prediction = None

# Calculate the classification accuracy by type-casting the booleans to
# floats (False becomes 0 and True becomes 1) and then calculating the
# average of these numbers.
accuracy = None

## TensorFlow Run

In [None]:
# Create TensorFlow session
session = tf.Session()
session.run(tf.global_variables_initializer())

## Helper function to perform the optimization iterations

There are 55000 images in the training-set. It takes a long time to calculate the gradient of the model using all these images. Therefore, we use Stochastic Gradient Descent (SGD) which only uses a small batch of images in each iteration of the optimizer.

In [None]:
# Number of training examples requested from data.random_batch in each
# optimization iteration (read by optimize()).
batch_size = 100

Function for performing a number of optimization iterations so as to gradually improve the `weights` and `biases` of the model. In each iteration, a new batch of data is selected from the training-set and then TensorFlow executes the optimizer using those training samples.

In [None]:
def optimize(num_iterations):
    """Run the optimizer `num_iterations` times, each on a new random batch.

    Every iteration requests `batch_size` training examples from
    `data.random_batch` and executes `optimizer` in the session with that
    batch fed into the `x` and `y_true` placeholders.

    Args:
        num_iterations: Number of optimization iterations to perform.
    """
    for _step in range(num_iterations):
        # Draw a new batch: the images, their true labels, and a third
        # value that is not needed here.
        batch_images, batch_labels, _ = data.random_batch(batch_size=batch_size)

        # Only x and y_true are fed; the y_true_cls placeholder is left out
        # because it is not used during training.
        session.run(
            optimizer,
            feed_dict={x: batch_images, y_true: batch_labels},
        )

## Helper functions to show performance

In [None]:
# Feed dict mapping each placeholder to the matching test-set array.
# Unlike the training feed in optimize(), y_true_cls is included here.
feed_dict_test = {x: data.x_test,
                  y_true: data.y_test,
                  y_true_cls: data.y_test_cls}

In [None]:
def print_accuracy():
    """Evaluate `accuracy` with the test-set feed and print it as a percentage."""
    # Let TensorFlow compute the accuracy using feed_dict_test.
    test_accuracy = session.run(accuracy, feed_dict=feed_dict_test)
    report = "Accuracy on test-set: {0:.1%}".format(test_accuracy)
    print(report)

In [None]:
def print_confusion_matrix():
    """Print the confusion matrix for the test-set and show it as an image.

    The two class arrays at the top are still None in this notebook and must
    be filled in before the function can produce a result.
    """
    # Get the true classification for the test-set.
    # TODO: placeholder -- presumably data.y_test_cls; confirm.
    cls_true = None

    # Get the predicted classification for the test-set.
    # TODO: placeholder -- presumably y_pred_cls evaluated in the session
    # with feed_dict_test; confirm.
    cls_pred = None

    # Get the confusion matrix using sklearn.
    cm = confusion_matrix(y_true=cls_true,
                          y_pred=cls_pred)
    print(cm)

    # Plot the confusion matrix as an image.
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)

    # Make various adjustments to the plot.
    plt.tight_layout()
    plt.colorbar()
    # One tick per class on both axes, labelled with the class number.
    tick_marks = np.arange(num_classes)
    plt.xticks(tick_marks, range(num_classes))
    plt.yticks(tick_marks, range(num_classes))
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
    

In [None]:
def plot_example_errors():
    """Plot the first 9 misclassified test-set images with true and predicted classes.

    The selection steps in the middle of the function are still None in this
    notebook; until they are filled in, the slicing in the final call fails.
    """
    # Use TensorFlow to get a list of boolean values
    # whether each test-image has been correctly classified,
    # and a list for the predicted class of each image.
    correct, cls_pred = session.run([correct_prediction, y_pred_cls],
                                    feed_dict=feed_dict_test)

    # Negate the boolean array.
    # TODO: placeholder -- to be derived from `correct`.
    incorrect = None

    # Get the images from the test-set that have been incorrectly classified.
    images = None

    # Get the predicted classes for those images.
    # NOTE: as written this rebinds cls_pred to None and discards the
    # predictions fetched above; the real selection has to replace it.
    cls_pred = None

    # Get the true classes for those images.
    cls_true = None

    # Plot the first 9 images.
    plot_images(images=images[0:9],
                cls_true=cls_true[0:9],
                cls_pred=cls_pred[0:9])

In [None]:
def plot_weights():
    """Draw the current weights for each of the first 10 classes as an image.

    The value of the `weights` variable is fetched from the session. Column i
    is reshaped to `img_shape` and shown in its own sub-plot. All sub-plots
    share the same colour limits so the images can be compared.
    """
    # Fetch the current weight values from the TensorFlow variable.
    weight_values = session.run(weights)

    # Shared colour limits: the lowest and highest weight over all columns,
    # so that colour intensities are comparable across the images.
    lowest, highest = np.min(weight_values), np.max(weight_values)

    # A 3x4 grid gives 12 sub-plots; only the first 10 are filled.
    figure, grid = plt.subplots(nrows=3, ncols=4)
    figure.subplots_adjust(hspace=0.3, wspace=0.3)

    for digit, axis in enumerate(grid.flat):
        if digit < 10:
            # weight_values.shape == (img_size_flat, 10), so one column holds
            # the weights for one digit; reshape it into an image.
            column = weight_values[:, digit]
            digit_image = column.reshape(img_shape)

            axis.set_xlabel("Weights: {}".format(digit))
            axis.imshow(digit_image, vmin=lowest, vmax=highest, cmap='seismic')

        # Every sub-plot, used or not, has its ticks removed.
        axis.set_xticks([])
        axis.set_yticks([])

    # Ensure the plot is shown correctly with multiple plots in one cell.
    plt.show()
            

## Performance

### After 1 optimization iteration

In [None]:
# Run a single optimization iteration, then report the test-set accuracy,
# some misclassified examples and the current weights.
optimize(num_iterations=1)
print_accuracy()
print("Example errors:")
plot_example_errors()
print("Weights")
plot_weights()

### After 10 optimization iterations

In [None]:
# 1 iteration has already been performed above, so 9 more bring the total
# to 10.
optimize(num_iterations=9)
print_accuracy()
print("Example errors:")
plot_example_errors()
print("Weights")
plot_weights()

### After 1000 optimization iterations

In [None]:
# 10 iterations have already been performed above, so 990 more bring the
# total to 1000.
optimize(num_iterations=990)
print_accuracy()
print("Example errors:")
plot_example_errors()
print("Weights")
plot_weights()

In [None]:
# Print and plot the confusion matrix for the test-set.
print_confusion_matrix()

In [None]:
# Close the TensorFlow session created earlier to release its resources.
session.close()