# Question 2: Digit Classifier using Linear Regression
##### with Stochastic Gradient Descent

### Section 0: Data preparation & function definitions

Import relevant packages:

In [1]:
import tensorflow as tf
import numpy as np
import math

import time

# Non-interactive plotting
import matplotlib.pyplot as plt
from IPython import display

# Interactive plotting
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as pyo
from plotly.offline import download_plotlyjs

Configure environment:

In [2]:
np.set_printoptions(precision=3)
np.random.seed(521)

# Defines Global Variable
GRAPH_EXISTS = False

%config InlineBackend.figure_format = 'retina'

# Activate Plotly Offline for Jupyter
pyo.init_notebook_mode(connected=True)

Load Tiny MNIST dataset:

In [3]:
with np.load ("tinymnist.npz") as data:
    trainData, trainTarget = data ["x"], data["y"]
    validData, validTarget = data ["x_valid"], data ["y_valid"]
    testData, testTarget = data ["x_test"], data ["y_test"]

### Creating TensorFlow graph:

In [4]:
def buildGraph(eta, lambda_, reuseBool):
    # Model inputs
    with tf.device('/cpu:0'):
        with tf.variable_scope('linear_regression', reuse=reuseBool):
            with tf.name_scope('placeholders'):
                X = tf.placeholder(tf.float32, shape=[None, None], name='Input')
                Y = tf.placeholder(tf.float32, shape=[None, None], name='Target')
            
            # Model variables
            with tf.name_scope('parameters'):
                W = tf.get_variable('weights', shape=[64, 1], initializer=tf.truncated_normal_initializer(stddev=0.5))
                b = tf.get_variable('biases', shape=[1, 1], initializer=tf.constant_initializer(0.0))

            # Model parameters
            with tf.name_scope('hyperparameters'):
                eta = tf.get_variable('learning_rate', shape=[], initializer=tf.constant_initializer(eta))
                lambda_ = tf.get_variable('L2_regularizer', shape=[], initializer=tf.constant_initializer(lambda_))
#                 eta = tf.constant(eta, name='Learning_Rate')
#                 lambda_ = tf.constant(lambda_, name='L2_Regularizer')

            # Predicted target
            with tf.name_scope('prediction'):
                Y_hat = tf.matmul(X, W) + b

            # Mean squared error
            with tf.name_scope('metrics'):
                MSE = tf.add(tf.scalar_mul(tf.divide(1.0, tf.cast(tf.shape(X)[0], tf.float32)), \
                                      tf.reduce_sum(tf.square(Y_hat - Y))), \
                             tf.scalar_mul(tf.divide(tf.cast(lambda_, tf.float32), 2.0), \
                                           tf.matmul(tf.transpose(W), W)), \
                             name='MSE')

                # Basic accuracy definition (n_correct / n_total)
                Y_hat_thresholded = tf.cast(tf.greater_equal(Y_hat, 0.5), tf.float32, name='pred_thresholded')
                accuracy = tf.divide(tf.reduce_sum(tf.cast(tf.equal(Y_hat_thresholded, Y), tf.float64)), \
                                     tf.cast(tf.shape(X)[0], tf.float64), \
                                    name='accuracy')

        # Basic gradient descent optimizer
        optimizer = tf.train.GradientDescentOptimizer(eta).minimize(MSE)
        
    return W, b, X, Y, Y_hat, MSE, accuracy, optimizer

Builds/Initializes graph for the first time:

In [5]:
assert GRAPH_EXISTS == False
buildGraph(1, 1, GRAPH_EXISTS)

(<tensorflow.python.ops.variables.Variable at 0x1315f2d90>,
 <tensorflow.python.ops.variables.Variable at 0x1315f2e10>,
 <tf.Tensor 'linear_regression/placeholders/Input:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'linear_regression/placeholders/Target:0' shape=(?, ?) dtype=float32>,
 <tf.Tensor 'linear_regression/prediction/add:0' shape=(?, 1) dtype=float32>,
 <tf.Tensor 'linear_regression/metrics/MSE:0' shape=(1, 1) dtype=float32>,
 <tf.Tensor 'linear_regression/metrics/accuracy/truediv:0' shape=() dtype=float64>,
 <tensorflow.python.framework.ops.Operation at 0x13188a450>)

## Section 1: Tuning learning rate, $\eta$

In [6]:
MAX_ITER = 2000
def tuneLearningRate(etaList, batchSize=50, lambda_=1):    
    # Returns the i-th batch of training data and targets
    # Generates a new, reshuffled batch once all previous batches are fed
    def getNextTrainingBatch(currentIter):
        currentBatchNum = currentIter % (trainData.shape[0] / batchSize)
        if currentBatchNum == 0:
            np.random.shuffle(randIdx)
        # print 'Iteration: %4d, BatchCap: %2d, BatchNum: %2d' % (currentIter, trainData.shape[0] / batchSize, currentBatchNum)
        lowerBoundIdx = currentBatchNum * batchSize
        upperBoundIdx = (currentBatchNum + 1) * batchSize 
        return trainData[lowerBoundIdx:upperBoundIdx], trainTarget[lowerBoundIdx:upperBoundIdx]
    
    # Generate updated plots for training and validation MSE
    def plotMSEGraph(MSEList, param):
        label = '$\eta$ = ' + str(param)
        label_classification = ['train.', 'valid.']

        display.clear_output(wait=True)
        plt.figure(figsize=(8,5), dpi=200)
        
        for i, MSE in enumerate(MSEList):
            plt.plot(range(len(MSE)), MSE, '.', markersize=3, label=label+' '+label_classification[i])
        
        plt.axis([0, MAX_ITER, 0, np.amax(MSEList)])
        plt.legend()
        plt.show()
    
    # Calculates the ratio between the n-th average epoch MSE and the (n-1)-th average epoch MSE
    def ratioAverageEpochMSE(currentValidMSE):
        averageN = np.average(currentValidMSE[-(np.arange(epochSize - 1) + 1)])
        averageNlessOne = np.average(currentValidMSE[-(np.arange(epochSize - 1) + epochSize)])
        return averageN / averageNlessOne
    
    # Returns True if the average epoch validation MSE is at least 99% of the previous epoch average.
    # i.e. Returns True if the average learnings between epoch is less than +1%
    # Otherwise, returns False
    def shouldStopEarly(currentValidMSE):
        if currentValidMSE.shape[0] < 2 * epochSize:
            return False
        return True if (ratioAverageEpochMSE(currentValidMSE) >= 0.99) else False
    
    
    # Start of function
    summaryList = []
    randIdx = np.arange(trainData.shape[0])
    epochSize = trainData.shape[0] / batchSize
    
    for eta in etaList:
        W, b, X, Y, Y_hat, MSE, accuracy, optimizer = buildGraph(eta, lambda_, reuseBool=True)

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
            writer = tf.summary.FileWriter('/Users/christophertee/Dropbox/University/MASc/Courses/Winter 2017' + \
                                '/ECE521 (Inference Algorithms & Machine Learning)/Assignment 1/Logs/Train', \
                                graph=sess.graph)
            tf.global_variables_initializer().run()

            # Creates blank training and validation MSE arrays for the Session
            currentTrainMSE = np.array([])[:, np.newaxis]
            currentValidMSE = np.array([])[:, np.newaxis]
    
            # Runs update
            currentIter = 0
            while currentIter <= MAX_ITER:
                inputData, inputTarget = getNextTrainingBatch(currentIter)
                
                _, trainError = sess.run([optimizer, MSE], feed_dict={X: inputData, Y: inputTarget})
                validError = sess.run([MSE], feed_dict={X: validData, Y: validTarget})

                currentTrainMSE = np.append(currentTrainMSE, trainError)
                currentValidMSE = np.append(currentValidMSE, validError)
                
                # Update graph of training and validation MSE arrays
                if (currentIter < 3) or (currentIter % 500 == 0):
                    pass
                    # writer.add_summary(trainSummary, currentIter)
                # plotMSEGraph([currentTrainMSE, currentValidMSE], eta)
                
                # At every epoch, check for early stopping possibilty. If so, breaks from while loop
                if currentIter % epochSize == 0:
                    if shouldStopEarly(currentValidMSE):
                        writer.close()
                        break
                
                currentIter += 1
            
        # Save session results as dictionary and appends to MSEsummaryList
        summaryList.append(
            {
                'eta': eta,
                'B': batchSize,
                'lambda': lambda_,
                'numIter': currentIter + 1,
                'epoch': float(currentIter + 1) / (trainData.shape[0] / batchSize),
                'trainMSE': currentTrainMSE,
                'validMSE': currentValidMSE,
            }
        )
            
    return summaryList

In [7]:
etaList = [0.001, 0.01, 0.1]
tunedEtaSummary = tuneLearningRate(etaList)

# Output summary table
for summary in tunedEtaSummary:
    print 'eta: %.3f, numIter: %d, validMSE: %.3f' % (summary['eta'], summary['numIter'], summary['validMSE'][-1])

eta: 0.001, numIter: 1835, validMSE: 0.293
eta: 0.010, numIter: 365, validMSE: 0.180
eta: 0.100, numIter: 71, validMSE: 0.182


### Produce interactive graph for best learning rate, $\eta$:

In [8]:
def etaIGraph(tunedEtaSummary):
    # Create plot for each summary
    traceList = []
    for summary in tunedEtaSummary:
        traceList.append(
            go.Scatter(
                x = range(summary['numIter'] + 1),
                y = summary['trainMSE'],
                name = '$\\eta = ' + str(summary['eta']) + '$'
            )
        )
    data = go.Data(traceList)
    
    # Create figure layout
    layout = go.Layout(
        title = '$\\textit{Training performance for various learning rates, } \\eta$',
        xaxis = {'title': 'Number of Updates'},
        yaxis = {'title': 'Training MSE'},
    )

    figure = go.Figure(data=data, layout=layout)
    return py.iplot(figure, filename='A1Q2.1_bestEtaGraph')
fig2_1 = etaIGraph(tunedEtaSummary)
fig2_1

## Section 2: Tuning mini-batch size, $B$

In [14]:
MAX_ITER = 6000
def tuneBatchSize(etaList, batchSizeList, lambda_=1):
    # Returns the i-th batch of training data and targets
    # Generates a new, reshuffled batch once all previous batches are fed
    def getNextTrainingBatch(currentIter):
        currentBatchNum = currentIter % (trainData.shape[0] / batchSize)
        if currentBatchNum == 0:
            np.random.shuffle(randIdx)
        # print currentBatchNum + 1
        lowerBoundIdx = currentBatchNum * batchSize
        upperBoundIdx = (currentBatchNum + 1) * batchSize 
        return trainData[lowerBoundIdx:upperBoundIdx], trainTarget[lowerBoundIdx:upperBoundIdx]
    
    # Generate updated plots for training and validation MSE
    def plotMSEGraph(MSEList, param):
        label = '$B$ = ' + str(param[0]) + ', $\eta$: ' + str(param[1])
        label_classification = ['train.', 'valid.']

        display.clear_output(wait=True)
        plt.figure(figsize=(8,5), dpi=200)
        
        for i, MSE in enumerate(MSEList):
            plt.plot(range(len(MSE)), MSE, '.', markersize=3, label=label+'\n'+label_classification[i])
        
        plt.axis([0, MAX_ITER, 0, np.amax(MSEList)])
        plt.legend()
        plt.show()
    
    # Calculates the ratio between the n-th average epoch MSE and the (n-1)-th average epoch MSE
    def ratioAverageEpochMSE(currentValidMSE):
        averageN = np.average(currentValidMSE[-(np.arange(epochSize - 1) + 1)])
        averageNlessOne = np.average(currentValidMSE[-(np.arange(epochSize - 1) + epochSize)])
        return averageN / averageNlessOne
    
    # Returns True if the average epoch validation MSE is at least 99% of the previous epoch average.
    # i.e. Returns True if the average learnings between epoch is less than +1%
    # Otherwise, returns False
    def shouldStopEarly(currentValidMSE):
        if currentValidMSE.shape[0] < 2 * epochSize:
            return False
        return True if (ratioAverageEpochMSE(currentValidMSE) >= 0.99) else False
    
    summaryList = []
    randIdx = np.arange(trainData.shape[0])
    
    for batchSize in batchSizeList:
        epochSize = trainData.shape[0] / batchSize
        batchSummary = []
        for eta in etaList:
            W, b, X, Y, Y_hat, MSE, accuracy, optimizer = buildGraph(eta, lambda_)

            with tf.Session() as sess:
                tf.global_variables_initializer().run()

                # Creates blank training and validation MSE arrays for the Session
                currentTrainMSE = np.array([])[:, np.newaxis]
                currentValidMSE = np.array([])[:, np.newaxis]

                # Runs update
                currentIter = 0
                while currentIter <= MAX_ITER:
                    inputData, inputTarget = getNextTrainingBatch(currentIter)

                    _, trainError = sess.run([optimizer, MSE], feed_dict={X: inputData, Y: inputTarget})
                    validError = sess.run([MSE], feed_dict={X: validData, Y: validTarget})

                    currentTrainMSE = np.append(currentTrainMSE, trainError)
                    currentValidMSE = np.append(currentValidMSE, validError)

                    # Update graph of training and validation MSE arrays
                    if (currentIter < 3) or (currentIter % 500 == 0):
                        pass
                        # plotMSEGraph([currentTrainMSE, currentValidMSE], [batchSize, eta])

                    # At every epoch, check for early stopping possibilty. If so, breaks from while loop
                    if currentIter % epochSize == 0:
                        if shouldStopEarly(currentValidMSE):
                            break

                    currentIter += 1

            # Save session results as dictionary and appends to MSEsummaryList
            batchSummary.append(
                {
                    'eta': eta,
                    'B': batchSize,
                    'lambda': lambda_,
                    'numIter': currentIter + 1,
                    'epoch': float(currentIter + 1) / (trainData.shape[0] / batchSize),
                    'trainMSE': currentTrainMSE,
                    'validMSE': currentValidMSE,
                }
            )
        summaryList.append(batchSummary)
            
    return summaryList

In [19]:
etaList = [0.001, 0.01, 0.1]
batchSizeList = [10, 50, 100, 700]
tunedBatchSizeSummary = tuneBatchSize(etaList, batchSizeList)

# Output summary table:
for batchSummary in tunedBatchSizeSummary:
    for summary in batchSummary:
        print 'B: %5d, eta: %5.3f, numIter: %5d, validMSE: %3.3f' % \
            (summary['B'], summary['eta'], summary['numIter'], summary['validMSE'][-1])

B:    10, eta: 0.001, numIter:  2801, validMSE: 0.228
B:    10, eta: 0.010, numIter:   491, validMSE: 0.213
B:    10, eta: 0.100, numIter:   211, validMSE: 0.231
B:    50, eta: 0.001, numIter:  1877, validMSE: 0.325
B:    50, eta: 0.010, numIter:   323, validMSE: 0.218
B:    50, eta: 0.100, numIter:    57, validMSE: 0.217
B:   100, eta: 0.001, numIter:   806, validMSE: 1.002
B:   100, eta: 0.010, numIter:   260, validMSE: 0.228
B:   100, eta: 0.100, numIter:    43, validMSE: 0.215
B:   700, eta: 0.001, numIter:  6002, validMSE: 0.213
B:   700, eta: 0.010, numIter:  6002, validMSE: 0.213
B:   700, eta: 0.100, numIter:  6002, validMSE: 0.213


### Produce interactive graph for each mini-batch size, $B$:

In [None]:
def batchSizeIGraphSubplot(tunedBatchSizeSummary):
    
    # Define subplot title
    subplotTitle = []
    for batchSummary in tunedBatchSizeSummary:
        subplotTitle.append('Batch Size, B  = ' + str(batchSummary[0]['B']))
    
    # Define subplot figure
    figure = tools.make_subplots(rows=4, cols=1, subplot_titles=(subplotTitle))
    
    # Define color list
    colorList = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    
    # Create plot for each summary
    for i, batchSummary in enumerate(tunedBatchSizeSummary):
        traceList = []
        for j, summary in enumerate(batchSummary):
            trace = go.Scatter(
                x = range(summary['numIter'] + 1),
                y = summary['trainMSE'],
                marker = {'color': colorList[j]},
                name = '$B=' + str(summary['B']) + ', \\eta=' + str(summary['eta']) + '$'
            )
            figure.append_trace(trace, i + 1, 1)
        figure['layout']['xaxis'+str(i+1)].update(title='Number of Updates')
        figure['layout']['yaxis'+str(i+1)].update(title='Training MSE')

    # Create figure layout
    figure['layout'].update(
        height = 1800,
        title = '$\\textit{Model training performance for various batch size, } B' + \
                '\\textit{, and learning rate, } \\eta$',
        showlegend = False
    )

    return py.iplot(figure, filename='A1Q2.2_batch_size_subplot2x2')
fig2_2_visual = batchSizeIGraphSubplot(tunedBatchSizeSummary)
fig2_2_visual

## Section 3: Tuning $\ell_2$ regularizer, $\lambda$, using validation data

In [None]:
MAX_ITER = 2000
def tuneLambda(lambdaList, eta=0.1, batchSize=50):
    # Returns the i-th batch of training data and targets
    # Generates a new, reshuffled batch once all previous batches are fed
    def getNextTrainingBatch(currentIter):
        currentBatchNum = currentIter % (trainData.shape[0] / batchSize)
        if currentBatchNum == 0:
            np.random.shuffle(randIdx)
        lowerBoundIdx = currentBatchNum * batchSize
        upperBoundIdx = (currentBatchNum + 1) * batchSize 
        return trainData[lowerBoundIdx:upperBoundIdx], trainTarget[lowerBoundIdx:upperBoundIdx]
    
    # Generate updated plots for training and validation MSE
    def plotMSEGraph(MSEList, param):
        label = '$\lambda$ = ' + str(param)
        label_classification = ['train.', 'valid.']

        display.clear_output(wait=True)
        plt.figure(figsize=(8,5), dpi=200)
        
        for i, MSE in enumerate(MSEList):
            plt.plot(range(len(MSE)), MSE, '-', label=label+' '+label_classification[i])
        
        plt.axis([0, MAX_ITER, 0, np.amax(MSEList)])
        plt.legend()
        plt.show()
    
    # Calculates the ratio between the n-th average epoch MSE and the (n-1)-th average epoch MSE
    def ratioAverageEpochMSE(currentValidMSE):
        averageN = np.average(currentValidMSE[-(np.arange(epochSize - 1) + 1)])
        averageNlessOne = np.average(currentValidMSE[-(np.arange(epochSize - 1) + epochSize)])
        return averageN / averageNlessOne
    
    # Returns True if the average epoch validation MSE is at least 99% of the previous epoch average.
    # i.e. Returns True if the average learnings between epoch is less than +1%
    # Otherwise, returns False
    def shouldStopEarly(currentValidMSE):
        if currentValidMSE.shape[0] < 2 * epochSize:
            return False
        return True if (ratioAverageEpochMSE(currentValidMSE) >= 0.99) else False
    
    summaryList = []
    randIdx = np.arange(trainData.shape[0])
    epochSize = trainData.shape[0] / batchSize
    
    for lambda_ in lambdaList:
        W, b, X, Y, Y_hat, MSE, accuracy, optimizer = buildGraph(eta, lambda_)

        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            
            # Creates blank training and validation MSE arrays for the Session
            currentTrainMSE = np.array([])[:, np.newaxis]
            currentValidMSE = np.array([])[:, np.newaxis]
            
            # Runs update
            currentIter = 0
            while currentIter <= MAX_ITER:
                inputData, inputTarget = getNextTrainingBatch(currentIter)
                
                _, trainError = sess.run([optimizer, MSE], feed_dict={X: inputData, Y: inputTarget})
                validError = sess.run([MSE], feed_dict={X: validData, Y: validTarget})

                currentTrainMSE = np.append(currentTrainMSE, trainError)
                currentValidMSE = np.append(currentValidMSE, validError)
                
                # Update graph of training and validation MSE arrays
                if (currentIter < 3) or (currentIter % 500 == 0):
                    plotMSEGraph([currentTrainMSE, currentValidMSE], lambda_)
                
                # At every epoch, check for early stopping possibilty. If so, breaks from while loop
                if currentIter % epochSize == 0:
                    if shouldStopEarly(currentValidMSE):
                        break
                
                currentIter += 1
            
            # Compute validation and test accuracy
            validAccuracy = sess.run(accuracy, feed_dict={X: validData, Y: validTarget})
            testAccuracy = sess.run(accuracy, feed_dict={X: testData, Y: testTarget})
            
        # Save session results as dictionary and appends to MSEsummaryList
        summaryList.append(
            {
                'eta': eta,
                'B': batchSize,
                'lambda': lambda_,
                'numIter': currentIter + 1,
                'epoch': float(currentIter + 1) / (trainData.shape[0] / batchSize),
                'trainMSE': currentTrainMSE,
                'validMSE': currentValidMSE,
                'validAccuracy': validAccuracy,
                'testAccuracy': testAccuracy
            }
        )
            
    return summaryList

In [None]:
lambdaList = [0.0, 0.0001, 0.001, 0.01, 0.1, 1.0]
tunedLambdaSummary = tuneLambda(lambdaList)

# Output summary table
for summary in tunedLambdaSummary:
    print 'lambda: %5.4f, numIter: %5d, validMSE: %5.3f, validAcc: %3.3f, testAcc: %3.3f' % \
        (summary['lambda'], summary['numIter'], summary['validMSE'][-1], summary['validAccuracy'], summary['testAccuracy'])

### Produce interactive graph for validation set accuracy vs $\lambda$:

In [None]:
def lambdaIGraph(tunedLambdaSummary):
    # Create plot for each summary
    trace1 = go.Scatter(
        x = [np.log10(summary['lambda'] + 1e-5) for summary in tunedLambdaSummary],
        y = [summary['validAccuracy'] for summary in tunedLambdaSummary],
        name = 'Validation set accuracy'
    )
    
    trace2 = go.Scatter(
        x = [np.log10(summary['lambda'] + 1e-5) for summary in tunedLambdaSummary],
        y = [summary['testAccuracy'] for summary in tunedLambdaSummary],
        name = 'Test set accuracy'
    )
    
    data = go.Data([trace1, trace2])
    
    # Create figure layout
    layout = go.Layout(
        title = '$\\textit{Validation and Test set accuracy vs. } \\lambda$',
        xaxis = {'title': '$\\log_{10}(\\lambda)$'},
        yaxis = {'title': 'Model Accuracy'},
        annotations = [
            dict(
                text = '$\\textit{Used to represent } \\log_{10}(\\lambda=0)$',
                x = -5,
                y = 0.90,
            )
        ]
    )

    figure = go.Figure(data=data, layout=layout)
    return py.iplot(figure, filename='A1Q2.3_accuracyVsLambda')
fig2_3 = lambdaIGraph(tunedLambdaSummary)
fig2_3