# Question 1.1: Binary Cross-Entropy Loss

### Section 0: Package initialisations, environment configuration and function definitions

Import relevant packages:

In [37]:
import tensorflow as tf
import numpy as np
import math

import time
import datetime

# Non-interactive plotting
import matplotlib.pyplot as plt
from IPython import display

# Interactive plotting
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline as pyo
from plotly.offline import download_plotlyjs

Configure environment:

In [38]:
%config InlineBackend.figure_format = 'retina'
np.set_printoptions(precision=3)

# Global Variables
CURRENT_DIR = '/Users/christophertee/Dropbox/University/MASc/Courses/Winter 2017' + \
'/ECE521 (Inference Algorithms & Machine Learning)/Assignment 2'
LOG_DIR = '/Logs'

# Activate Plotly Offline for Jupyter
pyo.init_notebook_mode(connected=True)

Load two-class notMNIST dataset into training, validation and testing datasets:

In [39]:
# Load notMNIST dataset
'''
 Training Set: 3500 images
 Validation Set: 100 images
 Test Set: 145 images
 Images are 28 x 28 (normalised) pixels
'''
with np.load("notMNIST.npz") as data:
    Data, Target = data ["images"], data["labels"]
    
    # Subsetting data for classes 'C' (2) and 'J' (9)
    posClass = 2 # 'C'
    negClass = 9 # 'J'
    dataIndx = (Target==posClass) + (Target==negClass)
    
    Data = Data[dataIndx] / 255.
    Target = Target[dataIndx].reshape(-1, 1)
    
    # Converts target labels to 'C' (0) and 'J' (1)
    Target[Target==posClass] = 1
    Target[Target==negClass] = 0
    
    # Set random seed
    np.random.seed(521)
    
    # Generate and shuffle random index
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    
    Data = Data[randIndx]
    Target = Target[randIndx]
    
    # Flatten arrays of dimension m x 28 x 28 into array of dimension m x 784
    Data = Data.reshape(Data.shape[0], -1)
    
    # Standardizing inputs of dataset
    Data -= np.mean(Data, axis=0)
    Data /= np.std(Data, axis=0)
    
    # Partition data into training, validation and test datasets
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]

### Create TensorFlow graph for linear model:

In [40]:
def buildLinearGraph(eta, lambda_):
    with tf.device('/cpu:0'):
        with tf.variable_scope('linear_regression'):
            # Model inputs
            with tf.name_scope('placeholders'):
                X = tf.placeholder(tf.float32, shape=[None, None], name='Input')
                Y = tf.placeholder(tf.float32, shape=[None, None], name='Target')

            # Model parameters
            with tf.name_scope('parameters'):
                W = tf.get_variable('weights', shape=[784, 1], initializer=tf.truncated_normal_initializer(stddev=0.5))
                b = tf.get_variable('biases', shape=[1, 1], initializer=tf.constant_initializer(0.0))
    
            with tf.device('/cpu:0'):
                # Predicted target
                with tf.name_scope('prediction'):
                    Yhat = tf.add(tf.matmul(X, W), b, 'pred')

                # Metrics
                with tf.name_scope('metrics'):
                    with tf.name_scope('error'):
                        error = tf.add(tf.reduce_mean(tf.nn.l2_loss(tf.subtract(Yhat, Y), name='elem_l2_loss'), \
                                                      name='total_l2_loss'), \
                                       tf.multiply(lambda_ / 2, tf.matmul(tf.transpose(W), W), name='l2_loss'), \
                                       name='total_loss')
                    with tf.name_scope('threshold'):
                        YhatThres = tf.cast(tf.greater_equal(Yhat, 0.5, name='pred_thres'), tf.float32)
                    with tf.name_scope('accuracy'):
                        accuracy = tf.truediv(tf.reduce_sum(tf.cast(tf.equal(YhatThres, Y), tf.int32), name='total_matches'), \
                                              tf.shape(X)[0], \
                                              name='accuracy')

        # Optimizer
        with tf.device('/cpu:0'):
            optimizer = tf.train.AdamOptimizer(eta).minimize(error)
    
    return W, b, X, Y, YhatThres, error, accuracy, optimizer

### Create TensorFlow graph for logistic model:

In [41]:
def buildLogisticGraph(eta, lambda_, opt):
    assert opt in ['GD', 'Adam']
    with tf.device('/cpu:0'):
        with tf.variable_scope('logistic_regression'):
            # Model inputs
            with tf.name_scope('placeholders'):
                X = tf.placeholder(tf.float32, shape=[None, None], name='input')
                Y = tf.placeholder(tf.float32, shape=[None, None], name='target')

            # Model parameters
            with tf.name_scope('parameters'):
                W = tf.get_variable('weights', shape=[784, 1], initializer=tf.truncated_normal_initializer(stddev=0.5))
                b = tf.get_variable('biases', shape=[1, 1], initializer=tf.constant_initializer(0.0))
    
            with tf.device('/cpu:0'):
                # Predicted target
                with tf.name_scope('prediction'):
                    Z = tf.add(tf.matmul(X, W), b, 'logits')
                    Yhat = tf.sigmoid(Z, name='pred')

                # Metrics
                with tf.name_scope('metrics'):
                    with tf.name_scope('error'):
                        error = tf.add(tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Z, labels=Y, name='elem_x_entropy'), \
                                                      name='total_x_entropy'), \
                                       tf.multiply(lambda_ / 2, tf.matmul(tf.transpose(W), W), name='l2_loss'), \
                                       name='total_loss')
                    with tf.name_scope('threshold'):
                        YhatThres = tf.cast(tf.greater_equal(Yhat, 0.5, name='pred_thres'), tf.float32)
                    with tf.name_scope('accuracy'):
                        accuracy = tf.truediv(tf.reduce_sum(tf.cast(tf.equal(YhatThres, Y), tf.int32), name='total_matches'), \
                                              tf.shape(X)[0], \
                                              name='accuracy')

        # Optimizer
        with tf.device('/cpu:0'):
            if opt == 'GD':
                optimizer = tf.train.GradientDescentOptimizer(eta).minimize(error)
            elif opt == 'Adam':
                optimizer = tf.train.AdamOptimizer(eta).minimize(error)
    
    return W, b, X, Y, YhatThres, error, accuracy, optimizer

### Create TensorFlow graph for multinomial logistic model:

In [42]:
def buildSoftmaxGraph(eta, lambda_):
    with tf.device('/cpu:0'):
        with tf.variable_scope('softmax'):
            # Model inputs
            with tf.name_scope('placeholders'):
                X = tf.placeholder(tf.float32, shape=[None, None], name='input')
                Y = tf.placeholder(tf.float32, shape=[None, None], name='target')

            # Model parameters
            with tf.name_scope('parameters'):
                W = tf.get_variable('weights', shape=[784, 10], initializer=tf.truncated_normal_initializer(stddev=0.5))
                b = tf.get_variable('biases', shape=[1, 10], initializer=tf.constant_initializer(0.0))
    
    with tf.device('/cpu:0'):
        # Predicted target
        with tf.name_scope('prediction'):
            Z = tf.add(tf.matmul(X, W), b, 'logits')
            Yhat = tf.nn.softmax(Z, name='activation')

        # Metrics
        with tf.name_scope('metrics'):
            with tf.name_scope('error'):
                with tf.name_scope('x_entropy'):
                    x_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z, \
                                                                                       labels=Y,\
                                                                                       name='elem_x_entropy'), \
                                               name='total_x_entropy')
                with tf.name_scope('l2_loss'):
                    l2_loss = tf.multiply(lambda_ / 2, \
                                          tf.reduce_sum(tf.matmul(tf.transpose(W), W)), name='l2_loss')
                with tf.name_scope('total_loss'):
                    error = tf.add(x_entropy, l2_loss, name='total_loss')
            with tf.name_scope('threshold'):
                YhatThres = tf.cast(tf.argmax(Yhat, axis=1, name='pred_threshold'), tf.float32)
            with tf.name_scope('accuracy'):
                Ycollapsed = tf.cast(tf.argmax(Y, axis=1), tf.float32, name='target_reverse_one_hot')
                accuracy = tf.truediv(tf.reduce_sum(tf.cast(tf.equal(YhatThres, Ycollapsed) \
                                                            , tf.int32), \
                                                    name='total_matches'), \
                                      tf.shape(X)[0], \
                                      name='accuracy')

    # Optimizer
    with tf.device('/cpu:0'):
        optimizer = tf.train.AdamOptimizer(eta).minimize(error)
    
    return W, b, X, Y, YhatThres, error, accuracy, optimizer

### Define Plotly interactive graph generation:

In [43]:
def etaIGraph(summary, title, filename):
    
    # Create subplot titles
    subplotTitle = ['Cross-Entropy Loss vs. Number of Updates', 'Classification Accuracy vs. Number of Updates']
    
    # Define subplot figure
    figure = tools.make_subplots(rows=2, cols=1, subplot_titles=(subplotTitle))

    # Define colour list as per Plotly's default colour list
    colorList = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    
    # Create plot for each summary
    runs = ['train', 'test']
    for i, run in enumerate(runs):
        crossEntropyTrace = go.Scatter(
            x = range(summary['numIter'] + 1),
            y = summary[run + 'Error'],
            name = run,
            marker = {'color': colorList[i]}
        )
        accuracyTrace = go.Scatter(
            x = range(summary['numIter'] + 1),
            y = summary[run + 'Accuracy'],
            name = run,
            marker = {'color': colorList[i]}
        )
        figure.append_trace(crossEntropyTrace, 1, 1)
        figure.append_trace(accuracyTrace, 2, 1)
        
    # Update subplot axes titles
    figure['layout']['xaxis1'].update(title = 'Number of Updates')
    figure['layout']['yaxis1'].update(title = 'Cross-Entropy Loss')
    figure['layout']['xaxis2'].update(title = 'Number of Updates')
    figure['layout']['yaxis2'].update(title = 'Classification Accuracy, %')
    
    # Update figure layout
    figure['layout'].update(
        height = 800,
        showlegend = False,
        title = title,
    )

    return py.iplot(figure, filename=filename)

### Define learning rate tuning function:

In [44]:
MAX_ITER = 10000
def tuneLearningRate(etaList, model, section_dir, batchSize=500, lambda_=0.01, opt='Adam'):    
    # Returns the i-th batch of training data and targets
    # Generates a new, reshuffled batch once all previous batches are fed
    def getNextTrainingBatch(currentIter, randIdx):
        currentBatchNum = currentIter % (trainData.shape[0] / batchSize)
        if currentBatchNum == 0:
            np.random.shuffle(randIdx)
        lowerBoundIdx = currentBatchNum * batchSize
        upperBoundIdx = (currentBatchNum + 1) * batchSize 
        return trainData[randIdx[lowerBoundIdx:upperBoundIdx]], trainTarget[randIdx[lowerBoundIdx:upperBoundIdx]]
    
    # Generate updated plots for training and validation MSE
    def plotErrGraph(errList, param):
        label = '$\eta$ = ' + str(param)
        label_classification = ['train.', 'valid.']

        display.clear_output(wait=True)
        plt.figure(figsize=(8,5), dpi=200)
        
        for i, err in enumerate(errList):
            plt.plot(range(len(err)), err, '-', label=label+' '+label_classification[i])
        
        plt.axis([0, MAX_ITER, 0, np.amax(errList)])
        plt.legend()
        plt.show()
    
    # Calculates the ratio between the n-th average epoch MSE and the (n-1)-th average epoch MSE
    def ratioAverageEpochMSE(currentValidErr):
        averageN = np.average(currentValidErr[-(np.arange(epochSize - 1) + 1)])
        averageNlessOne = np.average(currentValidErr[-(np.arange(epochSize - 1) + epochSize)])
        return averageN / averageNlessOne
    
    # Returns True if the average epoch validation MSE is at least 99.9% of the previous epoch average.
    # i.e. Returns True if the average learnings between epoch is less than +0.1%
    # Otherwise, returns False
    def shouldStopEarly(currentValidErr):
        if currentValidErr.shape[0] < 2 * epochSize:
            return False
        return True if (ratioAverageEpochMSE(currentValidErr) > 0.999) else False
    
    
    # Start of function
    summaryList = []
    randIdx = np.arange(trainData.shape[0])
    epochSize = trainData.shape[0] / batchSize
    randIdx = np.arange(trainData.shape[0])
    
    assert section_dir
    chapter_dir = '/Binary Loss/Section ' + section_dir + '/'
    current_time = '{:%b%d %H_%M_%S}'.format(datetime.datetime.now())
    
    for eta in etaList:
        # Reset graph to prevent duplication of ops and variables
        tf.reset_default_graph()
        
        # Build new graph
        assert model in ['linear', 'logistic', 'softmax']
        if model == 'linear':
            W, b, X, Y, YhatThres, error, accuracy, optimizer = buildLinearGraph(eta, lambda_)
        elif model == 'logistic':
            W, b, X, Y, YhatThres, error, accuracy, optimizer = buildLogisticGraph(eta, lambda_, opt)
        elif model == 'softmax':
            W, b, X, Y, YhatThres, error, accuracy, optimizer = buildSoftmaxGraph(eta, lambda_)
        
        # Begin session
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
            # Log starting time
            startTime = time.time()
            
            # Create summary writer
            writer = tf.summary.FileWriter(CURRENT_DIR + LOG_DIR + chapter_dir + current_time + \
                                           '/Eta-' + str(eta), \
                                           graph=sess.graph)
            
            # Initialise all TensorFlow variables
            tf.global_variables_initializer().run()

            # Creates blank training and validation MSE arrays for the Session
            currentTrainErr = np.array([])[:, np.newaxis]
            currentValidErr = np.array([])[:, np.newaxis]
            currentTestErr = np.array([])[:, np.newaxis]
            
            currentTrainAcc = np.array([])[:, np.newaxis]
            currentValidAcc = np.array([])[:, np.newaxis]
            currentTestAcc = np.array([])[:, np.newaxis]
    
            # Runs update
            currentIter = 0
            while currentIter < MAX_ITER:
                inputData, inputTarget = getNextTrainingBatch(currentIter, randIdx)
                
                _, trainErr, trainAcc = sess.run([optimizer, error, accuracy], feed_dict={X: inputData, Y: inputTarget})
                validErr, validAcc = sess.run([error, accuracy], feed_dict={X: validData, Y: validTarget})
                testErr, testAcc = sess.run([error, accuracy], feed_dict={X: testData, Y: testTarget})

                currentTrainErr = np.append(currentTrainErr, trainErr)
                currentValidErr = np.append(currentValidErr, validErr)
                currentTestErr = np.append(currentTestErr, testErr)
                
                currentTrainAcc = np.append(currentTrainAcc, trainAcc)
                currentValidAcc = np.append(currentValidAcc, validAcc)
                currentTestAcc = np.append(currentTestAcc, testAcc)
                
                # Update graph of training and validation MSE arrays
#                 if (currentIter < 3) or (currentIter % 1000 == 0):
#                     plotErrGraph([currentTrainErr, currentValidErr], eta)
                
                # At every epoch, check for early stopping possibilty. If so, breaks from while loop
                if currentIter % epochSize == 0:
                    if shouldStopEarly(currentValidErr):
                        writer.close()
                        break
                
                currentIter += 1
                
                if currentIter == MAX_ITER:
                    writer.close()
            
        # Save session results as dictionary and appends to MSEsummaryList
        summaryList.append(
            {
                'eta': eta,
                'B': batchSize,
                'lambda': lambda_,
                'optimizer': opt,
                'numIter': currentIter + 1,
                'epoch': float(currentIter + 1) / epochSize,
                'trainError': currentTrainErr,
                'validError': currentValidErr,
                'testError': currentTestErr,
                'trainAccuracy': currentTrainAcc,
                'validAccuracy': currentValidAcc,
                'testAccuracy': currentTestAcc
            }
        )
        
        # Print stats when one eta run is done
        print 'eta: %7.3f, numIter: %7d, validError: %.3f, testAcc: %.3f duration: %3.1fs' % \
            (summaryList[-1]['eta'], summaryList[-1]['numIter'], summaryList[-1]['validError'][-1], \
             np.mean(summaryList[-1]['testAccuracy'][-epochSize:]), time.time() - startTime)
            
    return summaryList

## Section 1: Tuning Learning Rate, $\eta$, for Gradient Descent Optimizer

In [9]:
etaList = [0.001, 0.01, 0.1, 1, 10]
summary1_1 = tuneLearningRate(etaList, model='logistic', section_dir='1.1', opt='GD')

eta:   0.001, numIter:    1254, validError: 1.176, testAcc: 0.862 duration: 8.8s
eta:   0.010, numIter:     673, validError: 0.805, testAcc: 0.931 duration: 5.0s
eta:   0.100, numIter:     260, validError: 0.523, testAcc: 0.986 duration: 2.1s
eta:   1.000, numIter:      78, validError: 0.261, testAcc: 0.993 duration: 0.9s
eta:  10.000, numIter:      22, validError: 3.298, testAcc: 0.986 duration: 0.5s


### Produce interactive 2x1 subplots for best learning rate, $\eta$:

In [24]:
fig1_1 = etaIGraph(summary1_1[3], \
                   '$ \\text{Graphs of Best Learning Rate } (\\eta=' + \
                   str(summary1_1[3]['eta']) + ') \\text{ using Gradient Descent optimizer}$',\
                   'A2Q1.1.1_LogisticSGD')
fig1_1

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



## Section 2: Tuning Learning Rate, $\eta$, for Adam Optimizer

In [11]:
etaList = [1e-4, 1e-3, 1e-2, 1e-1, 1]
summary1_2 = tuneLearningRate(etaList, model='logistic', section_dir='1.2', opt='Adam')

eta:   0.000, numIter:    1716, validError: 1.173, testAcc: 0.883 duration: 11.5s
eta:   0.001, numIter:    1471, validError: 0.327, testAcc: 0.966 duration: 10.0s
eta:   0.010, numIter:     253, validError: 0.298, testAcc: 0.979 duration: 2.2s
eta:   0.100, numIter:      15, validError: 1.397, testAcc: 0.979 duration: 0.8s
eta:   1.000, numIter:      43, validError: 2.115, testAcc: 0.979 duration: 0.8s


### Produce interactive 2x1 subplots for best learning rate, $\eta$:

In [23]:
fig1_2 = etaIGraph(summary1_2[2], \
                   '$ \\text{Graphs of Best Learning Rate } (\\eta=' + \
                   str(summary1_2[2]['eta']) + ') \\text{ using Adam optimizer}$',\
                   'A2Q1.1.2_LogisticAdam')
fig1_2

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



## Section 3: Comparing Linear and Logistic Models

In [13]:
etaList = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
print 'Linear Regression:'
summary1_3_linear = tuneLearningRate(etaList, model='linear', section_dir='1.3Lin', lambda_=0.0)
print '\nLogistic Regression:'
summary1_3_logistic = tuneLearningRate(etaList, model='logistic', section_dir='1.3Log', lambda_=0.0)

Linear Regression:
eta:   0.000, numIter:    7393, validError: 197.854, testAcc: 0.628 duration: 43.8s
eta:   0.001, numIter:    1065, validError: 395.130, testAcc: 0.552 duration: 6.7s
eta:   0.010, numIter:     253, validError: 119.346, testAcc: 0.690 duration: 1.9s
eta:   0.100, numIter:     134, validError: 145.433, testAcc: 0.814 duration: 1.1s
eta:   1.000, numIter:     183, validError: 159.603, testAcc: 0.759 duration: 1.5s
eta:  10.000, numIter:     120, validError: 14397.502, testAcc: 0.766 duration: 1.1s

Logistic Regression:
eta:   0.000, numIter:     855, validError: 0.271, testAcc: 0.917 duration: 6.2s
eta:   0.001, numIter:     414, validError: 0.310, testAcc: 0.959 duration: 3.1s
eta:   0.010, numIter:      57, validError: 0.152, testAcc: 0.938 duration: 0.8s
eta:   0.100, numIter:      15, validError: 1.188, testAcc: 0.986 duration: 0.6s
eta:   1.000, numIter:      15, validError: 4.711, testAcc: 0.972 duration: 0.6s
eta:  10.000, numIter:      22, validError: 47.081, t

Creating summary table for the two models above:

In [14]:
# Create summary table
print '%10s %10s %10s %10s %10s' % ('Model', 'trainAcc', 'validAcc', 'testAcc', 'eta')
print '%10s %10.3f %10.3f %10.3f %10.3f' % ('Linear', summary1_3_linear[2]['trainAccuracy'][-1], summary1_3_linear[2]['validAccuracy'][-1], \
                                        summary1_3_linear[2]['testAccuracy'][-1], summary1_3_linear[2]['eta'])
print '%10s %10.3f %10.3f %10.3f %10.3f' % ('Logistic', summary1_3_logistic[4]['trainAccuracy'][-1], summary1_3_logistic[4]['validAccuracy'][-1], \
                                        summary1_3_logistic[2]['testAccuracy'][-1], summary1_3_logistic[2]['eta'])

     Model   trainAcc   validAcc    testAcc        eta
    Linear      0.654      0.660      0.621      0.010
  Logistic      0.972      0.980      0.938      0.010


Generating dummmy cross-entropy and squared-error losses:

In [25]:
def generateDummyLosses():
    # Generate dummy data
    Yhat = np.linspace(0.01,1,100)[:, np.newaxis]
    Ydummy = 0.0

    # Calculate losses
    xEntropy = - Ydummy * np.log(Yhat) - (1 - Ydummy) * np.log(1 - Yhat)
    squaredErr = (Yhat - Ydummy)**2
    
    # Generate plot traces
    xEntropyTrace = go.Scatter(
        x = Yhat,
        y = xEntropy,
        name = 'Cross-Entropy'
    )
    squareErrTrace = go.Scatter(
        x = Yhat,
        y = squaredErr,
        name = 'Squared-Error'
    )
    
    data = go.Data([xEntropyTrace, squareErrTrace])
    
    # Generate figure layout
    layout = go.Layout(
        title = '$\\text{Comparison of Cross Entropy and Squared Error Losses on Dummy Variable } y = 0$',
        xaxis = {'title': '$\\hat{y}$'},
        yaxis = {'title': 'Loss'}
    )
    
    # Creates and plots figure
    figure = go.Figure(data=data, layout=layout)
    return py.iplot(figure, filename='A2Q1.1.3_LossComparison')

fig1_3 = generateDummyLosses()
fig1_3


divide by zero encountered in log



# Question 1.2: Multi-Class Classification using Softmax

Load notMNIST dataset into training, validation and testing datasets:

In [16]:
# Load notMNIST dataset
'''
 Training Set: 15,000 images
 Validation Set: 1,000 images
 Test Set: 2,724 images
 Images are 28 x 28 (normalised) pixels
'''
with np.load("notMNIST.npz") as data:
    Data, Target = data ["images"], data["labels"]
    
    # Set random seed
    np.random.seed(521)
    
    # Generate and shuffle random index
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    
    Data = Data[randIndx]/255.
    Target = Target[randIndx]
    
    # Generates one-hot version of target
    oneHot = np.zeros((Target.shape[0], 10))
    oneHot[np.arange(Target.shape[0]), Target] = 1
    Target = oneHot
    
    # Flatten arrays of dimension m x 28 x 28 into array of dimension m x 784
    Data = Data.reshape(Data.shape[0], -1)
    
    # Standardizing inputs of dataset
    Data -= np.mean(Data, axis=0)
    Data /= np.std(Data, axis=0)
    
    # Partition data into training, validation and test datasets
    trainData, trainTarget = Data[:15000], Target[:15000]
    validData, validTarget = Data[15000:16000], Target[15000:16000]
    testData, testTarget = Data[16000:], Target[16000:]

In [17]:
etaList = [1e-4, 1e-3, 1e-2, 1e-1, 1]
summary2_3 = tuneLearningRate(etaList, model='softmax', section_dir='2.3')

eta:   0.000, numIter:    8371, validError: 1.060, testAcc: 0.841 duration: 200.0s
eta:   0.001, numIter:    1921, validError: 0.867, testAcc: 0.853 duration: 46.4s
eta:   0.010, numIter:     451, validError: 0.807, testAcc: 0.856 duration: 11.4s
eta:   0.100, numIter:     121, validError: 2.091, testAcc: 0.873 duration: 3.6s
eta:   1.000, numIter:     121, validError: 18.852, testAcc: 0.873 duration: 3.5s


In [26]:
fig2_3 = etaIGraph(summary2_3[2],\
                   '$ \\text{Graphs of Multiclass Classification with Best Learning Rate } (\\eta=' + \
                   str(summary2_3[2]['eta']) + ')$',\
                   'A2Q1.2.3_Softmax')
fig2_3

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



In [19]:
fig2_3 = etaIGraph(summary2_3[2],\
                   '$ \\text{Graphs of Multiclass Classification with Best Learning Rate } (\\eta=' + \
                   str(summary2_3[2]['eta']) + ')$',\
                   'A2Q2.3_bestEtaGraph')
fig2_3

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]

