In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the notMNIST archive, shuffle it with a fixed seed, and carve out the
# train / validation / test splits.
with np.load('./notMNIST.npz') as data:
    Data = data["images"]
    Target = data["labels"]

    # Seed before shuffling so the same permutation is produced on every run.
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)

    # Apply the same permutation to images and labels; images are divided by 255.
    Data, Target = Data[randIndx] / 255., Target[randIndx]

    # First 15000 samples train, the next 1000 validate, the remainder tests.
    trainData, validData, testData = Data[:15000], Data[15000:16000], Data[16000:]
    trainTarget, validTarget, testTarget = Target[:15000], Target[15000:16000], Target[16000:]

In [3]:
def _one_hot(labels, num_classes=10):
    """Return a (len(labels), num_classes) float array with a 1 at each label's index.

    Arrays that are already 2-D are returned unchanged, so re-running this
    cell does not fail on targets that a previous run already encoded.
    """
    labels = np.asarray(labels)
    if labels.ndim == 2:
        return labels
    encoded = np.zeros((len(labels), num_classes))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded


# Row counts are taken from the label arrays themselves instead of being
# hard-coded, so the cell keeps working if the split sizes change.
trainZeros = _one_hot(trainTarget)
trainTarget = trainZeros
validZeros = _one_hot(validTarget)
validTarget = validZeros
testZeros = _one_hot(testTarget)
testTarget = testZeros

In [14]:
# Hyperparameters to change

# Fixed seed so the randomly sampled configuration below is reproducible.
np.random.seed(seed=60749150)

# Every draw keeps size=1 so the random stream is consumed exactly as before;
# .item() turns the one-element array into a plain Python scalar
# (np.asscalar is not available in current NumPy releases).

# Learning rate and weight decay are sampled log-uniformly in [e^-7.5, e^-4.5).
learning_rate = np.exp(np.random.uniform(low=-7.5, high=-4.5, size=1)).item()

# Number of hidden layers: integer in [1, 5].
num_layers = np.random.randint(1, 6, 1).item()

# Units per hidden layer: integer in [100, 499].
hidden_units = np.random.randint(100, 500, 1).item()

weight_decay = np.exp(np.random.uniform(low=-7.5, high=-4.5, size=1)).item()

# 0 no dropout, 1 dropout
dropout = np.random.randint(0, 2, 1).item()

print('Learning Rate:', learning_rate, 'Number Layers:', num_layers, 'Hidden Units:', hidden_units,
      'Weight Decay:', weight_decay, 'Dropout:', dropout)

n_epochs = 5000
batch_size = 500
n_dim = 28*28

Learning Rate: 0.0024606117272511774 Number Layers: 5 Hidden Units: 371 Weight Decay: 0.0036402635633598934 No Dropout [0]


In [5]:
def grab_batches(trainData, trainTarget, batch_size):
    """Shuffle the training set and split it into mini-batches.

    Args:
        trainData: array whose first axis indexes samples; each sample is
            flattened into a single row.
        trainTarget: array of targets with the same first-axis length.
        batch_size: positive number of samples per batch. When it does not
            divide the sample count, the final batch is smaller.

    Returns:
        An iterator of ``(X_batch, y_batch)`` pairs that together cover every
        sample exactly once, in an order drawn from NumPy's global RNG.

    Raises:
        ValueError: if ``batch_size`` is not positive or the two arrays have
            different lengths.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    num_samples = len(trainData)
    if len(trainTarget) != num_samples:
        raise ValueError("trainData and trainTarget must have the same length")
    if num_samples == 0:
        return zip((), ())

    # Flatten per-sample dimensions; the row width is derived from the data
    # itself rather than from a notebook-level constant.
    flat_data = trainData.reshape(num_samples, -1)

    # Size the permutation from the data instead of assuming 15000 samples.
    shuffled = np.random.permutation(num_samples)
    index_batches = [shuffled[start:start + batch_size]
                     for start in range(0, num_samples, batch_size)]

    X_batches = [flat_data[indices] for indices in index_batches]
    y_batches = [trainTarget[indices] for indices in index_batches]
    return zip(X_batches, y_batches)

In [6]:
# Graph inputs: X receives rows of n_dim features and Y receives rows of 10
# target values. The leading dimension is None so any batch size can be fed.
X = tf.placeholder(dtype=tf.float32, shape=(None, n_dim))
Y = tf.placeholder(dtype=tf.float32, shape=(None, 10))

In [19]:
def hidden_layer(X, hidden_units, dropout=True, activation=True, keep_prob=0.5):
    """Build one fully connected layer on top of ``X``.

    Args:
        X: 2-D tensor whose second dimension is statically known.
        hidden_units: number of output units of the layer.
        dropout: truthy to apply dropout to the layer output.
        activation: truthy to apply ReLU; falsy returns the affine output.
        keep_prob: probability of keeping each unit when ``dropout`` is on.
            Defaults to 0.5, the value that used to be hard-coded. A scalar
            tensor may be passed instead of a float so the caller can feed a
            different value (e.g. 1.0) when evaluating.

    Returns:
        A tuple ``(activations, hidden_weights)``: the layer output tensor and
        the weight variable (returned so callers can regularize it).

    Raises:
        ValueError: if the second dimension of ``X`` is unknown.
    """
    x_dimension = X.shape[1].value
    if x_dimension is None:
        raise ValueError("hidden_layer needs a statically known input width")

    # Xavier initialization drawn from a normal distribution (uniform=False).
    initializer = tf.contrib.layers.xavier_initializer(uniform=False)
    hidden_weights = tf.Variable(initializer([x_dimension, hidden_units]), name='weights')
    hidden_biases = tf.Variable(tf.zeros(hidden_units), name='biases')

    output = tf.add(tf.matmul(X, hidden_weights), hidden_biases)
    activations = tf.nn.relu(output) if activation else output

    # With a constant keep_prob the dropout op is active on every run of the
    # graph, including runs that only compute metrics.
    if dropout:
        activations = tf.nn.dropout(activations, keep_prob=keep_prob)
    return activations, hidden_weights

In [22]:
# NOTE(review): `models` is not referenced in any visible cell; kept in case a
# cell outside this view relies on it.
from tensorflow.contrib.keras import models

# Stack `num_layers` hidden layers of `hidden_units` units each, collecting
# their weight matrices for the L2 penalty.
Ws = []
y_, W = hidden_layer(X, hidden_units, dropout)
Ws.append(W)
for _ in range(num_layers-1):
    y_, W = hidden_layer(y_, hidden_units, dropout)
    Ws.append(W)

# Linear output layer producing 10 logits (no ReLU, no dropout).
# NOTE(review): its weights are not added to Ws, so they are left out of the
# weight-decay term below -- confirm that this is intended.
y_, output_W = hidden_layer(y_, 10, dropout=False, activation=False)

# Mean cross-entropy over the batch plus the weight-decay penalty. The sum is
# already a scalar, so no further reduction is applied.
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y_))
regularizer = sum(tf.nn.l2_loss(hidden_W) for hidden_W in Ws)
loss = cross_entropy + weight_decay * regularizer

# argmax already yields integer class indices, so they are compared directly
# without rounding or narrowing to int8.
prediction = tf.argmax(y_, 1)
equality = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(equality, tf.float32))

training_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
init = tf.global_variables_initializer()

In [23]:
def plot_metrics(learning_rate):
    """Plot and save the loss and accuracy curves of all three splits.

    Reads the notebook-level ``metrics`` dict (``metrics[split][metric]`` is a
    per-epoch list) and ``best_epoch`` dict (epoch of the best validation
    value per metric), both populated by the training cell.

    Args:
        learning_rate: value shown in the figure title and used in the file
            name of the saved figure.

    One figure per metric is written to ``figures/lr_<learning_rate>_<metric>.png``;
    the ``figures`` directory is created if it does not exist.
    """
    import os  # local import so the function does not rely on another cell

    # savefig does not create missing directories.
    os.makedirs('figures', exist_ok=True)

    for metric in ['loss', 'accuracy']:
        plt.figure(figsize=(12,9))
        plt.title('{} vs. num. of epochs for learning rate of {:.5}'.format(metric, learning_rate))
        for dataset in ['train', 'test', 'valid']:
            series = metrics[dataset][metric]
            # The x axis follows the number of recorded epochs rather than
            # the loop variable leaked by the training cell.
            plt.plot(range(1, len(series) + 1), series, label=dataset)
        plt.axvline(x=best_epoch[metric], linewidth=1, linestyle='dashed', color='k', label='early stopping')
        plt.xlabel('epoch')
        plt.ylabel(metric)
        plt.legend()
        plt.savefig('figures/lr_{:.8}_{}.png'.format(learning_rate, metric), dpi=300, bbox_inches='tight')

In [25]:
with tf.Session() as sess:
    sess.run(init)

    # One history dict per split; `metrics` groups them for plot_metrics.
    train_metrics, valid_metrics, test_metrics = (
        {'loss': [], 'accuracy': []} for _ in range(3)
    )
    metrics = {'train': train_metrics, 'valid': valid_metrics, 'test': test_metrics}

    # Best validation values seen so far and the epochs at which they occurred.
    best_epoch = {'loss': 0, 'accuracy': 0}
    best_loss, best_accuracy = float('inf'), 0
    epochs_since_best = 0

    # The full-split feeds are identical every epoch, so they are built once.
    train_feed = {X: trainData.reshape(-1, n_dim), Y: trainTarget}
    valid_feed = {X: validData.reshape(-1, n_dim), Y: validTarget}
    test_feed = {X: testData.reshape(-1, n_dim), Y: testTarget}

    print("Learning Rate: {} \n".format(learning_rate))
    for epoch in range(1, n_epochs + 1):
        # One optimizer step per mini-batch of the freshly shuffled training set.
        for X_batch, y_batch in grab_batches(trainData, trainTarget, batch_size):
            sess.run(training_step, feed_dict={X: X_batch, Y: y_batch})

        # Loss and accuracy on each full split after this epoch's updates.
        train_loss, train_accuracy = sess.run([loss, accuracy], train_feed)
        valid_loss, valid_accuracy = sess.run([loss, accuracy], valid_feed)
        test_loss, test_accuracy = sess.run([loss, accuracy], test_feed)

        if valid_loss < best_loss:
            best_loss, best_epoch['loss'] = valid_loss, epoch

        # Only validation accuracy resets the no-improvement counter.
        if valid_accuracy > best_accuracy:
            best_accuracy, best_epoch['accuracy'] = valid_accuracy, epoch
            epochs_since_best = 0
        else:
            epochs_since_best += 1

        if not epoch % 10:
            print("Epoch: {}, Train Loss: {:.4}, Train Accuracy: {:.4}".format(epoch, train_loss, train_accuracy))
            print("Best Epoch: {}, Best Valid Loss: {:.4}, Best Valid Accuracy: {:.4} \n".format(best_epoch, best_loss, best_accuracy))

        for split_metrics, split_loss, split_accuracy in (
            (train_metrics, train_loss, train_accuracy),
            (valid_metrics, valid_loss, valid_accuracy),
            (test_metrics, test_loss, test_accuracy),
        ):
            split_metrics['loss'].append(split_loss)
            split_metrics['accuracy'].append(split_accuracy)

        # Abort only during the first 19 epochs, when validation accuracy has
        # gone more than 15 epochs without improving.
        if epochs_since_best > 15 and epoch < 20:
            print("Learning rate too high")
            break
    plot_metrics(learning_rate)



Learning Rate: 0.0024606117272511774 

Epoch: 10, Train Loss: 0.5156, Train Accuracy: 0.9381
Best Epoch: {'accuracy': 7, 'loss': 10}, Best Valid Loss: 0.5785, Best Valid Accuracy: 0.915 



KeyboardInterrupt: 