# Deep Learning for Engineers


## First Some Helpers

In [1]:

def print_row(cols, max_length=20):
    col_width = max_length
    print("".join(str(word).ljust(col_width) for word in cols))


## Demo 1: Logistic Regression using scikit-learn 

First we load the data into memory. The data only contains a small fraction of the notMNIST dataset.

In [2]:
def load_data(pickle_file):
    import pickle

    try:
        with open(pickle_file, 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        print('Unable to load data from', pickle_file, ':', e)
        raise

data = load_data('partial_notMNIST.pickle')
train_dataset = data['train_dataset']
train_labels = data['train_labels']
test_dataset = data['test_dataset']
test_labels = data['test_labels']
valid_dataset = data['valid_dataset']
valid_labels = data['valid_labels']

Now, we can use the logistic regression implementation from scikit-learn:

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn import cross_validation

length = len(train_dataset)

def cross_validate(samples, classifier=SGDClassifier(n_jobs=-1, n_iter=1000)):
    td = train_dataset[:samples].reshape(-1, 28*28)
    tl = train_labels[:samples]

    score = cross_validation.cross_val_score(classifier, td, tl, cv=5)
    print("%d\t%s" % (samples, score))

          
cross_validate(1000, classifier=LogisticRegression(random_state=413,multi_class='multinomial', solver='newton-cg'))



1000	[ 0.77073171  0.78217822  0.73366834  0.78787879  0.74489796]


## Demo 2: Tensor Flow



In [4]:
import numpy as np
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    A = tf.constant(np.array([1,2,3,4,5]))
    B = tf.constant(np.array([1,2,3,4,5]))
    R = A + B


In [5]:
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    res = session.run([R])
    print(res)
    

[array([ 2,  4,  6,  8, 10])]


## Demo 3: Logistic Regression using TensorFlow

Let's implement the logistic regression using tensor flow.

In [6]:
import numpy as np

image_size = 28
num_labels = 10

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (10000, 784) (10000, 10)
Validation set (2000, 784) (2000, 10)
Test set (1000, 784) (1000, 10)


In [7]:
train_subset = 10000

graph = tf.Graph()
with graph.as_default():
  # Load the Data
  tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
  tf_train_labels = tf.constant(train_labels[:train_subset])
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Linear Model
  weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  
  # Training computation.
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predict!
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [8]:
num_steps = 1000

def accuracy(predictions, labels):
  return (np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

with tf.Session(graph=graph) as session:
  print_row(['Step', 'Loss', 'A-Train', 'A-Valid'])
  tf.global_variables_initializer().run()
  for step in range(num_steps):
    _, l, predictions = session.run([optimizer, loss, train_prediction])
    if (step % 100 == 0):
      train_acc = accuracy(predictions, train_labels[:train_subset, :])
      valid_acc = accuracy(valid_prediction.eval(), valid_labels)                    
             
      print_row([step, l, train_acc, valid_acc])

  print('Test accuracy: %.3f' % accuracy(test_prediction.eval(), test_labels))

Step                Loss                A-Train             A-Valid             
0                   16.0209             0.1082              0.149               
100                 2.29736             0.72                0.727               
200                 1.87817             0.7497              0.743               
300                 1.64424             0.7626              0.7485              
400                 1.48009             0.7698              0.751               
500                 1.35528             0.7755              0.753               
600                 1.25669             0.7825              0.758               
700                 1.17646             0.7863              0.7605              
800                 1.10953             0.7909              0.7575              
900                 1.0526              0.7959              0.7555              
Test accuracy: 0.814


## Demo 4: Stochastic Gradient Descent


In [9]:
train_subset = 128

graph = tf.Graph()
with graph.as_default():
  # Load the Data
  tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
  tf_train_labels = tf.constant(train_labels[:train_subset])
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Linear Model
  weights = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  
  # Training computation.
  logits = tf.matmul(tf_train_dataset, weights) + biases
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predict!
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [10]:
num_steps = 3000
batch_size = 128

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print_row(['Step', 'Loss', 'Acc-Train', 'Acc-Valid'])
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    
    if (step % 500 == 0):
      train_acc = accuracy(predictions, batch_labels)
      valid_acc = accuracy(valid_prediction.eval(), valid_labels)
        
      print_row([step, l, train_acc, valid_acc])
  print("Test accuracy: %.4f%%" % accuracy(test_prediction.eval(), test_labels))

Step                Loss                Acc-Train           Acc-Valid           
0                   23.2552             0.03125             0.0555              
500                 0.625185            0.84375             0.746               
1000                2.0469              0.6484375           0.7675              
1500                1.24211             0.7890625           0.7695              
2000                0.542968            0.8125              0.774               
2500                0.75939             0.7890625           0.7675              
Test accuracy: 0.8330%


## Demo 4: First Neural Network

In [11]:
batch_size = 128
num_relu = 1024

graph = tf.Graph()
with graph.as_default():
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  weights_1 = tf.Variable(tf.truncated_normal([image_size * image_size, num_relu]))
  biases_1 = tf.Variable(tf.zeros([num_relu]))
  
  hidden_input = tf.matmul(tf_train_dataset, weights_1) + biases_1
  hidden_output = tf.nn.relu(hidden_input)

  weights_2 = tf.Variable(tf.truncated_normal([num_relu, num_labels]))
  biases_2 = tf.Variable(tf.zeros([num_labels]))

  # Training computation.
  logits = tf.matmul(hidden_output, weights_2) + biases_2

  # Loss to optimize
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset,weights_1)+biases_1), weights_2) + biases_2)
  test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset,weights_1)+biases_1), weights_2) + biases_2)


In [12]:
num_steps = 4001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print_row(['Step', 'Loss', 'Acc-Train', 'Acc-Valid'])
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    
    if (step % 500 == 0):
      test_acc = accuracy(predictions, batch_labels)
      valid_acc = accuracy(valid_prediction.eval(), valid_labels)
        
      print_row([step, l, test_acc, valid_acc])
  print("Test accuracy: %.4f" % accuracy(test_prediction.eval(), test_labels))

Step                Loss                Acc-Train           Acc-Valid           
0                   354.213             0.1328125           0.373               
500                 0.84551             0.9453125           0.808               
1000                0.411211            0.9765625           0.8245              
1500                0.0949894           0.984375            0.823               
2000                0.909019            0.984375            0.8245              
2500                9.31322e-09         1.0                 0.827               
3000                1.21072e-08         1.0                 0.828               
3500                0.0                 1.0                 0.825               
4000                0.0                 1.0                 0.828               
Test accuracy: 0.8820


## Demo 5: Regularization



In [13]:
batch_size = 256
num_relu = 1024

graph = tf.Graph()
with graph.as_default():

  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  weights_1 = tf.Variable(tf.truncated_normal([image_size * image_size, num_relu]))
  biases_1 = tf.Variable(tf.zeros([num_relu]))
  
  hidden_input = tf.matmul(tf_train_dataset, weights_1) + biases_1
  hidden_output = tf.nn.relu(hidden_input)
  regularized_output_1 = tf.nn.dropout(hidden_output, 0.5)

  weights_2 = tf.Variable(tf.truncated_normal([num_relu, num_labels]))
  biases_2 = tf.Variable(tf.zeros([num_labels]))

  # Training computation.
  logits = tf.matmul(regularized_output_1, weights_2) + biases_2

  # Loss to optimize
  beta = tf.constant(0.001)
  l2_w1 = beta * tf.nn.l2_loss(weights_1)
  l2_w2 = beta * tf.nn.l2_loss(weights_2)
  logits = tf.matmul(hidden_output, weights_2) + biases_2 
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels)) + l2_w1 + l2_w2
  
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset,weights_1)+biases_1), weights_2) + biases_2)
  test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset,weights_1)+biases_1), weights_2) + biases_2)


In [14]:
num_steps = 4000

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print_row(['Step', 'Loss', 'Acc-Train', 'Acc-Valid'])
  for step in range(num_steps):
    # Pick Minibatch
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    
    if step % 1000 == 0:
      test_acc = accuracy(predictions, batch_labels)
      valid_acc = accuracy(valid_prediction.eval(), valid_labels)
        
      print_row([step, l, test_acc, valid_acc])
  print("Test accuracy: %.4f%%" % accuracy(test_prediction.eval(), test_labels))

Step                Loss                Acc-Train           Acc-Valid           
0                   639.007             0.1015625           0.2475              
1000                114.794             1.0                 0.8265              
2000                42.2288             1.0                 0.8255              
3000                15.5775             1.0                 0.8405              
Test accuracy: 0.9000%


# Demo 6: Convolutional Networks

In [15]:
image_size = 28
num_labels = 10
num_channels = 1 

import numpy as np

data = load_data('partial_notMNIST.pickle')
train_dataset = data['train_dataset']
train_labels = data['train_labels']
test_dataset = data['test_dataset']
test_labels = data['test_labels']
valid_dataset = data['valid_dataset']
valid_labels = data['valid_labels']

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (10000, 28, 28, 1) (10000, 10)
Validation set (2000, 28, 28, 1) (2000, 10)
Test set (1000, 28, 28, 1) (1000, 10)


In [16]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal([image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  def model(data):
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [17]:
num_steps = 10001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print_row(['Step', 'Loss', 'Acc-Train', 'Acc-Valid'])
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % (num_steps // 10) == 0):
      test_acc = accuracy(predictions, batch_labels)
      valid_acc = accuracy(valid_prediction.eval(), valid_labels)
      print_row([step, l, test_acc, valid_acc])   
  print('Test accuracy: %.4f%%' % accuracy(test_prediction.eval(), test_labels))

Step                Loss                Acc-Train           Acc-Valid           
0                   3.20483             0.0625              0.1095              
1000                0.803979            0.75                0.837               
2000                0.220482            0.9375              0.8645              
3000                0.278593            0.8125              0.859               
4000                0.480946            0.8125              0.864               
5000                0.280655            0.9375              0.8615              
6000                0.0316329           1.0                 0.864               
7000                0.00358476          1.0                 0.854               
8000                0.0564438           1.0                 0.858               
9000                0.111813            0.9375              0.8575              
10000               0.0846771           0.9375              0.861               
Test accuracy: 0.9100%
