In [1]:
# Training convolutional networks

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [2]:
# Load the pickle file

pickle_file = '/Users/rgparekh/Documents/Personal/Rajesh/Data/notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [3]:
# Re-shape the data sets to be TensorFlow friendly

image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [4]:
# Define accuracy

def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [11]:
# Define the CNN architecture

batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data):
    # data is the input data
    # layer1_weights define the filter parameter [filter_height, filter_width, input_channels, output_channels]
    # [1,2,2,1] defines the strides parameter - 1D tensor of length 4 (batch, height, width, channels)
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
  # Optimizer.
  optimizer = tf.train.AdamOptimizer(0.005).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))


In [12]:
# Train model and compute test set accuracy

num_steps = 1001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 2.927531
Minibatch accuracy: 12.5%
Validation accuracy: 10.0%
Minibatch loss at step 50: 1.046034
Minibatch accuracy: 56.2%
Validation accuracy: 69.7%
Minibatch loss at step 100: 0.901807
Minibatch accuracy: 62.5%
Validation accuracy: 76.8%
Minibatch loss at step 150: 0.285646
Minibatch accuracy: 87.5%
Validation accuracy: 78.8%
Minibatch loss at step 200: 0.875015
Minibatch accuracy: 75.0%
Validation accuracy: 80.0%
Minibatch loss at step 250: 0.912396
Minibatch accuracy: 68.8%
Validation accuracy: 80.8%
Minibatch loss at step 300: 0.293089
Minibatch accuracy: 93.8%
Validation accuracy: 81.7%
Minibatch loss at step 350: 0.437883
Minibatch accuracy: 93.8%
Validation accuracy: 80.8%
Minibatch loss at step 400: 0.215000
Minibatch accuracy: 93.8%
Validation accuracy: 82.5%
Minibatch loss at step 450: 0.868602
Minibatch accuracy: 81.2%
Validation accuracy: 81.6%
Minibatch loss at step 500: 0.505556
Minibatch accuracy: 87.5%
Validation accuracy: 82.3%
M

In [1]:
# Convolutional Neural Network from the TensorFlow tutorial: https://www.tensorflow.org/tutorials/layers

from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [6]:
# Load the pickle file

pickle_file = '/Users/rgparekh/Documents/Personal/Rajesh/Data/notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [8]:
# Testing
print('Train labels [0]', train_labels[0])
print('Test labels [0]', test_labels[0])
print('Valid labels [0]', valid_labels[0])

Train labels [0] 4
Test labels [0] 3
Valid labels [0] 1


In [9]:
# Re-shape the data sets to be TensorFlow friendly

image_size = 28
num_labels = 10
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [10]:
# Testing
# Labels are now in 1-hot encoding
print('Train labels [0]', train_labels[0])
print('Test labels [0]', test_labels[0])
print('Valid labels [0]', valid_labels[0])

Train labels [0] [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
Test labels [0] [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
Valid labels [0] [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]


In [30]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Imports
import numpy as np
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

# Our application logic will be added here

if __name__ == "__main__":
  tf.app.run()

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/notmnist_convnet_model/model.ckpt-2400
INFO:tensorflow:Saving checkpoints for 2401 into /tmp/notmnist_convnet_model/model.ckpt.
INFO:tensorflow:loss = 1.16968, step = 2401
INFO:tensorflow:probabilities = [[ 0.19444923  0.01019515  0.00028452  0.00619345  0.00154695  0.00160887
   0.0022322   0.00974129  0.08458734  0.68916106]
 [ 0.05345789  0.0337581   0.00740292  0.01530634  0.06090756  0.03013228
   0.00908712  0.0134669   0.5755437   0.20093717]
 [ 0.98760825  0.00187131  0.00022567  0.00087676  0.00046516  0.00033656
   0.00105335  0.0016172   0.00518806  0.00075763]
 [ 0.00507217  0.0433133   0.03007456  0.80659485  0.00592063  0.00383412
   0.07795572  0.01451581  0.00069871  0.01202016]
 [ 0.27241713  0.057

INFO:tensorflow:global_step/sec: 4.6453
INFO:tensorflow:loss = 1.10964, step = 2501 (21.528 sec)
INFO:tensorflow:probabilities = [[ 0.00434023  0.00094209  0.00448475  0.00082615  0.01971925  0.96312159
   0.00116955  0.00511949  0.0002286   0.00004839]
 [ 0.17727886  0.07474162  0.00025283  0.00369421  0.01349015  0.038057
   0.01169052  0.64046884  0.01259759  0.02772831]
 [ 0.06568386  0.04632153  0.24507834  0.04434178  0.04869646  0.01313833
   0.4402324   0.03593935  0.01618609  0.04438193]
 [ 0.10416142  0.07112595  0.01446772  0.6917088   0.0105937   0.01207648
   0.03944845  0.0258574   0.00521283  0.02534719]
 [ 0.00868776  0.00718991  0.41282085  0.00160545  0.45157421  0.05418848
   0.03342432  0.00116977  0.02836126  0.00097803]
 [ 0.03431149  0.21130052  0.01234312  0.20714468  0.03150428  0.00456545
   0.02152766  0.02726845  0.4360297   0.01400462]
 [ 0.04266089  0.00282881  0.00051199  0.0009528   0.0005501   0.00049718
   0.00384151  0.00182076  0.01507927  0.93125665

INFO:tensorflow:global_step/sec: 4.52709
INFO:tensorflow:loss = 0.975401, step = 2601 (22.089 sec)
INFO:tensorflow:probabilities = [[ 0.00492817  0.09700635  0.22457154  0.00751085  0.58354497  0.0224287
   0.01305441  0.0069514   0.03612808  0.00387554]
 [ 0.05114162  0.26824391  0.00124947  0.00889038  0.01611776  0.00776323
   0.01315596  0.62830108  0.00079018  0.00434636]
 [ 0.00032146  0.02872117  0.00737534  0.89755201  0.00266622  0.00049029
   0.06096895  0.00041276  0.00028856  0.0012033 ]
 [ 0.01016904  0.01584224  0.00274201  0.006769    0.08328256  0.4303169
   0.00566771  0.03007705  0.40706778  0.00806571]
 [ 0.99440825  0.00104705  0.00005662  0.00050507  0.0005105   0.0006221
   0.00075696  0.00110259  0.00079804  0.00019293]
 [ 0.00008092  0.00124538  0.00423424  0.00225946  0.06389496  0.91790128
   0.00465521  0.00538535  0.00033263  0.00001063]
 [ 0.33814383  0.07189411  0.00057641  0.05831837  0.00134861  0.00301701
   0.01984495  0.01734683  0.0148927   0.4746170

INFO:tensorflow:global_step/sec: 5.06434
INFO:tensorflow:loss = 0.719517, step = 2701 (19.746 sec)
INFO:tensorflow:probabilities = [[ 0.00798648  0.0237061   0.62689769  0.02854398  0.13719159  0.03793151
   0.0800707   0.04183488  0.00845483  0.00738219]
 [ 0.0261048   0.13975111  0.00306859  0.03546005  0.08367515  0.02371753
   0.01346559  0.61112016  0.02224379  0.04139329]
 [ 0.10706554  0.08905989  0.02161488  0.10185704  0.04463784  0.0641025
   0.09415996  0.09303712  0.16724342  0.21722183]
 [ 0.96145815  0.01126953  0.00041893  0.00076517  0.00636393  0.00368163
   0.00308725  0.00693106  0.00284683  0.0031776 ]
 [ 0.00024974  0.0077826   0.00208432  0.00489234  0.26986849  0.69423932
   0.00093048  0.0196824   0.00024549  0.00002492]
 [ 0.01147739  0.02820891  0.00096121  0.00698209  0.0168285   0.00259626
   0.00247577  0.92767358  0.00072481  0.00207155]
 [ 0.01145676  0.54694754  0.00263444  0.032375    0.05534988  0.00526744
   0.02645687  0.29221734  0.02585008  0.00144

INFO:tensorflow:global_step/sec: 4.84572
INFO:tensorflow:loss = 0.973922, step = 2801 (20.636 sec)
INFO:tensorflow:probabilities = [[ 0.03520432  0.00719671  0.00027152  0.00483918  0.00016215  0.00002889
   0.00374384  0.00422094  0.00892939  0.93540311]
 [ 0.16585173  0.24765064  0.00206221  0.01807358  0.05326014  0.05308567
   0.01613572  0.06162081  0.05955956  0.32269987]
 [ 0.00911785  0.20207076  0.007309    0.5535208   0.01678972  0.00041226
   0.0879144   0.07000591  0.00538639  0.04747296]
 [ 0.00038661  0.00037102  0.07685549  0.00066833  0.09540209  0.82236826
   0.00100603  0.00011757  0.00280894  0.00001555]
 [ 0.15090063  0.02247276  0.05286982  0.31264809  0.00134682  0.00672683
   0.29636994  0.04803711  0.01573455  0.09289351]
 [ 0.01442582  0.00961329  0.05994665  0.10669871  0.00513046  0.00296299
   0.76495713  0.00624597  0.00628196  0.02373706]
 [ 0.76381767  0.053273    0.00021207  0.00402715  0.00222949  0.00170959
   0.00341275  0.11228     0.0145471   0.0444

INFO:tensorflow:Saving checkpoints for 2900 into /tmp/notmnist_convnet_model/model.ckpt.
INFO:tensorflow:Loss for final step: 0.854049.
INFO:tensorflow:Starting evaluation at 2017-09-05-19:44:03
INFO:tensorflow:Restoring parameters from /tmp/notmnist_convnet_model/model.ckpt-2900
INFO:tensorflow:Finished evaluation at 2017-09-05-19:44:10
INFO:tensorflow:Saving dict for global step 2900: accuracy = 0.7896, global_step = 2900, loss = 0.800827
{'global_step': 2900, 'loss': 0.80082685, 'accuracy': 0.78960001}


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [28]:
def cnn_model_fn(features, labels, mode):
  """Model function for CNN."""
  # Input Layer
  # batch_size = -1 indicates a place holder to dynamically fill in the batch_size
  input_layer = tf.reshape(features["x"], [-1, image_size, image_size, 1])

  # Convolutional Layer #1
  conv1 = tf.layers.conv2d(
      inputs=input_layer,
      filters=32,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)

  # Pooling Layer #1
  pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

  # Convolutional Layer #2 and Pooling Layer #2
  conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

  # Dense Layer
  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
  dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
  dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

  # Logits Layer
  logits = tf.layers.dense(inputs=dropout, units=num_labels)

  predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=logits, axis=1),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Calculate Loss (for both TRAIN and EVAL modes)
  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
  loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

  # Configure the Training Op (for TRAIN mode)
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  # Add evaluation metrics (for EVAL mode)
  eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(
          labels=labels, predictions=predictions["classes"])}
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [29]:
def main(unused_argv):
    # Load the pickle file
    pickle_file = '/Users/rgparekh/Documents/Personal/Rajesh/Data/notMNIST.pickle'

    with open(pickle_file, 'rb') as f:
      save = pickle.load(f)
      train_dataset = save['train_dataset']
      train_labels = save['train_labels']
      valid_dataset = save['valid_dataset']
      valid_labels = save['valid_labels']
      test_dataset = save['test_dataset']
      test_labels = save['test_labels']
      del save  # hint to help gc free up memory
      print('Training set', train_dataset.shape, train_labels.shape)
      print('Validation set', valid_dataset.shape, valid_labels.shape)
      print('Test set', test_dataset.shape, test_labels.shape)
    
    # Reformat the datasets
    #train_dataset, train_labels = reformat(train_dataset, train_labels)
    #valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
    #test_dataset, test_labels = reformat(test_dataset, test_labels)
    #print('Training set', train_dataset.shape, train_labels.shape)
    #print('Validation set', valid_dataset.shape, valid_labels.shape)
    #print('Test set', test_dataset.shape, test_labels.shape)
    
    classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/notmnist_convnet_model")
    
    # Set up logging for predictions
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=100)
    
    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_dataset},
        y=train_labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)
    classifier.train(
        input_fn=train_input_fn,
        steps=500,
        hooks=[logging_hook])
    
    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": valid_dataset},
        y=valid_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)
 

In [40]:
# CNN architecture with max-pooling

# Define the CNN architecture

batch_size = 256
patch_size = 5
depth1 = 16
depth2 = 32
num_hidden = 64
num_channels = 1

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth1], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth1]))

  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth1, depth2], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth2]))

  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 14 * image_size // 14 * depth2, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))

  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  # Model.
  def model(data):
    # data is the input data
    # layer1_weights define the filter parameter [filter_height, filter_width, input_channels, output_channels]
    # [1,2,2,1] defines the strides parameter - 1D tensor of length 4 (batch, height, width, channels)
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    
    # conv1 returns a tensor of size [batch_size, image_size/2, image_size/2, depth1]
    
    hidden1 = tf.nn.relu(conv1 + layer1_biases)
    
    # hidden1 returns a tensor of size [batch_size, image_size/2, image_size/2, depth1]
    
    pool1 = tf.nn.max_pool(value=hidden1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    
    # pool1 returns a tensor of size [batch_size, image_size/4, image_size/4, depth1] (dim reduction since strides = 2)    
    
    #print("Conv1 ", conv1.get_shape().as_list())
    #print("Hidden1 ", hidden1.get_shape().as_list())
    #print("Pool1 ", pool1.get_shape().as_list())
    
    conv2 = tf.nn.conv2d(pool1, layer2_weights, [1, 2, 2, 1], padding='SAME')
    
   # conv1 returns a tensor of size [batch_size, image_size/4/2, image_size/4/2, depth2]

    hidden2 = tf.nn.relu(conv2 + layer2_biases)
    
  # hidden2 returns a tensor of size [batch_size, image_size/8, image_size/8, depth2]

    pool2 = tf.nn.max_pool(value=hidden2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    
  # pool2 returns a tensor of size [batch_size, image_size/14, image_size/14, depth2]
    
    print("Conv2 ", conv2.get_shape().as_list())    
    print("Hidden2 ", hidden2.get_shape().as_list())    
    print("Pool2 ", pool2.get_shape().as_list())
    
    shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2, [shape[0], shape[1] * shape[2] * shape[3]])
    
    hidden3 = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    
    return tf.matmul(hidden3, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    
  # Optimizer.
  optimizer = tf.train.AdamOptimizer(0.005).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))


Conv1  [256, 14, 14, 16]
Hidden1  [256, 14, 14, 16]
Pool1  [256, 7, 7, 16]
Conv2  [256, 4, 4, 32]
Hidden2  [256, 4, 4, 32]
Pool2  [256, 2, 2, 32]
Conv1  [10000, 14, 14, 16]
Hidden1  [10000, 14, 14, 16]
Pool1  [10000, 7, 7, 16]
Conv2  [10000, 4, 4, 32]
Hidden2  [10000, 4, 4, 32]
Pool2  [10000, 2, 2, 32]
Conv1  [10000, 14, 14, 16]
Hidden1  [10000, 14, 14, 16]
Pool1  [10000, 7, 7, 16]
Conv2  [10000, 4, 4, 32]
Hidden2  [10000, 4, 4, 32]
Pool2  [10000, 2, 2, 32]


In [None]:
# Train model and compute test set accuracy

num_steps = 1001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))