In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt 
import os

In [2]:
# Load the Fashion-MNIST train/test splits bundled with Keras.
fashion_mnist = keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Report the shape of every split, in the order: train images, train labels,
# test images, test labels.
for split_array in (x_train, y_train, x_test, y_test):
  print(split_array.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
# Flatten every image into a single row vector. The number of samples is read
# from each array instead of being hard-coded, so the cell keeps working if a
# subset (or a different dataset) is loaded above.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

In [4]:
def normalize(x_train, x_test):
  """Standardise both splits with the mean and standard deviation of the train set.

  The statistics are computed over all values of ``x_train`` (a single scalar
  mean and a single scalar standard deviation) and the same two numbers are
  applied to ``x_test`` so that no test-set information leaks into the scaling.

  Args:
    x_train: array-like of training values.
    x_test: array-like of test values.

  Returns:
    Tuple ``(x_train, x_test)`` of the scaled arrays.
  """
  train_mean = np.mean(x_train)
  # Use the standard deviation here; dividing by the mean does not give
  # unit-variance data.
  train_std = np.std(x_train)
  if train_std == 0:
    # Constant input: avoid a division by zero, the centred data is all zeros.
    train_std = 1.0
  x_train = (x_train - train_mean) / train_std
  x_test = (x_test - train_mean) / train_std
  return x_train, x_test

In [5]:
def convert_to_one_hot(labels):
  """Turn an array of integer class ids into a one-hot matrix.

  The number of columns is ``max(labels) + 1``; the number of rows is
  ``labels.shape[0]``.

  Args:
    labels: integer NumPy array of class ids.

  Returns:
    Float array of shape ``(labels.shape[0], max(labels) + 1)`` holding a
    single 1 per row, in the column given by that row's label.
  """
  flat_labels = labels.ravel()
  row_count = labels.shape[0]
  class_count = np.max(labels) + 1
  encoded = np.zeros((row_count, class_count))
  row_index = np.arange(row_count)
  encoded[row_index, flat_labels] = 1
  return encoded
  

In [6]:
# Normalise the pixel arrays and one-hot encode the label arrays.
# NOTE: every statement rebinds the name it reads (x_train, x_test, y_train,
# y_test), so this cell is not idempotent: running it a second time would
# normalise already-normalised pixels and feed one-hot matrices back into
# convert_to_one_hot. Re-run the loading cells first if it must be repeated.
x_train, x_test = normalize(x_train, x_test)
y_train = convert_to_one_hot(y_train)
y_test = convert_to_one_hot(y_test)

In [7]:
def get_placeholders(input_size, output_size):
  """Create the float32 placeholders that feed the graph.

  Args:
    input_size: width of one input row.
    output_size: width of one target row.

  Returns:
    Tuple ``(inputs, targets)``: placeholders named "inputs" and "targets"
    with shapes ``[None, input_size]`` and ``[None, output_size]``.
  """
  feature_shape = [None, input_size]
  target_shape = [None, output_size]
  inputs = tf.placeholder(tf.float32, shape=feature_shape, name="inputs")
  targets = tf.placeholder(tf.float32, shape=target_shape, name="targets")
  return inputs, targets

In [8]:
def dense_layer(input, hidden_units, layer_no, kernel_initializer, activation_fn= tf.nn.relu):
  """Build one fully connected layer and record histograms for it.

  Creates the variables ``layer<N>/kernel`` and ``layer<N>/biases`` (biases
  start at zero), computes ``input @ kernel + biases`` under the op name
  ``layer<N>/MatMul`` and optionally applies an activation.

  Args:
    input: 2-D tensor; its second dimension sets the kernel's first dimension.
    hidden_units: number of output units of the layer.
    layer_no: layer index used in variable, op and summary names.
    kernel_initializer: initializer handed to ``tf.get_variable`` for the kernel.
    activation_fn: callable applied to the pre-activation; a falsy value
      (e.g. ``None``) leaves the pre-activation unchanged.

  Returns:
    The layer output tensor.
  """
  scope = "layer{}".format(layer_no)
  kernel = tf.get_variable(scope + "/kernel",
                           shape=[input.shape[1], hidden_units],
                           initializer=kernel_initializer)
  bias = tf.get_variable(scope + "/biases",
                         shape=[hidden_units],
                         initializer=tf.zeros_initializer())
  pre_activation = tf.add(tf.matmul(input, kernel), bias, name=scope + "/MatMul")
  # Histogram of the pre-activation values, before any non-linearity.
  tf.summary.histogram('MatMul{}'.format(layer_no), pre_activation)
  output = activation_fn(pre_activation) if activation_fn else pre_activation
  # Histogram of what the layer actually hands to the next one.
  tf.summary.histogram('Output{}'.format(layer_no), output)
  return output
                                             

In [9]:
def build_network(features, labels, hidden_units, num_layers, kernel_initializer, activation_fn):
  """Stack ``num_layers`` dense layers on top of ``features`` and return the logits.

  Layers 1 .. num_layers-1 use ``hidden_units[0]`` .. ``hidden_units[num_layers-2]``
  units with ``activation_fn``; the last layer (number ``num_layers``) always has
  10 units and no activation.

  Args:
    features: input tensor of the network.
    labels: not used by this function.
    hidden_units: sequence with at least ``num_layers - 1`` layer widths.
    num_layers: total number of dense layers, output layer included.
    kernel_initializer: initializer forwarded to every ``dense_layer`` call.
    activation_fn: activation used by all layers except the last.

  Returns:
    The logits tensor produced by the final layer.
  """
  hidden = features
  for layer_no in range(1, num_layers):
    width = hidden_units[layer_no - 1]
    hidden = dense_layer(hidden, width, layer_no, kernel_initializer, activation_fn)
  # Output layer: linear, fixed at 10 units.
  return dense_layer(hidden, 10, num_layers, kernel_initializer, None)

In [10]:
def compute_loss(logits, labels, learning_rate=0.001):
  """Build the softmax cross-entropy loss and the Adam training op.

  Also registers a scalar summary named 'loss'.

  Args:
    logits: unscaled network outputs.
    labels: target distribution per example, same shape as ``logits``.
    learning_rate: step size given to ``tf.train.AdamOptimizer``. Defaults to
      0.001, the value that used to be hard-coded here.

  Returns:
    Tuple ``(loss, optimizer)`` where ``loss`` is the mean cross-entropy over
    the batch and ``optimizer`` is the op returned by ``minimize(loss)``
    (running it performs one update step).
  """
  per_example = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)
  loss = tf.reduce_mean(per_example)
  optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
  tf.summary.scalar('loss', loss)
  return loss, optimizer

In [11]:
def compute_accuracy(logits, labels):
  """Build the batch accuracy tensor and register it as the 'accuracy' summary.

  A prediction counts as correct when the arg-max of the logits along axis 1
  equals the arg-max of the labels along axis 1.

  Args:
    logits: network outputs, one row per example.
    labels: targets, one row per example.

  Returns:
    Scalar float32 tensor: the fraction of correct predictions.
  """
  predicted_class = tf.argmax(logits, 1)
  expected_class = tf.argmax(labels, 1)
  hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
  accuracy = tf.reduce_mean(hits)
  tf.summary.scalar('accuracy', accuracy)
  return accuracy

In [12]:
def get_variance(x, session=None):
  """Evaluate the variance of all elements of ``x`` and return it as a number.

  Builds ``mean((x - mean(x)) ** 2)`` in the current graph and runs it.

  Args:
    x: tensor (or value convertible to one) whose variance is wanted.
    session: ``tf.Session`` used to evaluate the result. When omitted, a
      notebook-level variable called ``sess`` is used if one exists (this is
      what the function relied on before); otherwise the current default
      session is used.

  Returns:
    The evaluated variance.

  Raises:
    RuntimeError: if no session is given and none can be found.
  """
  mean = tf.reduce_mean(x)
  variance = tf.reduce_mean(tf.square(x - mean))
  if session is None:
    try:
      # Backward compatibility: fall back to a global `sess` when it exists.
      session = sess
    except NameError:
      session = tf.get_default_session()
  if session is None:
    raise RuntimeError(
        "get_variance needs a tf.Session: pass session=... or call it inside "
        "a `with tf.Session():` block")
  return session.run(variance)
  

In [13]:
def get_matmul_tensors(num_layers):
  """Look up the pre-activation tensors of layers 1 .. ``num_layers``.

  Args:
    num_layers: number of layers whose ``layer<N>/MatMul:0`` tensor is fetched
      from the default graph.

  Returns:
    List of tensors, ordered from layer 1 to layer ``num_layers``.
  """
  graph = tf.get_default_graph()
  tensors = []
  for layer_no in range(1, num_layers + 1):
    tensor_name = 'layer{}/MatMul:0'.format(layer_no)
    tensors.append(graph.get_tensor_by_name(tensor_name))
  return tensors

In [14]:
def train_model(features, labels, hidden_units, epochs, batch_size, learning_rate, num_layers, kernel_initializer, activation_fn, file_name, single_pass=False):
  """Build a fresh graph, run it, and write TensorBoard summaries to ``file_name``.

  The default graph is reset on every call. Data is fed through placeholders
  into a batched ``tf.data`` pipeline with an initializable iterator.

  Args:
    features: 2-D array of inputs; ``features.shape[1]`` is the input width.
    labels: 2-D array of targets; ``labels.shape[1]`` is the output width.
    hidden_units: widths of the hidden layers, forwarded to ``build_network``.
    epochs: number of passes over the data when ``single_pass`` is False.
    batch_size: number of examples per batch.
    learning_rate: accepted for interface compatibility.
      NOTE(review): it is not forwarded -- ``compute_loss`` is called with
      logits and labels only, so that function's own learning rate applies.
    num_layers: total number of dense layers, forwarded to ``build_network``.
    kernel_initializer: kernel initializer, forwarded to ``build_network``.
    activation_fn: hidden-layer activation, forwarded to ``build_network``.
    file_name: log directory handed to ``tf.summary.FileWriter``.
    single_pass: when True, no training step is run; the summaries of the
      first batch are evaluated once (at step 0) and written.

  Returns:
    None.
  """
  tf.reset_default_graph()
  input_size = features.shape[1]
  output_size = labels.shape[1]

  # Placeholders -> dataset -> batches -> iterator -> next batch.
  inputs, targets = get_placeholders(input_size, output_size)
  dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
  dataset = dataset.batch(batch_size)
  iterator = dataset.make_initializable_iterator()
  x_batch, y_batch = iterator.get_next()

  # Forward pass, loss/training op and accuracy (each registers summaries).
  logits = build_network(x_batch, y_batch, hidden_units, num_layers, kernel_initializer, activation_fn)
  loss, optimizer = compute_loss(logits, y_batch)
  compute_accuracy(logits, y_batch)

  init_op = tf.global_variables_initializer()
  merged_summary = tf.summary.merge_all()

  # No checkpoint is written below; the Saver is kept only so that the graph
  # logged to TensorBoard is unchanged. NOTE(review): drop it if not needed.
  saver = tf.train.Saver()

  feed = {inputs: features, targets: labels}

  with tf.Session() as sess:
    sess.run(init_op)
    train_writer = tf.summary.FileWriter(file_name, sess.graph)
    try:
      iteration = 0

      if single_pass:
        sess.run(iterator.initializer, feed_dict=feed)
        try:
          summary = sess.run(merged_summary)
          train_writer.add_summary(summary, iteration)
        except tf.errors.OutOfRangeError:
          # Empty dataset: nothing to summarise.
          pass

      else:
        for epoch in range(epochs):
          # Rewind the iterator to the start of the data for this epoch.
          sess.run(iterator.initializer, feed_dict=feed)
          try:
            while True:
              if iteration % 100 == 0:
                # Fetch the summaries in the SAME run call as the training
                # step. A separate sess.run(merged_summary) would pull one
                # more batch from the iterator that is never trained on.
                _, summary = sess.run([optimizer, merged_summary])
                train_writer.add_summary(summary, iteration)
              else:
                sess.run(optimizer)
              iteration += 1
          except tf.errors.OutOfRangeError:
            # Raised by the iterator once the epoch's data is exhausted.
            pass
    finally:
      # Flush pending events to disk and release the file handle.
      train_writer.close()
        
      
  
  
  

In [15]:
# ---- Experiment configuration ------------------------------------------------
# Data and hyper-parameters shared by every run started in this cell.
features = x_train
labels = y_train
epochs = 20
batch_size = 256 
learning_rate = 0.001

# num_layers counts the output layer as well: build_network creates
# num_layers - 1 hidden layers from hidden_units plus one final layer.
num_layers = 4
hidden_units = [100,100,100]
# input_units / output_units are not referenced again in this cell.
input_units = x_train.shape[1]
output_units = y_train.shape[1] 

# Kernel initializers under comparison. Note that tf.random_normal_initializer
# is stored as the class itself (it is not called here), while the two
# variance-scaling initializers are constructed with explicit arguments.
random_normal_initializer = tf.random_normal_initializer
xavier_initializer = tf.contrib.layers.variance_scaling_initializer(factor=1.0, mode='FAN_AVG', uniform=False)
he_initializer = tf.contrib.layers.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False)


# Each dictionary maps a run name to (activation function, kernel initializer).
# Runs in this group are started with single_pass=True.
single_pass_models = {
    "Sigmoid - Random Normal Initializer - Single Pass" : (tf.nn.sigmoid, random_normal_initializer),
    "Sigmoid - Xavier Initializer - Single Pass" : (tf.nn.sigmoid, xavier_initializer)
}

# Runs in this group are started with single_pass=False (full training loop).
full_pass_models = {
    "Sigmoid - Random Normal Initializer - Full Pass" : (tf.nn.sigmoid, random_normal_initializer),
    "Sigmoid - Xavier Initializer - Full Pass" : (tf.nn.sigmoid, xavier_initializer),
    "RELU - Random Normal Initializer - Full Pass" : (tf.nn.relu, random_normal_initializer),
    "RELU - Xavier Initializer - Full Pass" : (tf.nn.relu, xavier_initializer),
    "RELU - He Initializer - Full Pass" : (tf.nn.relu, he_initializer),
}

# Pairs of (model dictionary, single_pass flag) processed in this order.
all_models = [
    (single_pass_models, True),
    (full_pass_models, False)
]

# Summaries are written below <current working directory>/tensorboard_visualizations.
tensorboard_dir = os.path.join(os.getcwd() , "tensorboard_visualizations")

# ---- Run every configured model ----------------------------------------------
for models, single_pass in all_models:
  for name, (activation_fn, kernel_initializer) in models.items():
    # model_name is used twice: as the printed progress line and as the
    # sub-directory (ending in /debug) that receives this run's event files.
    model_name = "Running model - {}/debug".format(name)
    file_name = os.path.join(tensorboard_dir, model_name)
    print(model_name)
    train_model(features = features,
                  labels = labels, 
                  epochs = epochs, 
                  hidden_units = hidden_units,                                
                  batch_size = batch_size, 
                  learning_rate = learning_rate, 
                  num_layers = num_layers, 
                  kernel_initializer = kernel_initializer,                               
                  activation_fn = activation_fn, 
                  file_name = file_name,
                  single_pass=single_pass)

Running model - Sigmoid - Random Normal Initializer - Single Pass/debug
Running model - Sigmoid - Xavier Initializer - Single Pass/debug
Running model - RELU - Xavier Initializer - Full Pass/debug
Running model - RELU - He Initializer - Full Pass/debug
Running model - Sigmoid - Xavier Initializer - Full Pass/debug
Running model - Sigmoid - Random Normal Initializer - Full Pass/debug
Running model - RELU - Random Normal Initializer - Full Pass/debug
