<a href="https://colab.research.google.com/github/alexanderimanicowenrivers/TF-Pruning/blob/master/MNIST_Prune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intro 

In this colab document we will explore how to create and prune a basic model in tensorflow.


# Imports & utils

In [0]:
from __future__ import print_function
import keras
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow.contrib.eager as tfe
import numpy as np
import tensorflow as tf

In [0]:
# Plot learning curves of experiments
def plot_learning_curves(experiment_data):
  # Generate figure.
  fig, axes = plt.subplots(3, 4, figsize=(22,12))
  st = fig.suptitle(
      "Learning Curves for all Tasks and Hyper-parameter settings",
      fontsize="x-large")
  # Plot all learning curves.
  for i, results in enumerate(experiment_data):
    for j, (setting, train_accuracy, test_accuracy) in enumerate(results):
      # Plot.
      xs = [x * log_period_samples for x in range(1, len(train_accuracy)+1)]
      axes[j, i].plot(xs, train_accuracy, label='train_accuracy')
      axes[j, i].plot(xs, test_accuracy, label='test_accuracy')
      # Prettify individual plots.
      axes[j, i].ticklabel_format(style='sci', axis='x', scilimits=(0,0))
      axes[j, i].set_xlabel('Number of samples processed')
      axes[j, i].set_ylabel('Epochs: {}, Learning rate: {}.  Accuracy'.format(*setting))
      axes[j, i].set_title('Task {}'.format(i + 1))
      axes[j, i].legend()
  # Prettify overall figure.
  plt.tight_layout()
  st.set_y(0.95)
  fig.subplots_adjust(top=0.91)
  plt.show()

# Generate summary table of results.
def plot_summary_table(experiment_data):
  # Fill Data.
  cell_text = []
  rows = []
  columns = ['Setting 1', 'Setting 2', 'Setting 3']
  for i, results in enumerate(experiment_data):
    rows.append('Model {}'.format(i + 1))
    cell_text.append([])
    for j, (setting, train_accuracy, test_accuracy) in enumerate(results):
      cell_text[i].append(test_accuracy[-1])
  # Generate Table.
  fig=plt.figure(frameon=False)
  ax = plt.gca()
  the_table = ax.table(
      cellText=cell_text,
      rowLabels=rows,
      colLabels=columns,
      loc='center')
  the_table.scale(1, 4)
  # Prettify.
  ax.patch.set_facecolor('None')
  ax.xaxis.set_visible(False)
  ax.yaxis.set_visible(False)

# Data loader

In [0]:
# Global variables.
log_period_samples = 20000
batch_size = 100

In [0]:
def get_data():
    return input_data.read_data_sets("MNIST_data/", one_hot=True)

In [12]:
  mnist=get_data()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [0]:
# Store results of runs with different configurations in a dictionary.
# Use a tuple (num_epochs, learning_rate) as keys, and a tuple (training_accuracy, testing_accuracy)
experiments_task1 = []
settings = [(5, 0.005)]

# Model

In [0]:
weight_default_name='weight'
bias_default_name='bias'

def get_placeholders():
  x = tf.placeholder(tf.float32, [None, 784])
  y_ = tf.placeholder(tf.float32, [None, 10])
  return x, y_

def _weight_variable(
        shape,
        initializer=None,
        name=None,
        layer_no=0,):
    """
    Returns a weight variable with a given shape.
    :param initializer: TensorFlow initializer. Default Xavier.
    :param layer: Variable layer number.
    :param shape: var shape.
    """
    if name==weight_default_name: 
      name=weight_default_name+'_'+str(layer_no)
      
    if initializer is None:
        initializer = tf.contrib.layers.xavier_initializer()

    var = tf.get_variable(name, shape, initializer=initializer)
    return var
  
def _bias_variable(
        shape,
        initializer=None,
        layer_no=0,
):
    """
    Returns a bias variable with a given shape.
    :param initializer: TensorFlow initializer. Default zero.
    :param layer_no: Variable layer number.
    :param shape: Variable shape.
    """
    name=bias_default_name+'_'+str(layer_no)
    if initializer is None:
        initializer = tf.constant_initializer(0.)

    return _weight_variable(shape,
                            initializer=initializer,
                            name=name)
  
  
def affine(
        inp,
        units,
        bias=True,
        W_initializer=None,
        b_initializer=None,
        W_name=weight_default_name,
        bias_name=bias_default_name,
        layer_no=0
):
    """ Creates an affine layer.
    :param inp: Input tensor.
    :param units: Number of units.
    :param bias: Include bias term.
    :param W_initializer: Initializer for the multiplicative weight.
    :param b_initializer: Initializer for the bias term.
    :param W_name: Name of the weight.
    :param bias_name: Name of the bias.
    :return: Tensor defined as input.dot(weight) + bias.
    """
    input_size = inp.shape[-1]
    W = _weight_variable([input_size, units],
                         initializer=W_initializer,
                         name=W_name,layer_no=layer_no)

    output = tf.matmul(inp, W)

    if bias:
        b = _bias_variable((units,),
                           initializer=b_initializer,
                           layer_no=layer_no)

        output=tf.add(output, b)

    return output
  
def mlp(inputs,
        layer_sizes,
        nonlinearity=tf.nn.relu,
        output_nonlinearity=None,
        W_initializer=None,
        b_initializer=None):
    """
    Creates a multi-layer perceptron with given hidden sizes. A nonlinearity
    is applied after every hidden layer.
    
    output shape: N x (number of output units)
    :param inputs: List of input tensors.
    :param layer_sizes: List of layers sizes, including output layer size.
    :param nonlinearity: Hidden layer nonlinearity.
    :param output_nonlinearity: Output layer nonlinearity.
    :param W_initializer: Weight initializer.
    :param b_initializer: Bias initializer.
    :return:
    """
    if layer_sizes[-1] is None:
        layer_sizes = list(layer_sizes)
        layer_sizes[-1] = 1

    # Take care of the input layer separately to make use of broadcasting in
    # a case of several input tensors.
    layer = affine(
    inp=inputs,
    units=layer_sizes[0],
    bias=False,
    W_initializer=W_initializer,
    b_initializer=b_initializer
    )

    layer = nonlinearity(layer)

    for i_layer, size in enumerate(layer_sizes[1:], 1):

      layer = affine(layer, size,
                     W_initializer=W_initializer,
                     b_initializer=b_initializer,
                    layer_no=i_layer)
      if i_layer < len(layer_sizes) - 1:
          layer = nonlinearity(layer)

    return layer
  
def get_graph():
  x, y_ = get_placeholders()
  linear=mlp(x,[1000, 1000, 500, 200,10])
  loss=tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=linear,labels=y_))
  opt=tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
  correct_prediction = tf.equal(tf.argmax(linear,1), tf.argmax(y_,1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  
  return x, y_,linear,loss,opt,correct_prediction,accuracy

# Training

In [49]:
print('Training Model 1')

# Train Model 1 with the different hyper-parameter settings.
for (num_epochs, learning_rate) in settings:

  # Reset graph, recreate placeholders and dataset.
  tf.reset_default_graph()
  mnist = get_data()
  eval_mnist = get_data()

  #####################################################
  # Define model, loss, update and evaluation metric. #
  #####################################################
  x, y_,linear,loss,opt,correct_prediction,accuracy=get_graph()
  # Train.
  i, train_accuracy, test_accuracy = 0, [], []
  log_period_updates = int(log_period_samples / batch_size)
  saver = tf.train.Saver()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    while mnist.train.epochs_completed < num_epochs:
      
      # Update.
      i += 1
      batch_xs, batch_ys = mnist.train.next_batch(batch_size)
      
      #################
      # Training step #
      #################
      feed_dict={x:batch_xs,y_:batch_ys}
      _=sess.run([opt],feed_dict=feed_dict)
      
      # Periodically evaluate.
      if i % log_period_updates == 0:
        
        #####################################
        # Compute and store train accuracy. #
        #####################################
        
        batch_xs, batch_ys = mnist.train.next_batch((int(mnist.train.labels.shape[0]/5)))
        
        feed_dict={x:batch_xs,y_:batch_ys}
        acc=sess.run([accuracy],feed_dict)
        print(f'Accuracy {acc} at iteration {i}')

        train_accuracy.append(acc)
        #####################################
        # Compute and store test accuracy.  #
        #####################################
    save_path = saver.save(sess, "./model.ckpt")

    feed_dict={x:eval_mnist.test.images,y_:eval_mnist.test.labels}
    acc=sess.run([accuracy],feed_dict)
    test_accuracy.append(acc)
  
  experiments_task1.append(
      ((num_epochs, learning_rate), train_accuracy, test_accuracy))

Training Model 1
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Accuracy [0.9398182] at iteration 200
Accuracy [0.9578182] at iteration 400
Accuracy [0.9628182] at iteration 600
Accuracy [0.96327275] at iteration 800
Accuracy [0.95945454] at iteration 1000
Accuracy [0.96863633] at iteration 1200
Accuracy [0.973] at iteration 1400
Accuracy [0.9718182] at iteration 1600
Accuracy [0.9728182] at iteration 1800


In [57]:
tf.reset_default_graph()
x, y_,linear,loss,opt,correct_prediction,accuracy=get_graph()
saver = tf.train.Saver()

with tf.Session() as sess:
  # Restore variables from disk.
  saver.restore(sess, "./model.ckpt")
  print("Model restored.")
  # Check the values of the variables
  all_vars = tf.get_collection('vars')
  batch_xs, batch_ys = mnist.train.next_batch((int(mnist.train.labels.shape[0]/5)))
  feed_dict={x:batch_xs,y_:batch_ys}
  acc=sess.run([accuracy],feed_dict)
  print(f'Accuracy {acc} at iteration {i}')

INFO:tensorflow:Restoring parameters from ./model.ckpt
Model restored.
Accuracy [0.97409093] at iteration 1800


# Pruning

# Analysis