2.4. Grad Students only [20 marks] Add label noise to the training data set and train the two-layer model. To create mislabeled data you can simply shuffle some of the labels. For example, if you want 10 percent of the data to be mislabeled, you can permute/shuffle 10% of the labels. Evaluate the performance of the network with different rates of label noise (10%, 25%, 50%, 75%, 100%) applied. Illustrate your results in a plot. Explain the performance of your network with 100% noise, specifically the difference between the training and test error.

In [None]:
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""Convolutional Neural Network Estimator for MNIST, built with tf.layers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import sys
#tf.logging.set_verbosity(tf.logging.INFO)
# Digit labels the classifier distinguishes; the logits layer of the model
# gets one output unit per entry.
classes = list(range(10))
num_classes = len(classes)

def my_cnn_model_fn(features, labels, mode):
  """Estimator model function: two conv/pool stages followed by two dense layers.

  Args:
    features: dict whose "x" entry holds the input images; it is reshaped to
      [batch_size, 28, 28, 1] before the first convolution.
    labels: integer class labels, consumed by the sparse softmax
      cross-entropy loss and by the accuracy metric (unused in PREDICT mode).
    mode: one of the tf.estimator.ModeKeys values.

  Returns:
    A tf.estimator.EstimatorSpec carrying predictions (PREDICT), loss and
    train_op (TRAIN), or loss and an "accuracy" metric (any other mode).
  """
  is_training = mode == tf.estimator.ModeKeys.TRAIN

  # 4-D input: [batch_size, width, height, channels]; MNIST images are
  # 28x28 with a single color channel.
  net = tf.reshape(features["x"], [-1, 28, 28, 1])

  # Two identical stages that differ only in the number of filters:
  #   5x5 "same"-padded ReLU convolution (spatial size preserved), then
  #   2x2 max pooling with stride 2 (spatial size halved).
  # Shapes: [b, 28, 28, 1] -> [b, 14, 14, 32] -> [b, 7, 7, 64]
  for n_filters in (32, 64):
    net = tf.layers.conv2d(
        inputs=net,
        filters=n_filters,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    net = tf.layers.max_pooling2d(inputs=net, pool_size=[2, 2], strides=2)

  # Flatten [b, 7, 7, 64] into [b, 7 * 7 * 64] for the dense layers.
  flat = tf.reshape(net, [-1, 7 * 7 * 64])

  # Fully connected layer with 1024 ReLU units.
  hidden = tf.layers.dense(inputs=flat, units=1024, activation=tf.nn.relu)

  # Dropout with rate 0.4 (each element kept with probability 0.6); only
  # active while training.
  hidden = tf.layers.dropout(inputs=hidden, rate=0.4, training=is_training)

  # One logit per class: [b, 1024] -> [b, num_classes]
  logits = tf.layers.dense(inputs=hidden, units=num_classes)

  predictions = {
      # Index of the largest logit per example.
      "classes": tf.argmax(input=logits, axis=1),
      # Named so the softmax output can be looked up in the graph by name.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
  }
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # The loss is needed by every remaining mode.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  if is_training:
    # Plain gradient descent; minimize() also advances the global step.
    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  # Evaluation: report classification accuracy alongside the loss.
  accuracy = tf.metrics.accuracy(
      labels=labels, predictions=predictions["classes"])
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=loss, eval_metric_ops={"accuracy": accuracy})

def _shuffle_label_subset(labels, fraction, rng):
    """Return an int32 copy of `labels` with a random `fraction` of entries permuted."""
    # np.array always copies, so the caller's array is never modified.
    noisy = np.array(labels, dtype=np.int32)
    n_noisy = int(fraction * len(noisy))
    picked = rng.choice(len(noisy), size=n_noisy, replace=False)
    # Fancy indexing yields a copy; shuffle it and write it back so the chosen
    # positions exchange labels among themselves.
    shuffled = noisy[picked]
    rng.shuffle(shuffled)
    noisy[picked] = shuffled
    return noisy


def _select_classes(data, labels):
    """Keep only the examples whose label is listed in the module-level `classes`."""
    keep = np.isin(labels, classes)
    return data[keep, :], labels[keep]


def noise_implementation(noise_percent, seed=None):
    """Train the CNN on MNIST with label noise and evaluate it on clean test data.

    A `noise_percent` fraction of the training labels is picked at random
    (without replacement) and permuted among the picked positions. The test
    labels are left untouched. A fresh Estimator is trained for 1000 steps
    with batch size 10 and then evaluated once over the test set.

    Args:
        noise_percent: fraction of training labels to shuffle, in [0, 1]
            (e.g. 0.25 for 25% label noise).
        seed: optional seed for the label shuffling only. When None, NumPy's
            global random state is used. The seed is not passed to
            TensorFlow, so weight initialisation and batch order are
            unaffected by it.

    Returns:
        The metrics dict returned by `Estimator.evaluate`; it includes the
        "accuracy" metric defined by `my_cnn_model_fn`.

    Raises:
        ValueError: if `noise_percent` is outside [0, 1].
    """
    if not 0.0 <= noise_percent <= 1.0:
        raise ValueError(
            "noise_percent must be a fraction in [0, 1], got %r"
            % (noise_percent,))

    # np.random (the module) exposes the same choice/shuffle functions as a
    # RandomState instance, so either can serve as the generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Load training and eval data
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")

    train_labels = _shuffle_label_subset(mnist.train.labels, noise_percent, rng)
    sample_train_data, sample_train_labels = _select_classes(
        mnist.train.images, train_labels)

    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
    sample_eval_data, sample_eval_labels = _select_classes(
        mnist.test.images, eval_labels)

    # Create the Estimator (no model_dir given, so each call starts untrained).
    mnist_classifier = tf.estimator.Estimator(model_fn=my_cnn_model_fn)

    # Train the model; num_epochs=None repeats the data until `steps` is hit.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": sample_train_data},
        y=sample_train_labels,
        batch_size=10,
        num_epochs=None,
        shuffle=True)
    mnist_classifier.train(input_fn=train_input_fn, steps=1000)

    # Evaluate on one in-order pass over the clean test set.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": sample_eval_data},
        y=sample_eval_labels,
        batch_size=10,
        num_epochs=1,
        shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    return eval_results

In [None]:
# Run the experiment with 10% of the training labels shuffled.
eval_results_10 = noise_implementation(noise_percent=0.1)

In [None]:
# Run the experiment with 25% of the training labels shuffled.
eval_results_25 = noise_implementation(noise_percent=0.25)

In [None]:
# Run the experiment with 50% of the training labels shuffled.
eval_results_50 = noise_implementation(noise_percent=0.5)

In [None]:
# Run the experiment with 75% of the training labels shuffled.
eval_results_75 = noise_implementation(noise_percent=0.75)

In [None]:
# Run the experiment with all (100%) of the training labels shuffled.
eval_results_100 = noise_implementation(noise_percent=1.0)

In [None]:
# Label-noise levels (in percent) used as the x-axis of the plot.
noise_amount = np.array((10, 25, 50, 75, 100))

In [None]:
# Collect the 'accuracy' entry of each run, ordered by increasing noise level.
model_accuracy = np.array([
    run_result['accuracy']
    for run_result in (
        eval_results_10,
        eval_results_25,
        eval_results_50,
        eval_results_75,
        eval_results_100,
    )
])

In [None]:
# Show the collected accuracy values (one entry per noise level).
print(model_accuracy)

In [None]:
import matplotlib.pyplot as plt

# Line plot of the evaluation accuracy against the label-noise percentage.
plt.plot(noise_amount, model_accuracy)
plt.title('Noise v/s Accuracy')
plt.xlabel('Noise Percentage')
plt.ylabel('Model Accuracy')
plt.show()

The plot above shows how the model's accuracy depends on the amount of label noise in the training data. By noise, we mean shuffling a portion of the training labels while keeping the features (here, the images) intact. The portion of labels that is shuffled is set by the noise level we request: the program shuffles exactly that fraction of the training labels and leaves the rest unchanged.

In [None]:
# Report the accuracy of the 100%-noise run, which is the fifth (index 4)
# entry of model_accuracy.
print("The accuracy of the model with 100% noise is :")
print(model_accuracy[4])

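It is clear from the plot and the printed accuracy that, with 100% label noise, the model still classifies only a small fraction of the test images correctly. Because every training label has been shuffled, the labels no longer carry consistent information about the images, so whatever the network fits on the training set does not transfer to the correctly labeled test set.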