MNIST using Ensemble Learning

In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
import os
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta

import prettytensor as pt ### Pretty tensor used to simplify NN representation

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from data/MNIST/ with the labels one-hot encoded (one_hot=True).
data = input_data.read_data_sets(
    'data/MNIST/',
    one_hot=True,
)

Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# Report how many labelled examples each split contains (rows of its label array).
print('Size of-->')
print("Training Set:-\t\t{}".format(data.train.labels.shape[0]))
print("Test Set:-\t\t{}".format(data.test.labels.shape[0]))
print("Validation Set:-\t{}".format(data.validation.labels.shape[0]))

Size of-->
Training Set:-		55000
Test Set:-		10000
Validation Set:-	5000


In [4]:
# Integer class ids for the validation and test splits: the argmax along
# axis=1 is the column index of the largest entry in each one-hot label row.
data.validation.cls = data.validation.labels.argmax(axis=1)
data.test.cls = data.test.labels.argmax(axis=1)

In [5]:
## Pool the training and validation splits (stacked row-wise) so that each
## ensemble member can draw its own random split from the pooled data.
combined_images = np.concatenate((data.train.images, data.validation.images), axis=0)
combined_labels = np.concatenate((data.train.labels, data.validation.labels), axis=0)

In [6]:
# Number of examples (rows) in the pooled data.
combined_size = combined_images.shape[0]

In [7]:
# Each network trains on this fraction of the pooled data; the remaining
# rows form that network's own validation split.
train_fraction = 0.8
train_size = int(train_fraction * combined_size)
train_size

48000

Ensemble learning: train several neural networks, each on a different random training set drawn from the combined data, and then combine their predictions.

In [8]:
## Creating random training set

def random_training_set():
    """Draw a fresh random train/validation split of the pooled data.

    A random permutation of all ``combined_size`` row indices is generated;
    the first ``train_size`` indices select the training rows and the
    remaining indices select the validation rows.

    Returns:
        Tuple ``(x_train, y_train, x_validation, y_validation)`` taken from
        ``combined_images`` / ``combined_labels``.
    """
    # e.g. np.random.permutation(5) may give [4 2 1 3 0] or [0 3 1 4 2], ...
    shuffled = np.random.permutation(combined_size)
    train_rows, validation_rows = shuffled[:train_size], shuffled[train_size:]

    return (combined_images[train_rows, :],
            combined_labels[train_rows, :],
            combined_images[validation_rows, :],
            combined_labels[validation_rows, :])

In [9]:
# Images are square: img_size x img_size pixels with a single channel.
img_size = 28
num_channels = 1

# (height, width) used to reshape a flattened image for plotting.
img_shape = (img_size,) * 2

# Length of one image once flattened into a 1-D vector.
img_size_flat = img_shape[0] * img_shape[1]

# Number of output classes.
num_classes = 10

In [10]:
def plot_images(images,
                cls_true,
                ensemble_cls_pred=None,
                best_cls_pred=None):
    """Show up to nine images in a 3x3 grid, labelled with their classes.

    Args:
        images: flattened images; each one is reshaped to ``img_shape``.
        cls_true: true class of each image (same length as ``images``).
        ensemble_cls_pred: optional classes predicted by the ensemble. When
            given, the ensemble and best-network predictions are shown under
            each image as well.
        best_cls_pred: classes predicted by the best single network; read
            only when ``ensemble_cls_pred`` is given.
    """
    assert len(images) == len(cls_true)

    show_predictions = ensemble_cls_pred is not None

    fig, axes = plt.subplots(3, 3)

    # Three-line labels need more vertical room between the rows.
    fig.subplots_adjust(hspace=1.0 if show_predictions else 0.3, wspace=0.3)

    for position, ax in enumerate(axes.flat):
        # Grid cells beyond the number of images are left empty.
        if position < len(images):
            ax.imshow(images[position].reshape(img_shape), cmap='binary')

            if show_predictions:
                xlabel = "True: {0}\nEnsemble: {1}\nBest Net: {2}".format(
                    cls_true[position],
                    ensemble_cls_pred[position],
                    best_cls_pred[position])
            else:
                xlabel = "True: {0}".format(cls_true[position])
            ax.set_xlabel(xlabel)

        # Ticks are removed from every cell, including empty ones.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()

In [11]:
# Input placeholder: flattened images, one per row (any number of rows).
x = tf.placeholder(dtype=tf.float32, shape=[None, img_size_flat], name='x')

## IMPORTANT: convolution layers expect a 4-D tensor
## [num_images, img_height, img_width, num_channels]; -1 lets the first
## dimension be inferred from the number of rows fed to x.
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])

In [12]:
# Ground-truth labels, one one-hot row per image. The width comes from
# num_classes (rather than a literal 10) so it always agrees with the
# classifier layer, which is also sized by num_classes.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y')

# Integer class id of each label row (column index of its largest entry).
y_true_cls = tf.argmax(y_true, axis=1)

In [13]:
x_pretty = pt.wrap(x_image)  ## wrap the 4-D image tensor in a PrettyTensor object so the layers below can be chained onto it

In [14]:
## Network, built as one chained expression:
##   conv2d(kernel=5, depth=16) -> max_pool(kernel=2, stride=2)
##   -> conv2d(kernel=5, depth=36) -> max_pool(kernel=2, stride=2)
##   -> flatten -> fully_connected(size=128) -> softmax_classifier(num_classes)
## The chain's result is unpacked into (y_pred, loss): y_pred holds one row of
## per-class outputs per image (presumably softmax probabilities, going by the
## layer name -- confirm), and loss is what the optimizer minimizes.
with pt.defaults_scope(activation_fn=tf.nn.relu):  ## layers created in this scope default to ReLU activation; depth = number of filters
    y_pred,loss = x_pretty.conv2d(kernel=5,depth=16,name='layer_conv1').max_pool(kernel=2,stride=2).\
        conv2d(kernel=5,depth=36,name='layer_conv2').\
        max_pool(kernel=2,stride=2).\
        flatten().\
        fully_connected(size=128,name='layer_fc1').\
        softmax_classifier(num_classes=num_classes,labels=y_true)
        

In [15]:
# Adam with a fixed learning rate of 1e-4; `optimizer` is the op that is run
# once per training step to minimize `loss`.
adam = tf.train.AdamOptimizer(learning_rate=1e-4)
optimizer = adam.minimize(loss)

In [16]:
# Predicted integer class per image: index of the largest entry in each row of y_pred.
y_pred_cls = tf.argmax(y_pred,axis=1)

In [17]:
# Per-image boolean vector: does the predicted class equal the true class?
# Named *_op because this notebook later defines a helper *function* called
# `correct_prediction`; sharing the name meant that re-running this cell after
# that definition would replace the function with a tensor and break the
# evaluation helpers.
correct_prediction_op = tf.equal(y_true_cls, y_pred_cls)

# Fraction of correct predictions: the booleans are cast to float and averaged.
accuracy = tf.reduce_mean(tf.cast(correct_prediction_op, tf.float32))

In [18]:
# Directory that receives one checkpoint per ensemble member.
save_dir = 'checkpoints/'

# Saver configured to retain up to 100 checkpoints.
saver = tf.train.Saver(max_to_keep=100)

In [19]:
# Create the checkpoint directory if it is missing. exist_ok=True replaces the
# separate exists() check, so there is no window between checking and creating.
os.makedirs(save_dir, exist_ok=True)

In [20]:
def get_save_path(net_number):
    """Return the checkpoint path prefix for ensemble member ``net_number``.

    The path is ``save_dir`` joined with ``'network<net_number>'``. It is
    built with ``os.path.join`` so the result is valid whether or not
    ``save_dir`` ends with a path separator.
    """
    return os.path.join(save_dir, 'network' + str(net_number))

In [21]:
# One session shared by every cell that trains or evaluates the network.
session = tf.Session()


def init_variables():
    """(Re)initialise all variables of the graph in the shared session."""
    # tf.initialize_all_variables() is deprecated; its replacement,
    # tf.global_variables_initializer(), is what the training loop already uses.
    session.run(tf.global_variables_initializer())

In [22]:
# Number of images per training step.
train_batch_size = 64


def random_batch(x_train, y_train):
    """Draw a random mini-batch, without replacement, from the training data.

    Args:
        x_train: array of images, one per row.
        y_train: array of labels, row-aligned with ``x_train``.

    Returns:
        ``(x_batch, y_batch)`` holding the same randomly chosen rows of both
        arrays: ``train_batch_size`` rows, or every row (in random order)
        when fewer than ``train_batch_size`` are available.
    """
    num_images = len(x_train)

    # Sampling without replacement cannot draw more rows than exist, so clamp
    # the batch size instead of letting np.random.choice raise ValueError on
    # a small training set.
    size = min(train_batch_size, num_images)
    idx = np.random.choice(num_images, size=size, replace=False)

    return x_train[idx, :], y_train[idx, :]

In [23]:
def optimize(num_iterations, x_train, y_train):
    """Train the network for ``num_iterations`` mini-batch steps.

    Each step draws a random batch from ``(x_train, y_train)`` and runs the
    optimizer on it. On every 100th step, starting with the first, the
    accuracy on that same batch (measured after the update) is printed.
    The elapsed wall-clock time is printed when training finishes.

    Args:
        num_iterations: number of optimisation steps to run.
        x_train: training images passed to ``random_batch``.
        y_train: training labels passed to ``random_batch``.
    """
    progress_msg = "Optimization Iteration: {0:>6}, Training Batch Accuracy: {1:>6.1%}"
    started = time.time()

    for step in range(num_iterations):
        images, labels = random_batch(x_train, y_train)
        feed = {x: images,
                y_true: labels}
        session.run(optimizer, feed_dict=feed)

        # Only report progress on every 100th step.
        if step % 100 != 0:
            continue

        batch_accuracy = session.run(accuracy, feed_dict=feed)
        print(progress_msg.format(step + 1, batch_accuracy))

    elapsed = time.time() - started

    print("Time usage: " + str(timedelta(seconds=int(round(elapsed)))))

In [24]:
# Number of networks trained and later restored for the ensemble.
num_networks = 5

# Number of optimisation steps (mini-batches) used to train each network.
num_iterations = 10000

In [25]:
# Train every ensemble member from freshly initialised weights on its own
# random training split, then checkpoint it.
for i in range(num_networks):
    print("Neural network: {0}".format(i))

    # Only the training part of the random split is used here.
    x_train, y_train, _, _ = random_training_set()

    # Re-initialise all variables so this network starts from scratch.
    session.run(tf.global_variables_initializer())

    optimize(num_iterations=num_iterations, x_train=x_train, y_train=y_train)

    # One checkpoint per network, restored later for the ensemble.
    saver.save(sess=session, save_path=get_save_path(i))

    # Blank line between the logs of consecutive networks.
    print()

Neural network: 0
Optimization Iteration:      1, Training Batch Accuracy:  14.1%
Optimization Iteration:    101, Training Batch Accuracy:  92.2%
Optimization Iteration:    201, Training Batch Accuracy:  90.6%
Optimization Iteration:    301, Training Batch Accuracy:  98.4%
Optimization Iteration:    401, Training Batch Accuracy:  90.6%
Optimization Iteration:    501, Training Batch Accuracy:  92.2%
Optimization Iteration:    601, Training Batch Accuracy:  95.3%
Optimization Iteration:    701, Training Batch Accuracy:  93.8%
Optimization Iteration:    801, Training Batch Accuracy:  98.4%
Optimization Iteration:    901, Training Batch Accuracy:  96.9%
Optimization Iteration:   1001, Training Batch Accuracy:  93.8%
Optimization Iteration:   1101, Training Batch Accuracy: 100.0%
Optimization Iteration:   1201, Training Batch Accuracy:  96.9%
Optimization Iteration:   1301, Training Batch Accuracy: 100.0%
Optimization Iteration:   1401, Training Batch Accuracy:  98.4%
Optimization Iteration

Optimization Iteration:   2801, Training Batch Accuracy:  98.4%
Optimization Iteration:   2901, Training Batch Accuracy: 100.0%
Optimization Iteration:   3001, Training Batch Accuracy: 100.0%
Optimization Iteration:   3101, Training Batch Accuracy:  98.4%
Optimization Iteration:   3201, Training Batch Accuracy: 100.0%
Optimization Iteration:   3301, Training Batch Accuracy: 100.0%
Optimization Iteration:   3401, Training Batch Accuracy: 100.0%
Optimization Iteration:   3501, Training Batch Accuracy: 100.0%
Optimization Iteration:   3601, Training Batch Accuracy: 100.0%
Optimization Iteration:   3701, Training Batch Accuracy: 100.0%
Optimization Iteration:   3801, Training Batch Accuracy:  96.9%
Optimization Iteration:   3901, Training Batch Accuracy: 100.0%
Optimization Iteration:   4001, Training Batch Accuracy:  98.4%
Optimization Iteration:   4101, Training Batch Accuracy:  98.4%
Optimization Iteration:   4201, Training Batch Accuracy:  98.4%
Optimization Iteration:   4301, Training

Optimization Iteration:   5601, Training Batch Accuracy: 100.0%
Optimization Iteration:   5701, Training Batch Accuracy: 100.0%
Optimization Iteration:   5801, Training Batch Accuracy: 100.0%
Optimization Iteration:   5901, Training Batch Accuracy: 100.0%
Optimization Iteration:   6001, Training Batch Accuracy:  96.9%
Optimization Iteration:   6101, Training Batch Accuracy: 100.0%
Optimization Iteration:   6201, Training Batch Accuracy:  98.4%
Optimization Iteration:   6301, Training Batch Accuracy:  96.9%
Optimization Iteration:   6401, Training Batch Accuracy: 100.0%
Optimization Iteration:   6501, Training Batch Accuracy:  98.4%
Optimization Iteration:   6601, Training Batch Accuracy: 100.0%
Optimization Iteration:   6701, Training Batch Accuracy:  98.4%
Optimization Iteration:   6801, Training Batch Accuracy: 100.0%
Optimization Iteration:   6901, Training Batch Accuracy:  96.9%
Optimization Iteration:   7001, Training Batch Accuracy:  98.4%
Optimization Iteration:   7101, Training

Optimization Iteration:   8401, Training Batch Accuracy: 100.0%
Optimization Iteration:   8501, Training Batch Accuracy: 100.0%
Optimization Iteration:   8601, Training Batch Accuracy: 100.0%
Optimization Iteration:   8701, Training Batch Accuracy:  98.4%
Optimization Iteration:   8801, Training Batch Accuracy: 100.0%
Optimization Iteration:   8901, Training Batch Accuracy: 100.0%
Optimization Iteration:   9001, Training Batch Accuracy: 100.0%
Optimization Iteration:   9101, Training Batch Accuracy:  98.4%
Optimization Iteration:   9201, Training Batch Accuracy: 100.0%
Optimization Iteration:   9301, Training Batch Accuracy: 100.0%
Optimization Iteration:   9401, Training Batch Accuracy: 100.0%
Optimization Iteration:   9501, Training Batch Accuracy: 100.0%
Optimization Iteration:   9601, Training Batch Accuracy: 100.0%
Optimization Iteration:   9701, Training Batch Accuracy:  98.4%
Optimization Iteration:   9801, Training Batch Accuracy: 100.0%
Optimization Iteration:   9901, Training

In [35]:
### Splitting Test data into batches and finding labels

# Number of images pushed through the network per session.run call.
batch_size = 128


def predict_labels(images):
    """Run the currently loaded network on ``images``, one batch at a time.

    Args:
        images: 2-D array with one flattened image per row.

    Returns:
        float64 array of shape ``(len(images), num_classes)`` holding the
        value of ``y_pred`` for every image, in input order.
    """
    num_images = len(images)

    # np.float64 replaces the `np.float` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    pred_label = np.zeros(shape=(num_images, num_classes), dtype=np.float64)

    # `start` is the first row of each batch; the last batch may be shorter.
    for start in range(0, num_images, batch_size):
        end = min(start + batch_size, num_images)
        pred_label[start:end] = session.run(y_pred,
                                            feed_dict={x: images[start:end, :]})

    return pred_label
        

In [36]:
def correct_prediction(images, labels, cls_true):
    """Return a boolean array marking the images the network gets right.

    Args:
        images: images to classify; passed to ``predict_labels``.
        labels: not used by this function; kept in the signature for callers.
        cls_true: true integer class of each image.

    Returns:
        Boolean array, True where the predicted class equals ``cls_true``.
    """
    # Predicted class = column with the largest predicted value in each row.
    predicted_classes = np.argmax(predict_labels(images=images), axis=1)
    return cls_true == predicted_classes
    

In [37]:
##Boolean array for test data

def test_correct():
    return correct_prediction(images=data.test.images, labels = data.test.labels, cls_true=data.test.cls)

In [38]:
##Boolean array for validation data

def validation_correct():
    """Return the boolean correct/incorrect array for ``data.validation``.

    NOTE(review): ``data.validation`` was pooled into ``combined_images``,
    from which every network's random training set is drawn, so most of these
    images were probably seen during training. Confirm before treating the
    result as a held-out validation score.
    """
    validation_set = data.validation
    return correct_prediction(images=validation_set.images,
                              labels=validation_set.labels,
                              cls_true=validation_set.cls)

In [39]:
def classification_accuracy(correct):
    """Return the fraction of True entries in the boolean array ``correct``."""
    fraction_correct = correct.mean()
    return fraction_correct

In [40]:
##For test data
def test_accuracy():
    correct = test_correct()
    return classification_accuracy(correct)

In [41]:
##For Validation data
def validation_accuracy():
    """Return the classification accuracy of the current network on ``data.validation``."""
    return classification_accuracy(validation_correct())

In [46]:
def ensemble_predictions():
    """Restore each saved network and collect its predictions and accuracies.

    For every network ``0 .. num_networks - 1`` the checkpoint is restored
    into the shared session, the test set is predicted once, and that single
    set of predictions is used both for the returned predictions and for the
    test accuracy. Previously the test set was run through the network twice
    per network. One summary line is printed per network.

    Returns:
        Tuple of three NumPy arrays:
        ``pred_labels`` -- per-network test-set predictions, stacked along
        axis 0; ``test_accuracies`` and ``validation_accuracies`` -- one
        value per network.
    """
    pred_labels = []            # test-set predictions of every network
    test_accuracies = []        # test-set accuracy of every network
    validation_accuracies = []  # validation-set accuracy of every network

    msg = "Neural Network : {0}, Validation Set accuracy : {1:.4f}, Test Set accuracy : {2:.4f}"

    for i in range(num_networks):
        saver.restore(sess=session, save_path=get_save_path(i))

        # Predict the test set once; the accuracy is derived from the same
        # predictions instead of running the network over the test set again.
        pred = predict_labels(images=data.test.images)
        test_acc = classification_accuracy(data.test.cls == np.argmax(pred, axis=1))
        val_acc = validation_accuracy()

        print(msg.format(i, val_acc, test_acc))

        pred_labels.append(pred)
        test_accuracies.append(test_acc)
        validation_accuracies.append(val_acc)

    return np.array(pred_labels), np.array(test_accuracies), np.array(validation_accuracies)

In [47]:
# Restore every saved network and collect, per network, its test-set
# predictions and its test / validation accuracies.
pred_labels, test_accuracies, validation_accuracies = ensemble_predictions()

INFO:tensorflow:Restoring parameters from checkpoints/network0
Neural Network : 0, Validation Set accuracy : 0.9946, Test Set accuracy : 0.9893
INFO:tensorflow:Restoring parameters from checkpoints/network1
Neural Network : 1, Validation Set accuracy : 0.9956, Test Set accuracy : 0.9893
INFO:tensorflow:Restoring parameters from checkpoints/network2
Neural Network : 2, Validation Set accuracy : 0.9932, Test Set accuracy : 0.9885
INFO:tensorflow:Restoring parameters from checkpoints/network3
Neural Network : 3, Validation Set accuracy : 0.9948, Test Set accuracy : 0.9877
INFO:tensorflow:Restoring parameters from checkpoints/network4
Neural Network : 4, Validation Set accuracy : 0.9934, Test Set accuracy : 0.9881


In [48]:
# Spread of the individual networks' test-set accuracies.
print("Mean test-set accuracy: {0:.4f}".format(test_accuracies.mean()))
print("Min test-set accuracy:  {0:.4f}".format(test_accuracies.min()))
print("Max test-set accuracy:  {0:.4f}".format(test_accuracies.max()))

Mean test-set accuracy: 0.9886
Min test-set accuracy:  0.9877
Max test-set accuracy:  0.9893


In [49]:
### Ensemble

# Average the predictions of all networks in the ensemble, image by image.
# pred_labels is (5, 10000, 10), so the result has shape (10000, 10).
ensemble_pred_labels = pred_labels.mean(axis=0)

# Class chosen by the ensemble for each test image -> shape (10000,)
ensemble_cls_pred = ensemble_pred_labels.argmax(axis=1)

# Boolean masks over the test set: where the ensemble is right / wrong.
ensemble_correct = (ensemble_cls_pred == data.test.cls)
ensemble_incorrect = np.logical_not(ensemble_correct)

In [50]:
###Best NN

# Index of the single network with the highest test-set accuracy.
# NOTE(review): the "best" network is selected using the same test set it is
# then scored on -- keep that in mind when comparing it with the ensemble.
best_net = test_accuracies.argmax()

# pred_labels has shape (5, 10000, 10); take that network's (10000, 10) slice.
best_net_pred_labels = pred_labels[best_net]

# Class chosen by the best network for each test image.
best_net_cls_pred = best_net_pred_labels.argmax(axis=1)

# Boolean masks over the test set: where the best network is right / wrong.
best_net_correct = (best_net_cls_pred == data.test.cls)
best_net_incorrect = np.logical_not(best_net_correct)

In [51]:
# Number of test images the ensemble classifies correctly.
ensemble_correct.sum()

9914

In [52]:
# Number of test images the best single network classifies correctly.
best_net_correct.sum()

9893

In [53]:
# Test images the ensemble gets right but the best single network gets wrong.
ensemble_better = best_net_incorrect & ensemble_correct
ensemble_better.sum()

32

In [54]:
# Test images the best single network gets right but the ensemble gets wrong.
best_net_better = best_net_correct & ensemble_incorrect
best_net_better.sum()

11

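Therefore the ensemble reaches 99.1% test accuracy (9914 of 10000 images) compared with 98.9% (9893) for the best single network: it correctly classifies 32 images that the best network gets wrong, while the best network is right on 11 images that the ensemble gets wrong.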