In [1]:
import os
import time

import numpy as np
import tensorflow as tf
%matplotlib inline
from matplotlib import pyplot as plt

In [2]:
# Load MNIST through the TensorFlow tutorial helper from 'data/MNIST';
# one_hot=True requests one-hot encoded label vectors.
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST',one_hot=True)

Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# Cache integer class ids (argmax over the one-hot label axis) on the test and
# validation splits so they can be compared directly with predicted classes.
data.test.cls = data.test.labels.argmax(axis=1)
data.validation.cls = data.validation.labels.argmax(axis=1)

In [4]:
# Pool the train and validation splits row-wise; each ensemble member later
# draws its own random training subset from this pool.
combined_images = np.concatenate((data.train.images, data.validation.images), axis=0)
combined_labels = np.concatenate((data.train.labels, data.validation.labels), axis=0)

In [5]:
# Sanity check: images and labels must have the same number of rows.
print (combined_images.shape)
print (combined_labels.shape)

(60000, 784)
(60000, 10)


In [6]:
# 80/20 split of the pooled data: the first share is used for training, the
# remainder is what random_training_set() returns as its validation part.
combined_size = combined_images.shape[0]
train_size = int(0.8 * combined_size)
validation_size = combined_size - train_size

In [7]:
def random_training_set():
    """Draw a fresh random train/validation split of the pooled data.

    The rows of `combined_images` / `combined_labels` are shuffled with a new
    random permutation on every call; the first `train_size` shuffled rows
    form the training part and the remaining rows the validation part.

    Returns:
        Tuple `(x_train, y_train, x_validation, y_validation)`.
    """
    shuffled = np.random.permutation(combined_size)
    train_rows, validation_rows = shuffled[:train_size], shuffled[train_size:]

    return (combined_images[train_rows, :],
            combined_labels[train_rows, :],
            combined_images[validation_rows, :],
            combined_labels[validation_rows, :])

In [8]:
# Image geometry: square single-channel images of img_size x img_size pixels.
img_size = 28
num_channels = 1
img_shape = (img_size, img_size)
# Length of one image once flattened to a 1-D vector.
img_size_flat = img_size ** 2

# Number of target classes.
num_classes = 10

# MODEL 

## MODEL INPUTS

In [9]:
# Flattened input images: one row of img_size_flat pixel values per image.
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
# The same data viewed as [batch, height, width, channels] for the conv layers.
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])

# One-hot ground-truth labels and the corresponding integer class ids.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y')
# tf.argmax(axis=...) replaces the deprecated tf.arg_max(dimension=...).
y_true_cls = tf.argmax(y_true, axis=1)

## MODEL ARCHITECTURE

In [10]:
# Two conv + max-pool stages followed by a two-layer dense classifier head.
# Layer order and names are kept stable because checkpointed variable names
# derive from them.
net = x_image

# Stage 1: 16 filters of 5x5, then 2x2 pooling with stride 2.
net = tf.layers.conv2d(
    inputs=net,
    filters=16,
    kernel_size=5,
    padding='same',
    activation=tf.nn.relu,
    name='layer_conv1',
)
net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

# Stage 2: 36 filters of 5x5, then 2x2 pooling with stride 2.
net = tf.layers.conv2d(
    inputs=net,
    filters=36,
    kernel_size=5,
    padding='same',
    activation=tf.nn.relu,
    name='layer_conv2',
)
net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=2)

# Classifier head: flatten, 128-unit ReLU layer, then one linear unit per class.
net = tf.contrib.layers.flatten(net)
net = tf.layers.dense(inputs=net, units=128, activation=tf.nn.relu)
net = tf.layers.dense(inputs=net, units=num_classes, activation=None)

In [11]:
# Raw class scores from the final dense layer.
logits = net
# Class probabilities and the most probable class per image.
y_pred = tf.nn.softmax(logits)
# tf.argmax(axis=...) replaces the deprecated tf.arg_max(dimension=...).
y_pred_cls = tf.argmax(y_pred, axis=1)

# The loss is fed the logits rather than y_pred: the op applies softmax itself.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_true)
loss = tf.reduce_mean(cross_entropy)

# Training op: one Adam step on the mean cross-entropy per session.run().
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)

In [12]:
# Per-image boolean "prediction matches label" tensor. It is deliberately NOT
# named `correct_prediction`: a helper function of that name is defined further
# down, and sharing the name meant that re-running this cell after that one
# rebound the name to a tensor and broke test_correct()/validation_correct().
correct_prediction_op = tf.equal(y_true_cls, y_pred_cls)
# Fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction_op, tf.float32))

In [13]:
# Saver used to write one checkpoint per ensemble member and to restore them
# later; max_to_keep=100 sets how many checkpoints the saver retains.
saver = tf.train.Saver(max_to_keep=100)
def get_save_path(net_number):
    """Return the checkpoint path prefix for ensemble member `net_number`."""
    return 'checkpoints_ensemble_gpu/network{}'.format(net_number)

In [14]:
# Number of images fed to the optimizer per training step.
train_batch_size = 64


def random_batch(x_train, y_train):
    """Sample one training batch without replacement.

    Args:
        x_train: array of training images, one image per row.
        y_train: array of labels; row i belongs to image i.

    Returns:
        Tuple `(x_batch, y_batch)` holding `train_batch_size` distinct rows,
        with the image/label pairing preserved.
    """
    picked = np.random.choice(len(x_train), size=train_batch_size, replace=False)
    return x_train[picked, :], y_train[picked, :]

In [15]:
def optimize(num_iterations, x_train, y_train):
    """Run `num_iterations` optimizer steps on random batches and report progress.

    Every 100th step (starting with the first) the accuracy on the batch that
    was just trained on is printed; the total wall-clock time is printed at
    the end.

    Args:
        num_iterations: number of optimizer steps to run.
        x_train: training images passed on to `random_batch`.
        y_train: training labels passed on to `random_batch`.
    """
    started = time.time()

    for step in range(num_iterations):
        batch_images, batch_labels = random_batch(x_train, y_train)
        batch_feed = {x: batch_images, y_true: batch_labels}
        session.run(optimizer, feed_dict=batch_feed)

        if step % 100 != 0:
            continue
        # Measured after the update, on the same batch that was trained on.
        acc = session.run(accuracy, feed_dict=batch_feed)
        print('Iter {0:>6} Training Accuracy {1}'.format(step + 1, acc))

    elapsed = time.time() - started
    print('Time Usage:', round(elapsed, 3), ' seconds')

# TF SESSION 

In [16]:
# One session is shared by training, checkpoint save/restore and inference.
session = tf.Session()


def init_variables():
    """(Re)initialise every global variable of the graph in `session`."""
    init_op = tf.global_variables_initializer()
    session.run(init_op)


init_variables()

# ENSEMBLE OF NETWORKS

In [17]:
# Ensemble size and the number of optimizer steps run for each member.
num_networks= 5
num_iterations = 4000
# keeping num_iterations small for now.

In [18]:
# Train every ensemble member from scratch on its own random training subset
# and checkpoint it. The `if True` guard is presumably a manual switch for
# skipping training when checkpoints already exist -- set it to False to skip.
if True:
    for i in range(num_networks):
        print ('Neural Network{0}'.format(i))
        # Only the training part of the split is used; the validation part is discarded.
        x_train, y_train, _, _ = random_training_set()

        # Reset all variables so the members do not share learned state.
        init_variables()

        optimize(num_iterations, x_train=x_train, y_train=y_train)

        # Saver.save() fails when the target directory is missing, so create it first.
        save_path = get_save_path(i)
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        saver.save(sess=session, save_path=save_path)
        print ('')

Neural Network0
Iter      1 Training Accuracy 0.171875
Iter    101 Training Accuracy 0.734375
Iter    201 Training Accuracy 0.890625
Iter    301 Training Accuracy 0.953125
Iter    401 Training Accuracy 0.9375
Iter    501 Training Accuracy 0.921875
Iter    601 Training Accuracy 0.96875
Iter    701 Training Accuracy 0.90625
Iter    801 Training Accuracy 0.90625
Iter    901 Training Accuracy 0.90625
Iter   1001 Training Accuracy 0.984375
Iter   1101 Training Accuracy 0.953125
Iter   1201 Training Accuracy 0.9375
Iter   1301 Training Accuracy 0.9375
Iter   1401 Training Accuracy 0.984375
Iter   1501 Training Accuracy 0.96875
Iter   1601 Training Accuracy 0.984375
Iter   1701 Training Accuracy 0.984375
Iter   1801 Training Accuracy 0.96875
Iter   1901 Training Accuracy 0.984375
Iter   2001 Training Accuracy 0.984375
Iter   2101 Training Accuracy 0.953125
Iter   2201 Training Accuracy 0.984375
Iter   2301 Training Accuracy 0.96875
Iter   2401 Training Accuracy 1.0
Iter   2501 Training Accura

## HELPER FUNCTIONS

In [19]:
# Number of images per inference batch.
batch_size = 256


def predict_label(images):
    """Compute softmax class probabilities for `images` with the current weights.

    Inference is run in chunks of `batch_size` rows so the whole set is never
    fed to the session at once.

    Args:
        images: array of flattened images, one image per row.

    Returns:
        float64 array of shape `(len(images), num_classes)` whose row i holds
        the predicted class probabilities for image i.
    """
    num_images = len(images)
    # np.float64 is what the removed `np.float` alias resolved to.
    pred_labels = np.zeros(shape=(num_images, num_classes), dtype=np.float64)

    for start in range(0, num_images, batch_size):
        stop = min(start + batch_size, num_images)
        pred_labels[start:stop] = session.run(y_pred, feed_dict={x: images[start:stop, :]})

    return pred_labels

In [20]:
def correct_prediction(images, labels, cls_true):
    """Return a boolean array marking which images are classified correctly.

    Args:
        images: array of flattened images, one image per row.
        labels: accepted for interface compatibility; not used here.
        cls_true: integer class id for each image.

    Returns:
        Boolean array; entry i is True when the predicted class of image i
        equals `cls_true[i]`.
    """
    cls_pred = predict_label(images).argmax(axis=1)
    return cls_pred == cls_true

### CORRECT PREDICTIONS - test & validation

In [21]:
def test_correct():
    """Return the per-image correctness mask for the test set."""
    test_set = data.test
    return correct_prediction(images=test_set.images,
                              labels=test_set.labels,
                              cls_true=test_set.cls)

In [22]:
def validation_correct():
    """Return the per-image correctness mask for the validation set.

    NOTE(review): data.validation is also part of the pooled data from which
    each network's training subset is drawn, so these images may have been
    seen during training -- treat the resulting accuracy as optimistic.
    """
    validation_set = data.validation
    return correct_prediction(images=validation_set.images,
                              labels=validation_set.labels,
                              cls_true=validation_set.cls)

### ACCURACY

In [23]:
def classification_accuracy(correct):
    """Return the fraction of True entries in the boolean array `correct`."""
    fraction_correct = correct.mean()
    return fraction_correct

In [24]:
def test_accuracy():
    """Return the classification accuracy of the current weights on the test set."""
    return classification_accuracy(test_correct())

In [25]:
def validation_accuracy():
    """Return the classification accuracy of the current weights on the validation set."""
    return classification_accuracy(validation_correct())

## RESULT ENSEMBLE

In [26]:
def ensemble_predictions():
    """Restore each trained network in turn and collect its test-set predictions.

    For every ensemble member the checkpoint is restored into `session`, the
    test and validation accuracies are computed and printed, and the predicted
    class probabilities for the test set are recorded.

    Returns:
        Tuple `(pred_labels, test_accuracies, val_accuracies)` of numpy arrays:
        `pred_labels` stacks the per-network probability arrays along a new
        leading axis (network, image, class); the other two hold one accuracy
        per network.
    """
    pred_labels = []
    test_accuracies = []
    val_accuracies = []

    for i in range(num_networks):
        saver.restore(sess=session, save_path=get_save_path(i))

        # Run test-set inference once and reuse it for both the accuracy and
        # the ensemble, instead of predicting the whole test set twice.
        pred = predict_label(images=data.test.images)
        test_acc = classification_accuracy(np.argmax(pred, axis=1) == data.test.cls)
        val_acc = validation_accuracy()

        test_accuracies.append(test_acc)
        val_accuracies.append(val_acc)
        pred_labels.append(pred)
        print ("Network: {0}, Accuracy Validation: {1:.4f}, Test: {2:.4f}".format(i, val_acc, test_acc))

    return np.array(pred_labels), np.array(test_accuracies), np.array(val_accuracies)

In [27]:
# Evaluate all saved networks; keeps per-network test probabilities and accuracies.
pred_labels, test_accuracies, val_accuracies = ensemble_predictions()

INFO:tensorflow:Restoring parameters from checkpoints_ensemble/network0
Network: 0, Accuracy Validation: 0.9854, Test: 0.9820
INFO:tensorflow:Restoring parameters from checkpoints_ensemble/network1
Network: 1, Accuracy Validation: 0.9828, Test: 0.9811
INFO:tensorflow:Restoring parameters from checkpoints_ensemble/network2
Network: 2, Accuracy Validation: 0.9846, Test: 0.9811
INFO:tensorflow:Restoring parameters from checkpoints_ensemble/network3
Network: 3, Accuracy Validation: 0.9810, Test: 0.9819
INFO:tensorflow:Restoring parameters from checkpoints_ensemble/network4
Network: 4, Accuracy Validation: 0.9822, Test: 0.9789


In [28]:
# Spread of the individual networks' test accuracies.
print("Mean test accuracy: {0:.4f}".format(test_accuracies.mean()))
print("Min test accuracy:  {0:.4f}".format(test_accuracies.min()))
print("Max test accuracy:  {0:.4f}".format(test_accuracies.max()))

Mean test accuracy: 0.9810
Min test accuracy:  0.9789
Max test accuracy:  0.9820


In [29]:
# Axes: (network, test image, class).
print (pred_labels.shape)

(5, 10000, 10)


## ENSEMBLE PREDICTIONS

In [30]:
# Ensemble probability = mean of the networks' probabilities (network axis 0).
ensemble_pred_labels = pred_labels.mean(axis=0)
print(ensemble_pred_labels.shape)

(10000, 10)


In [31]:
# Ensemble class = most probable class of the averaged probabilities.
ensemble_cls_pred = ensemble_pred_labels.argmax(axis=1)
print(ensemble_cls_pred.shape)

(10000,)


In [32]:
# Boolean masks over the test set: where the ensemble is right / wrong.
ensemble_correct = (ensemble_cls_pred == data.test.cls)
ensemble_incorrect = ~ensemble_correct

#### BEST NETWORK

In [33]:
# Test accuracy of each individual network, indexed by network number.
print (test_accuracies)

[ 0.982   0.9811  0.9811  0.9819  0.9789]


In [34]:
# Index of the single network with the highest test accuracy.
# NOTE(review): the network is selected on the same test set that is used for
# the comparison below, which favours it -- consider selecting on validation
# accuracy instead.
best_net = test_accuracies.argmax()
print('Best Network:', best_net)
print('Test Accuracy of Best Network:', test_accuracies[best_net])

Best Network: 0
Test Accuracy of Best Network: 0.982


In [35]:
# Probabilities and predicted classes of the best single network.
best_net_pred_labels = pred_labels[best_net]
best_net_cls_pred = best_net_pred_labels.argmax(axis=1)

In [36]:
# Boolean masks over the test set: where the best single network is right / wrong.
best_net_correct = (best_net_cls_pred == data.test.cls)
best_net_incorrect = ~best_net_correct

## COMPARISON OF ENSEMBLE & BEST NETWORK

In [37]:
# Counting True entries gives the number of correctly classified test images.
print('Number of Images Correctly Classified by Ensemble    :', ensemble_correct.sum())
print('Number of Images Correctly Classified by BestNetwork :', best_net_correct.sum())

Number of Images Correctly Classified by Ensemble    : 9840
Number of Images Correctly Classified by BestNetwork : 9820


In [38]:
# Test images the ensemble gets right but the best single network gets wrong.
ensemble_better = best_net_incorrect & ensemble_correct
print(ensemble_better.sum())

54


In [39]:
# Test images the best single network gets right but the ensemble gets wrong.
best_net_better = best_net_correct & ensemble_incorrect
print(best_net_better.sum())

34
