In [1]:
import numpy as np
import os
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML
from tensorflow.contrib.layers import variance_scaling_initializer
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.data import Dataset, Iterator
from sklearn.metrics import classification_report

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
###### Do not modify  here ######

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

###### Do not modify here ###### 


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
n_input = X_train1.shape[1]
n_hidden = [128, 128, 128, 128, 128]
n_output = 5
learning_rate = 0.0017
n_batches = 3000
batch_size = 2048
target_names = ['Label 0', 'Label 1', 'Label 2', 'Label 3', 'Label 4']

In [3]:
# Clear graph
reset_graph()

X = tf.placeholder(tf.float32, shape = (None, n_input), name = "X")
y = tf.placeholder(tf.int32, shape = (None,), name = "y")

he_init = tf.contrib.layers.variance_scaling_initializer()

hidden1 = fully_connected(X, n_hidden[0], weights_initializer=he_init, activation_fn = tf.nn.elu, scope = "h1")
hidden2 = fully_connected(hidden1, n_hidden[1], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h2")
hidden3 = fully_connected(hidden2, n_hidden[2], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h3")
hidden4 = fully_connected(hidden3, n_hidden[3], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h4")
hidden5 = fully_connected(hidden4, n_hidden[4], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h5")
y_ = tf.contrib.layers.fully_connected(hidden5, n_output, weights_initializer = he_init, activation_fn = tf.nn.softmax, scope = "softmax")

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = y_, labels = y, name = "cross_entropy"))

training_op = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)

# Accuracy
predicted_class = tf.argmax(y_, 1, output_type = tf.int32)
correct_prediction = tf.equal(y, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [5]:
with tf.Session() as sess:
    show_graph(tf.get_default_graph().as_graph_def())

In [6]:
train_data = Dataset.from_tensor_slices((X_train1, y_train1)).batch(batch_size).repeat()
# Create an iterator, to go over the dataset
iterator = train_data.make_one_shot_iterator()
train_next_batch = iterator.get_next()

In [7]:
minimum_acc = 0.8
best_batch = None
max_checks = 7
continuous_checks = 0

In [8]:
###### Start TF session ######
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) # initialize the weights 
    sess.run(tf.local_variables_initializer()) # initialize the local variables hidden in the tf.metrics.recallmethod
    
    for batch in range(n_batches):
        train, label = sess.run(train_next_batch) # Get the mini-batch data sample
        sess.run(training_op, feed_dict={X: train, y: label}) # Feed to network to train
        
        # Get best accuracy
        if batch % 100 == 0: # validate for every 100 batches
            # loss_val = sess.run(loss, feed_dict={X: X_valid1, y: y_valid1})
            acc_val = sess.run(accuracy, feed_dict={X: X_valid1, y: y_valid1})
            if acc_val > minimum_acc:
                minimum_acc = acc_val
                best_batch = batch
                continuous_checks = 0
                testing_acc = sess.run(accuracy, feed_dict={X: X_test1, y: y_test1})
                print('Batch: ' + str(best_batch))
                print('Valid accuraccy: ' + str(minimum_acc))
                print('Test acuraccy: ' + str(testing_acc) + '\n')
            else:
                continuous_checks += 1
                if continuous_checks > max_checks:
                    print('Early stopping...\n')
                    break
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={X: X_valid1, y: y_valid1})
    testing_labels = sess.run(predicted_class, feed_dict={X: X_test1, y: y_test1})
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc))
    print(classification_report(y_valid1, valid_labels, target_names=target_names))
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names))

Batch: 100
Valid accuraccy: 0.981626
Test acuraccy: 0.984433

Batch: 200
Valid accuraccy: 0.982408
Test acuraccy: 0.98813

Batch: 300
Valid accuraccy: 0.985145
Test acuraccy: 0.988325

Batch: 400
Valid accuraccy: 0.98749
Test acuraccy: 0.99066

Batch: 500
Valid accuraccy: 0.988272
Test acuraccy: 0.989687

Batch: 600
Valid accuraccy: 0.989836
Test acuraccy: 0.990465

Batch: 700
Valid accuraccy: 0.990227
Test acuraccy: 0.989881

Batch: 800
Valid accuraccy: 0.99179
Test acuraccy: 0.99027

Batch: 1400
Valid accuraccy: 0.992572
Test acuraccy: 0.991438

Early stopping...

--------------------

Validation Result
Total accuracy: 0.9925723075866699
             precision    recall  f1-score   support

    Label 0       1.00      1.00      1.00       479
    Label 1       0.99      0.99      0.99       563
    Label 2       0.98      0.99      0.98       488
    Label 3       0.99      0.98      0.99       493
    Label 4       0.99      0.99      0.99       535

avg / total       0.99      0.99

# Cross Validation

In [9]:
from sklearn.model_selection import KFold

In [10]:
X_all = np.concatenate((X_train1, X_valid1), axis=0)
y_all = np.concatenate((y_train1, y_valid1), axis=0)

## 5-Fold

In [11]:
kf_5 = KFold(n_splits=5)  # indice of 5 fold

###### Start TF session ######
with tf.Session() as sess:
    for cross_validation_round, (train_index, valid_index) in enumerate(kf_5.split(X_all, y_all)):
        X_train_folds = X_all[train_index]
        y_train_folds = y_all[train_index]
        X_valid_folds = X_all[valid_index]
        y_valid_folds = y_all[valid_index]

        train_data = Dataset.from_tensor_slices((X_train_folds, y_train_folds)).batch(batch_size).repeat()
        # Create an iterator, to go over the dataset
        iterator = train_data.make_one_shot_iterator()
        train_next_batch = iterator.get_next()

        minimum_acc = 0.8
        best_batch = None
        max_checks = 7
        continuous_checks = 0

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        for batch in range(n_batches):
            train, label = sess.run(train_next_batch) # Get the mini-batch data sample
            sess.run(training_op, feed_dict={X: train, y: label}) # Feed to network to train

            if batch % 100 == 0:
                # loss_val = sess.run(loss, feed_dict={X: X_valid1, y: y_valid1})
                acc_val = sess.run(accuracy, feed_dict={X: X_valid_folds, y: y_valid_folds})
                if acc_val > minimum_acc:
                    minimum_acc = acc_val
                    best_batch = batch
                    continuous_checks = 0
                else:
                    continuous_checks += 1
                    if continuous_checks > max_checks:
                        #print('Early stopping...')
                        break
                        
        print("Fold:{}".format(cross_validation_round+1))
        print('Best Batch: ' + str(best_batch))
        print('Valid accuraccy: ' + str(minimum_acc))
        print('Test acuraccy: ' + str(sess.run(accuracy, feed_dict={X: X_test1, y: y_test1})) + '\n')
    
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={X: X_valid_folds, y: y_valid_folds})
    testing_labels = sess.run(predicted_class, feed_dict={X: X_test1, y: y_test1})
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc))
    print(classification_report(y_valid_folds, valid_labels, target_names=target_names))
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names))

Fold:1
Best Batch: 2200
Valid accuraccy: 0.989542
Test acuraccy: 0.993384

Fold:2
Best Batch: 2200
Valid accuraccy: 0.991665
Test acuraccy: 0.99066

Fold:3
Best Batch: 1800
Valid accuraccy: 0.989214
Test acuraccy: 0.992995

Fold:4
Best Batch: 2000
Valid accuraccy: 0.987089
Test acuraccy: 0.988908

Fold:5
Best Batch: 1000
Valid accuraccy: 0.989214
Test acuraccy: 0.987157

--------------------

Validation Result
Total accuracy: 0.9892139434814453
             precision    recall  f1-score   support

    Label 0       1.00      0.99      0.99      1169
    Label 1       0.99      0.99      0.99      1302
    Label 2       0.98      0.99      0.98      1187
    Label 3       0.98      0.99      0.99      1223
    Label 4       1.00      0.98      0.99      1238

avg / total       0.99      0.99      0.99      6119

Testing Result
Total accuracy: 0.9914380311965942
             precision    recall  f1-score   support

    Label 0       0.99      0.99      0.99       980
    Label 1       1.

## 10-Fold

In [12]:
kf_10 = KFold(n_splits=10)  
i = 0
###### Start TF session ######
with tf.Session() as sess:
    for train_index, valid_index in kf_10.split(X_all, y_all):
        X_train_folds = X_all[train_index]
        y_train_folds = y_all[train_index]
        X_valid_folds = X_all[valid_index]
        y_valid_folds = y_all[valid_index]

        train_data = Dataset.from_tensor_slices((X_train_folds, y_train_folds)).batch(batch_size).repeat()
        # Create an iterator, to go over the dataset
        iterator = train_data.make_one_shot_iterator()
        train_next_batch = iterator.get_next()

        minimum_acc = 0.8
        best_batch = None
        max_checks = 7
        continuous_checks = 0
    
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        for batch in range(n_batches):
            train, label = sess.run(train_next_batch) # Get the mini-batch data sample
            sess.run(training_op, feed_dict={X: train, y: label}) # Feed to network to train

            if batch % 100 == 0:
                # loss_val = sess.run(loss, feed_dict={X: X_valid1, y: y_valid1})
                acc_val = sess.run(accuracy, feed_dict={X: X_valid_folds, y: y_valid_folds})
                if acc_val > minimum_acc:
                    minimum_acc = acc_val
                    best_batch = batch
                    continuous_checks = 0

                else:
                    continuous_checks += 1
                    if continuous_checks > max_checks:
                        # print('Early stopping...')
                        break
        print('Fold: ' + str(i+1))
        print('Best Batch: ' + str(best_batch))
        print('Valid accuraccy: ' + str(minimum_acc))
        print('Test acuraccy: ' + str(sess.run(accuracy, feed_dict={X: X_test1, y: y_test1})) + '\n')
        i += 1
        
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={X: X_valid_folds, y: y_valid_folds})
    testing_labels = sess.run(predicted_class, feed_dict={X: X_test1, y: y_test1})
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc))
    print(classification_report(y_valid_folds, valid_labels, target_names=target_names))
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names))

Fold: 1
Best Batch: 700
Valid accuraccy: 0.986601
Test acuraccy: 0.991633

Fold: 2
Best Batch: 600
Valid accuraccy: 0.989216
Test acuraccy: 0.992995

Fold: 3
Best Batch: 500
Valid accuraccy: 0.989869
Test acuraccy: 0.989881

Fold: 4
Best Batch: 800
Valid accuraccy: 0.990196
Test acuraccy: 0.991049

Fold: 5
Best Batch: 1600
Valid accuraccy: 0.987255
Test acuraccy: 0.993578

Fold: 6
Best Batch: 2000
Valid accuraccy: 0.99183
Test acuraccy: 0.989881

Fold: 7
Best Batch: 1000
Valid accuraccy: 0.986924
Test acuraccy: 0.993189

Fold: 8
Best Batch: 600
Valid accuraccy: 0.989866
Test acuraccy: 0.99066

Fold: 9
Best Batch: 1000
Valid accuraccy: 0.993462
Test acuraccy: 0.992216

Fold: 10
Best Batch: 1400
Valid accuraccy: 0.991174
Test acuraccy: 0.988714

--------------------

Validation Result
Total accuracy: 0.9911735653877258
             precision    recall  f1-score   support

    Label 0       0.99      0.99      0.99       577
    Label 1       0.99      0.98      0.99       665
    Label 2

# Dropout

Dropout rate: 0.1

In [13]:
dropout_rate = 0.1

In [19]:
# Clear graph
reset_graph()

X = tf.placeholder(tf.float32, shape = (None, n_input), name = "X")
y = tf.placeholder(tf.int32, shape = (None,), name = "y")
is_training = tf.placeholder(tf.bool, name = "is_training")

he_init = tf.contrib.layers.variance_scaling_initializer()

hidden1 = fully_connected(X, n_hidden[0], weights_initializer=he_init, activation_fn = tf.nn.elu, scope = "h1")
hidden1_drop = tf.layers.dropout(hidden1, rate = dropout_rate, training = is_training)
hidden2 = fully_connected(hidden1_drop, n_hidden[1], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h2")
hidden2_drop = tf.layers.dropout(hidden2, rate = dropout_rate, training = is_training)
hidden3 = fully_connected(hidden2_drop, n_hidden[2], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h3")
hidden3_drop = tf.layers.dropout(hidden3, rate = dropout_rate, training = is_training)
hidden4 = fully_connected(hidden3_drop, n_hidden[3], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h4")
hidden4_drop = tf.layers.dropout(hidden4, rate = dropout_rate, training = is_training)
hidden5 = fully_connected(hidden4_drop, n_hidden[4], weights_initializer = he_init, activation_fn = tf.nn.elu, scope = "h5")
hidden5_drop = tf.layers.dropout(hidden5, rate = dropout_rate, training = is_training)
y_ = tf.contrib.layers.fully_connected(hidden5_drop, n_output, weights_initializer = he_init, activation_fn = tf.nn.softmax, scope = "softmax")

loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = y_, labels = y, name = "cross_entropy"))

training_op = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss)

# Accuracy
predicted_class = tf.argmax(y_, 1, output_type = tf.int32)
correct_prediction = tf.equal(y, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [20]:
with tf.Session() as sess:
    show_graph(tf.get_default_graph().as_graph_def())

In [21]:
train_data = Dataset.from_tensor_slices((X_train1, y_train1)).batch(batch_size).repeat()
# Create an iterator, to go over the dataset
iterator = train_data.make_one_shot_iterator()
train_next_batch = iterator.get_next()

In [22]:
minimum_acc = 0.8
best_batch = None
max_checks = 7
continuous_checks = 0

In [23]:
###### Start TF session ######
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) # initialize the weights 
    sess.run(tf.local_variables_initializer()) # initialize the local variables hidden in the tf.metrics.recallmethod
    
    for batch in range(n_batches):
        train, label = sess.run(train_next_batch) # Get the mini-batch data sample
        sess.run(training_op, feed_dict={X: train, y: label, is_training: True}) # Feed to network to train
        
        # Get best accuracy
        if batch % 100 == 0: # validate for every 100 batches
            # loss_val = sess.run(loss, feed_dict={X: X_valid1, y: y_valid1})
            acc_val = sess.run(accuracy, feed_dict={X: X_valid1, y: y_valid1, is_training: False})
            if acc_val > minimum_acc:
                minimum_acc = acc_val
                best_batch = batch
                continuous_checks = 0
                testing_acc = sess.run(accuracy, feed_dict={X: X_test1, y: y_test1, is_training: False})
                print('Batch: ' + str(best_batch))
                print('Valid accuraccy: ' + str(minimum_acc))
                print('Test acuraccy: ' + str(testing_acc) + '\n')
            else:
                continuous_checks += 1
                if continuous_checks > max_checks:
                    print('Early stopping...\n')
                    break
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={X: X_valid1, y: y_valid1, is_training: False})
    testing_labels = sess.run(predicted_class, feed_dict={X: X_test1, y: y_test1, is_training: False})
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc))
    print(classification_report(y_valid1, valid_labels, target_names=target_names))
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names))

Batch: 100
Valid accuraccy: 0.980063
Test acuraccy: 0.982292

Batch: 200
Valid accuraccy: 0.983972
Test acuraccy: 0.985795

Batch: 300
Valid accuraccy: 0.986317
Test acuraccy: 0.98813

Batch: 500
Valid accuraccy: 0.986708
Test acuraccy: 0.98813

Batch: 600
Valid accuraccy: 0.988272
Test acuraccy: 0.991243

Batch: 800
Valid accuraccy: 0.990227
Test acuraccy: 0.991049

Batch: 900
Valid accuraccy: 0.99179
Test acuraccy: 0.991827

Early stopping...

--------------------

Validation Result
Total accuracy: 0.9917904734611511
             precision    recall  f1-score   support

    Label 0       1.00      1.00      1.00       479
    Label 1       0.99      0.99      0.99       563
    Label 2       0.98      0.99      0.98       488
    Label 3       0.99      0.99      0.99       493
    Label 4       0.99      0.99      0.99       535

avg / total       0.99      0.99      0.99      2558

Testing Result
Total accuracy: 0.9918271899223328
             precision    recall  f1-score   suppor