In [8]:
import numpy as np
import os
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML
from tensorflow.contrib.layers import variance_scaling_initializer
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.data import Dataset, Iterator
from sklearn.metrics import classification_report

###### Do not modify here ###### 
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = graph_def
    #strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))
###### Do not modify  here ######

###### Do not modify here ###### 

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

reset_graph()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# training on MNIST but only on digits 0 to 4
X_train1 = mnist.train.images[mnist.train.labels < 5]
y_train1 = mnist.train.labels[mnist.train.labels < 5]
X_valid1 = mnist.validation.images[mnist.validation.labels < 5]
y_valid1 = mnist.validation.labels[mnist.validation.labels < 5]
X_test1 = mnist.test.images[mnist.test.labels < 5]
y_test1 = mnist.test.labels[mnist.test.labels < 5]

###### Do not modify here ###### 



Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
# define Hyper Parameter
n_input = X_train1.shape[1] # the features num
n_hidden = [128, 128, 128, 128, 128] # define the dimensions for our five fully connected layer
n_output = 5 # define five one-hot encoding output which are 0,1,2,3,4
learning_rate = 0.0017 # set the learning reate 
batch_size = 2048 # define the size for each mini batch
n_batches = 3000 # define the max round for batches
target_names = ['Label 0', 'Label 1', 'Label 2', 'Label 3', 'Label 4'] # defined the class name used in classification report

# No Dropout

### Define Graph

In [46]:
def fully_connected_layer(x, dims, activate, name, mode=False, dropout=None):
    '''
    [Function Input]
    x        "Tensor" : The input tensor
    dims     "List"   : The dimension of Input and Output
    activate "String" : The name of activate function
    name     "String" : The name of this operation
    
    [Function Output]
    The Result of Linear Function + Activate Function 
    '''
    with tf.variable_scope(name):

        # Define Activate Function, we can dynamic assign the activation function
        if activate == 'elu':  # shch as elu
            activation = tf.nn.elu # used in each hidden layer
        elif activate == 'softmax': # or softmax
            activation = tf.nn.softmax # used in the last logits layer
        
        #Build Layer
        output = tf.layers.dense(inputs=x, # assign x to input
                                 units=dims[1], #with dimension for each layer
                                 activation=activation, # defile the activation function
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(), # special initializer, which help to fight gradient vanish/explosion
                                 name="Neural_Operation" # give this layer a name
                                )  # Layer using dense
        if dropout: # the variable to design whether to use dropout or not
            output = tf.layers.dropout(output, rate=dropout, training=mode, name="Dropout") # set the dropout layer and give a name 
    return output # return the output of this operation unit(node)


# Clear graph
reset_graph() # clean the tensorflow graph

# Define Input Entry
x = tf.placeholder(tf.float32, shape=(None, n_input), name="Input_X") # Input
y = tf.placeholder(tf.int32, shape=(None,), name ="Input_Label")      # Labels
mode = tf.placeholder(tf.bool, name ="Mode")                          # Train or Inference Mode

# Define Connect Layers
x1 = fully_connected_layer(x , [n_input, n_hidden[0]], "elu", "Fully_Connected_Layer_1", mode)     # 1st 128 nodes Layer
x2 = fully_connected_layer(x1, [n_hidden[0], n_hidden[1]], "elu", "Fully_Connected_Layer_2", mode) # 2nd 128 nodes Layer
x3 = fully_connected_layer(x2, [n_hidden[1], n_hidden[2]], "elu", "Fully_Connected_Layer_3", mode) # 3rd 128 nodes Layer
x4 = fully_connected_layer(x3, [n_hidden[2], n_hidden[3]], "elu", "Fully_Connected_Layer_4", mode) # 4th 128 nodes Layer
x5 = fully_connected_layer(x4, [n_hidden[3], n_hidden[4]], "elu", "Fully_Connected_Layer_5", mode) # 5th 128 nodes Layer
y_ = fully_connected_layer(x5, [n_hidden[4], n_output], "softmax", "Softmax_Layer")                # 128 to 5 class with softmax

# Define Loss Function
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_, name="Cross_Entropy")) # given the ground truth y, predict output y_, we utilizing the sparse_softmax_cross_entropy_with_logits to calculate the cross entropy between them. Following take the average of all the sample.

# Define Training Process
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy) # as we define the loss function above. we using the adam optimizer to minimize our defined loss function

# Define Accuracy
predicted_class = tf.argmax(y_,1, output_type=tf.int32)         # get the index of max value from logits vector, which is the predicted label
correct_predict = tf.equal(y, predicted_class)                  # [True, False ..., True]
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32)) # [True, False ..., True] --> [1,0,...,1]


### Originally we calculate precision and recall by ourselves in tensorflow graph.
### Afterwards, we directly utilize sklearn.metrics.classification_report to show the results.
### The following code is just for reference.

# Define Precision
# precision = tf.metrics.precision(y, predicted_class)
# """
# GET TP + FP
# 1. get the indices of predicted class that is specific label(ex:0)

# GET TP/(TP + FP)
# 2. get the correspondind ground true label from indices 
# 3. check if they are also match the label
# 4. take average
# """
# prec_zer = tf.reduce_mean(tf.cast(tf.equal(0, tf.gather(y , tf.where(tf.equal(0, predicted_class)))), tf.float32))
# prec_one = tf.reduce_mean(tf.cast(tf.equal(1, tf.gather(y , tf.where(tf.equal(1, predicted_class)))), tf.float32))
# prec_two = tf.reduce_mean(tf.cast(tf.equal(2, tf.gather(y , tf.where(tf.equal(2, predicted_class)))), tf.float32))
# prec_thi = tf.reduce_mean(tf.cast(tf.equal(3, tf.gather(y , tf.where(tf.equal(3, predicted_class)))), tf.float32))
# prec_fou = tf.reduce_mean(tf.cast(tf.equal(4, tf.gather(y , tf.where(tf.equal(4, predicted_class)))), tf.float32))


# Define Recall
# recall = tf.metrics.recall(y, predicted_class)
# """
# GET TP + FN
# 1. get the indices of ground true label that is specific label(ex:0)

# GET TP/(TP + FN)
# 2. get the correspondind predicted class from indices 
# 3. check if they are also match the label
# 4. take average
# """
# recall_zer = tf.reduce_mean(tf.cast(tf.equal(0, tf.gather(predicted_class , tf.where(tf.equal(0, y)))), tf.float32))
# recall_one = tf.reduce_mean(tf.cast(tf.equal(1, tf.gather(predicted_class , tf.where(tf.equal(1, y)))), tf.float32))
# recall_two = tf.reduce_mean(tf.cast(tf.equal(2, tf.gather(predicted_class , tf.where(tf.equal(2, y)))), tf.float32))
# recall_thi = tf.reduce_mean(tf.cast(tf.equal(3, tf.gather(predicted_class , tf.where(tf.equal(3, y)))), tf.float32))
# recall_fou = tf.reduce_mean(tf.cast(tf.equal(4, tf.gather(predicted_class , tf.where(tf.equal(4, y)))), tf.float32))

In [47]:
with tf.Session() as sess:
    show_graph(tf.get_default_graph().as_graph_def())

In [21]:
# prepare the training batch
train_data = Dataset.from_tensor_slices((X_train1, y_train1)).batch(batch_size).repeat() # to provide the sample operation, we using the mini-batch approach. first assign the train and label dataset in to the function. assign the size of eatch batch. allow the data to repeat again and again.
iterator = train_data.make_one_shot_iterator() # Create an iterator, to go over the dataset
train_next_batch = iterator.get_next() # calling this function to get next batch dataset

In [22]:
saver = tf.train.Saver() # to store the model

In [23]:
# Model Hyper Parameter to save the best model
minimum_acc = 0.8 # the accuracy of model must at least better than 0.8
best_batch = None # the variable to keep the best_batch number
max_checks = 7 # define how much round didn't better than the best, and we would stop the training
continuous_checks = 0 # to keep the number of round that didn't better than the best accuracy

In [24]:
###### Start TF session ######
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer()) # initialize the weights 
    sess.run(tf.local_variables_initializer())  # initialize the local variables hidden in the tf.metrics.recallmethod
    
    for batch in range(n_batches):
        train, label = sess.run(train_next_batch)                        # Get the mini-batch data sample
        sess.run(train_step, feed_dict={x: train, y: label, mode:False}) # Feed to network to train
        
        # Get best accuracy
        if batch % 100 == 0: # validate for every 100 batches
            loss_val = sess.run(cross_entropy, feed_dict={x: X_valid1, y: y_valid1, mode:False})
            acc_val = sess.run(accuracy, feed_dict={x: X_valid1, y: y_valid1, mode:False})
            
            # implement early stopping
            if acc_val > minimum_acc:  # checking current validation result better than minimum accuracy
                minimum_acc = acc_val  # update the minimum accuracy
                best_batch = batch     # update the best batch
                continuous_checks = 0  # reset the number of checks
                save_path = saver.save(sess, "./Models/Team26_HW2.ckpt") #keep the best model
                testing_acc = sess.run(accuracy, feed_dict={x: X_test1, y: y_test1, mode:False}) # get accuracy from model
                print('Best Batch: ' + str(best_batch))
                print('Valid accuraccy: ' + str(minimum_acc))
                print('Test acuraccy: ' + str(testing_acc) + '\n')
                
            else:                                  # every time the current validation result is not better than the current best valid accuracy
                continuous_checks += 1             # we increment the fail count by 1
                if continuous_checks > max_checks: # if total count larger than max chechs, we consider the model wont impove anymore
                    print('Early stopping...\n')
                    break                          # stop the training step
                    
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={x: X_valid1, y: y_valid1, mode:False})   # predicted labels from validation dataset
    testing_labels = sess.run(predicted_class, feed_dict={x: X_test1, y: y_test1, mode:False})   # predicted labels from testing dataset
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc))
    print(classification_report(y_valid1, valid_labels, target_names=target_names))  # show validation results using classification report
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names)) # show testing results using classification report

Best Batch: 100
Valid accuraccy: 0.980453
Test acuraccy: 0.982876

Best Batch: 200
Valid accuraccy: 0.986317
Test acuraccy: 0.986379

Best Batch: 300
Valid accuraccy: 0.98749
Test acuraccy: 0.988908

Best Batch: 500
Valid accuraccy: 0.990618
Test acuraccy: 0.990076

Best Batch: 600
Valid accuraccy: 0.992963
Test acuraccy: 0.991632

Best Batch: 700
Valid accuraccy: 0.993354
Test acuraccy: 0.991438

Early stopping...

--------------------

Validation Result
Total accuracy: 0.9933541417121887
             precision    recall  f1-score   support

    Label 0       1.00      1.00      1.00       479
    Label 1       0.99      0.99      0.99       563
    Label 2       0.99      0.98      0.99       488
    Label 3       1.00      0.99      0.99       493
    Label 4       0.99      1.00      0.99       535

avg / total       0.99      0.99      0.99      2558

Testing Result
Total accuracy: 0.9914379715919495
             precision    recall  f1-score   support

    Label 0       1.00     

# Cross Validation

Here we choose sklearn.model_selection.KFold to implement cross validation.
<br>
KFold provides two index to split data into training set and testing(validating) set.
<br>
The whole dataset will be separated into K folds.
<br>
Each fold is utilized as a validation set once while the k - 1 remaining folds are for the training set. 

In [25]:
from sklearn.model_selection import KFold # import this function to proform the K fold validation

In [26]:
"""
1. concatenate both X and Y
2. using KFold to split both X and Y to n fold with different propotion
"""
X_all = np.concatenate((X_train1, X_valid1), axis=0) # concatenate X
y_all = np.concatenate((y_train1, y_valid1), axis=0) # concatenate Y

## 5-Fold without dropout

In [39]:
best_acc = 0.8
total_valid_accuracy = []
total_test_accuracy  = []

In [40]:
kf_5 = KFold(n_splits=5)  # indice of 5 fold
saver = tf.train.Saver() # to store the model

###### Start TF session ######
with tf.Session() as sess:
    for cross_validation_round, (train_index, valid_index) in enumerate(kf_5.split(X_all, y_all)):
        X_train_folds = X_all[train_index]  # define training set for input X
        y_train_folds = y_all[train_index]  # define training set for label y
        X_valid_folds = X_all[valid_index]  # define valid set for input X
        y_valid_folds = y_all[valid_index]  # define valid set for label y
        
        # re-prepare the training and valid dataset
        train_data = Dataset.from_tensor_slices((X_train_folds, y_train_folds)).batch(batch_size).repeat()
        iterator = train_data.make_one_shot_iterator()  # Create an iterator, to go over the dataset
        train_next_batch = iterator.get_next()
        
        minimum_acc = 0.8
        best_batch = None
        max_checks = 5
        continuous_checks = 0

        sess.run(tf.global_variables_initializer())  # initialize the weights 
        sess.run(tf.local_variables_initializer())   # initialize the local variables hidden in the tf.metrics.recallmethod

        for batch in range(n_batches):
            train, label = sess.run(train_next_batch)                         # Get the mini-batch data sample
            sess.run(train_step, feed_dict={x: train, y: label, mode: False}) # Feed to network to train

            # Get best accuracy
            if batch % 100 == 0:
                acc_val = sess.run(accuracy, feed_dict={x: X_valid_folds, y: y_valid_folds, mode: False})
                
                # implement early stopping
                if acc_val > minimum_acc:   # checking current validation result better than minimum accuracy
                    minimum_acc = acc_val   # update the minimum accuracy
                    best_batch = batch      # update the best batch
                    continuous_checks = 0   # reset the number of checks
                    if acc_val > best_acc:  # if the current accuracy is higher than the accuracy of previous fold
                        best_acc = acc_val
                        save_path = saver.save(sess, "./Models/5-fold/Team26_HW2.ckpt") #keep the best model
                    
                else:                                    # every time the current validation result is not better than the current best valid accuracy
                    continuous_checks += 1               # we increment the fail count by 1
                    if continuous_checks > max_checks:   # if total count larger than max chechs, we consider the model wont impove anymore
                        # print('Early stopping...')
                        break                            # stop the training step
        
        # show valid accuraccy and test accuracy for each fold
        testing_acc = sess.run(accuracy, feed_dict={x: X_test1, y: y_test1, mode: False})
        print("Fold:{}".format(cross_validation_round+1))
        print('Best Batch: ' + str(best_batch))
        print('Valid accuraccy: ' + str(minimum_acc))
        print('Test acuraccy: ' + str(testing_acc) + '\n')
        
        # Record the performance for each fold
        total_valid_accuracy.append(minimum_acc)
        total_test_accuracy.append(testing_acc)
       
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={x: X_valid_folds, y: y_valid_folds, mode: False})  # predicted labels from validation dataset
    testing_labels = sess.run(predicted_class, feed_dict={x: X_test1, y: y_test1, mode: False})            # predicted labels from testing dataset
    print('Validation Result\nAverage accuracy: {}'.format(sum(total_valid_accuracy)/len(total_valid_accuracy)))
    print(classification_report(y_valid_folds, valid_labels, target_names=target_names))  # show validation results using classification report
    print('Testing Result\nAverage accuracy: {}'.format(sum(total_test_accuracy)/len(total_test_accuracy)))
    print(classification_report(y_test1, testing_labels, target_names=target_names))      # show testing results using classification report

Fold:1
Best Batch: 2800
Valid accuraccy: 0.990523
Test acuraccy: 0.99027

Fold:2
Best Batch: 700
Valid accuraccy: 0.990685
Test acuraccy: 0.989881

Fold:3
Best Batch: 800
Valid accuraccy: 0.988234
Test acuraccy: 0.991049

Fold:4
Best Batch: 2100
Valid accuraccy: 0.989868
Test acuraccy: 0.992605

Fold:5
Best Batch: 1700
Valid accuraccy: 0.990848
Test acuraccy: 0.987741

--------------------

Validation Result
Average accuracy: 0.9900315403938293
             precision    recall  f1-score   support

    Label 0       0.99      0.99      0.99      1169
    Label 1       0.98      0.99      0.99      1302
    Label 2       0.98      0.98      0.98      1187
    Label 3       0.99      0.98      0.99      1223
    Label 4       0.99      0.99      0.99      1238

avg / total       0.99      0.99      0.99      6119

Testing Result
Average accuracy: 0.9903093338012695
             precision    recall  f1-score   support

    Label 0       0.99      0.99      0.99       980
    Label 1       

## 10-Fold without dropout

In [48]:
best_acc = 0.8
total_valid_accuracy = []
total_test_accuracy  = []

In [49]:
kf_10 = KFold(n_splits=10)  # indice of 10 fold
saver = tf.train.Saver() # to store the model
best_acc = 0.8
###### Start TF session ######
with tf.Session() as sess:
    for cross_validation_round, (train_index, valid_index) in enumerate(kf_10.split(X_all, y_all)):
        X_train_folds = X_all[train_index]  # define training set for input X
        y_train_folds = y_all[train_index]  # define training set for label y
        X_valid_folds = X_all[valid_index]  # define valid set for input X
        y_valid_folds = y_all[valid_index]  # define valid set for label y

        # re-prepare the training and valid dataset
        train_data = Dataset.from_tensor_slices((X_train_folds, y_train_folds)).batch(batch_size).repeat()
        iterator = train_data.make_one_shot_iterator()  # Create an iterator, to go over the dataset
        train_next_batch = iterator.get_next()
        

        minimum_acc = 0.8
        best_batch = None
        max_checks = 5
        continuous_checks = 0
    
        sess.run(tf.global_variables_initializer())  # initialize the weights 
        sess.run(tf.local_variables_initializer())   # initialize the local variables hidden in the tf.metrics.recallmethod

        for batch in range(n_batches):
            train, label = sess.run(train_next_batch)                         # Get the mini-batch data sample
            sess.run(train_step, feed_dict={x: train, y: label, mode: False}) # Feed to network to train

            # Get best accuracy
            if batch % 100 == 0:
                acc_val = sess.run(accuracy, feed_dict={x: X_valid_folds, y: y_valid_folds, mode: False})
                
                # implement early stopping
                if acc_val > minimum_acc:   # checking current validation result better than minimum accuracy
                    minimum_acc = acc_val   # update the minimum accuracy
                    best_batch = batch      # update the best batch
                    continuous_checks = 0   # reset the number of checks
                    if acc_val > best_acc:  # if the current accuracy is higher than the accuracy of previous fold
                        best_acc = acc_val
                        save_path = saver.save(sess, "./Models/10-fold/Team26_HW2.ckpt") #keep the best model

                else:                                   # every time the current validation result is not better than the current best valid accuracy
                    continuous_checks += 1              # we increment the fail count by 1
                    if continuous_checks > max_checks:  # if total count larger than max chechs, we consider the model wont impove anymore
                        # print('Early stopping...')
                        break                           # stop the training step
        
        # show valid accuraccy and test accuracy for each fold
        testing_acc = sess.run(accuracy, feed_dict={x: X_test1, y: y_test1, mode: False})
        print("Fold:{}".format(cross_validation_round+1))
        print('Best Batch: ' + str(best_batch))
        print('Valid accuraccy: ' + str(minimum_acc))
        print('Test acuraccy: ' + str(testing_acc) + '\n')
        
        # Record the performance for each fold
        total_valid_accuracy.append(minimum_acc)
        total_test_accuracy.append(testing_acc)
        
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={x: X_valid_folds, y: y_valid_folds, mode: False}) # predicted labels from validation dataset
    testing_labels = sess.run(predicted_class, feed_dict={x: X_test1, y: y_test1, mode: False})           # predicted labels from testing dataset
    print('Validation Result\nAverage accuracy: {}'.format(sum(total_valid_accuracy)/len(total_valid_accuracy)))
    print(classification_report(y_valid_folds, valid_labels, target_names=target_names))  # show validation results using classification report
    print('Testing Result\nAverage accuracy: {}'.format(sum(total_test_accuracy)/len(total_test_accuracy)))
    print(classification_report(y_test1, testing_labels, target_names=target_names))      # show testing results using classification report

Fold:1
Best Batch: 1400
Valid accuraccy: 0.989216
Test acuraccy: 0.991827

Fold:2
Best Batch: 500
Valid accuraccy: 0.986928
Test acuraccy: 0.991633

Fold:3
Best Batch: 800
Valid accuraccy: 0.990196
Test acuraccy: 0.992216

Fold:4
Best Batch: 600
Valid accuraccy: 0.990196
Test acuraccy: 0.990854

Fold:5
Best Batch: 900
Valid accuraccy: 0.986601
Test acuraccy: 0.986768

Fold:6
Best Batch: 600
Valid accuraccy: 0.990523
Test acuraccy: 0.991633

Fold:7
Best Batch: 500
Valid accuraccy: 0.987251
Test acuraccy: 0.993189

Fold:8
Best Batch: 900
Valid accuraccy: 0.99052
Test acuraccy: 0.986768

Fold:9
Best Batch: 800
Valid accuraccy: 0.994443
Test acuraccy: 0.991049

Fold:10
Best Batch: 2600
Valid accuraccy: 0.992808
Test acuraccy: 0.991243

--------------------

Validation Result
Average accuracy: 0.9898681521415711
             precision    recall  f1-score   support

    Label 0       0.99      1.00      1.00       577
    Label 1       1.00      0.99      0.99       665
    Label 2       0.9

# Dropout

Here we apply dropout rate: 0.1 to check whether the performance become better.

In [50]:
dropout = 0.1

In [57]:
# Clear graph
tf.reset_default_graph()

# Define Input Entry
x = tf.placeholder(tf.float32, shape=(None, 784), name="Input_X") # Input
y = tf.placeholder(tf.int32, shape=(None,), name ="Input_Label")  # Labels
mode = tf.placeholder(tf.bool, name ="Mode")                      # Train or Inference Mode

# Define Connect Layers
x1 = fully_connected_layer(x , [784, 128], "elu", "Fully_Connected_Layer_1", mode, dropout) # 1st 128 nodes Layer with dropout
x2 = fully_connected_layer(x1, [128, 128], "elu", "Fully_Connected_Layer_2", mode, dropout) # 2nd 128 nodes Layer with dropout
x3 = fully_connected_layer(x2, [128, 128], "elu", "Fully_Connected_Layer_3", mode, dropout) # 3rd 128 nodes Layer with dropout
x4 = fully_connected_layer(x3, [128, 128], "elu", "Fully_Connected_Layer_4", mode, dropout) # 4th 128 nodes Layer with dropout
x5 = fully_connected_layer(x4, [128, 128], "elu", "Fully_Connected_Layer_5", mode, dropout) # 5th 128 nodes Layer with dropout
y_ = fully_connected_layer(x5, [128, 5], "softmax", "Softmax_Layer")                        # 128 to 5 class with softmax

# Define Loss Function
cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_, name="Cross_Entropy"))

# Define Training Process
train_step = tf.train.AdamOptimizer(0.0017).minimize(cross_entropy)

# Define Accuracy
predicted_class = tf.argmax(y_,1, output_type=tf.int32)
correct_predict = tf.equal(y, predicted_class)                  # [True, False ..., True]
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32)) # [True, False ..., True] --> [1,0,...,1]


In [58]:
with tf.Session() as sess:
    show_graph(tf.get_default_graph().as_graph_def())

In [59]:
# prepare the training batch
rain_data = Dataset.from_tensor_slices((X_train1, y_train1)).batch(batch_size).repeat()
iterator = train_data.make_one_shot_iterator()  # Create an iterator, to go over the dataset
train_next_batch = iterator.get_next()

In [60]:
saver = tf.train.Saver() # to store the model

In [61]:
# Model Hyper Parameter
minimum_acc = 0.8
best_batch = None
max_checks = 5
continuous_checks = 0

In [62]:
###### Start TF session ######
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    
    for batch in range(n_batches):
        train, label = sess.run(train_next_batch)                        # Get the mini-batch data sample
        sess.run(train_step, feed_dict={x: train, y: label, mode: True}) # Feed to network to train
        
        # Get best accuracy
        if batch % 100 == 0:
            acc_val = sess.run(accuracy, feed_dict={x: X_valid1, y: y_valid1, mode: False})
            
            # implement early stopping
            if acc_val > minimum_acc:   # checking current validation result better than minimum accuracy
                minimum_acc = acc_val   # update the minimum accuracy
                best_batch = batch      # update the best batch
                continuous_checks = 0   # reset the number of checks
                save_path = saver.save(sess, "./Models/dropout/Team26_HW2.ckpt") #keep the best model
                testing_acc = sess.run(accuracy, feed_dict={x: X_test1, y: y_test1, mode: False})
                print('Batch: ' + str(best_batch))
                print('Valid accuraccy: ' + str(minimum_acc))
                print('Test acuraccy: ' + str(testing_acc) + '\n')
            else:                                   # every time the current validation result is not better than the current best valid accuracy
                continuous_checks += 1              # we increment the fail count by 1
                if continuous_checks > max_checks:  # if total count larger than max chechs, we consider the model wont impove anymore
                    print('Early stopping...\n')
                    break                           # stop the training step
    
    print('--------------------\n')
    valid_labels = sess.run(predicted_class, feed_dict={x: X_valid1, y: y_valid1, mode: False})  # predicted labels from validation dataset
    testing_labels = sess.run(predicted_class, feed_dict={x: X_test1, y: y_test1, mode: False})  # predicted labels from testing dataset
    print('Validation Result\nTotal accuracy: {}'.format(minimum_acc)) 
    print(classification_report(y_valid1, valid_labels, target_names=target_names))              # show validation results using classification report
    print('Testing Result\nTotal accuracy: {}'.format(testing_acc))
    print(classification_report(y_test1, testing_labels, target_names=target_names))             # show testing results using classification report

Batch: 100
Valid accuraccy: 0.976935
Test acuraccy: 0.978984

Batch: 200
Valid accuraccy: 0.983581
Test acuraccy: 0.9856

Batch: 300
Valid accuraccy: 0.984754
Test acuraccy: 0.98813

Batch: 400
Valid accuraccy: 0.986317
Test acuraccy: 0.989297

Batch: 500
Valid accuraccy: 0.989054
Test acuraccy: 0.99027

Batch: 700
Valid accuraccy: 0.989445
Test acuraccy: 0.990076

Batch: 800
Valid accuraccy: 0.989445
Test acuraccy: 0.99027

Batch: 1000
Valid accuraccy: 0.99179
Test acuraccy: 0.990465

Batch: 1500
Valid accuraccy: 0.99179
Test acuraccy: 0.991633

Early stopping...

--------------------

Validation Result
Total accuracy: 0.9917904138565063
             precision    recall  f1-score   support

    Label 0       0.99      1.00      0.99       479
    Label 1       0.99      0.99      0.99       563
    Label 2       0.99      0.98      0.98       488
    Label 3       1.00      0.99      0.99       493
    Label 4       0.99      1.00      0.99       535

avg / total       0.99      0.99 