In [1]:
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import random
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.model_selection import KFold
import time

# Define paramaters for the model
learning_rate = 0.5
n_epochs = 10
batch_size = 50

# regulation_rate = 1e-4

def fullLayer(input_data, output_size, act, std = 0.1):    
    ##defining the full linear Layer here
    w = tf.Variable(tf.random_normal([input_data.get_shape().as_list()[1], output_size], stddev = std))
    b = tf.Variable(tf.zeros([output_size]))
    return act(tf.matmul(input_data, w) + b)


#read data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

for hidden_size in [5, 10, 20, 50]:
    
    tf.reset_default_graph()
    #define placeholder
    # all image are 28 * 28 so x has 784 dimension
    X = tf.placeholder(tf.float32, [batch_size, 784], name='X_placeholder')
    Y = tf.placeholder(tf.float32, [batch_size, 10], name='Y_placeholder')
    
    ## the layers
    h = fullLayer(X, hidden_size, tf.nn.sigmoid)
    logits = fullLayer(h, 10, tf.nn.sigmoid)

    ## defining loss function
    ## use cross entropy of softmax of logits as the loss function
    entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
    ## computes the mean over all the examples in the batch
    loss = tf.reduce_mean(entropy) 
    # + regulation_rate*tf.nn.l2_loss(w)  

    ##defining optimizer
    ## using gradient descent with learning rate of 0.5 to minimize loss
    gradient = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = gradient.minimize(loss)

    ##the prediction we made
    preds = tf.nn.softmax(logits)
    ##check how many of them are correct arg maxx is used because Y is one hat
    correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
    
    #just some config for not getting whole server
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    
    best_acc = 0
    best_hidden = 0
    with tf.Session(config = config) as sess:
        # to visualize using TensorBoard
#         writer = tf.summary.FileWriter('./graphs/mnist/c', sess.graph)
        ##starting time
        start_time = time.time()
        ##initialize the variables
        sess.run(tf.global_variables_initializer())
        ## split to 5 pieces for cross validation
        kf = KFold(n_splits=5, shuffle = True)
        total_correct_preds = 0
        for train_index, test_index in kf.split(mnist.train.images):
            ## each time set validation to one of 5 pieces
            X_train, X_validation = mnist.train.images[train_index], mnist.train.images[test_index]
            Y_train, Y_validation = mnist.train.labels[train_index], mnist.train.labels[test_index]
            
            ##number of train batches
            n_batches = int(len(X_train) / batch_size) 
            for i in range(n_epochs):  # train the model n_epochs times
                total_loss = 0
                total_acc = 0
                for j in range(n_batches):
                    ##train batches
                    X_batch =  X_train[j * batch_size:(j+1) * batch_size]
                    Y_batch =  Y_train[j * batch_size:(j+1) * batch_size]
                    
                    _, loss_batch, acc_batch = sess.run([optimizer, loss, accuracy], feed_dict={X: X_batch, Y: Y_batch})
                
                    total_loss += loss_batch
                    total_acc += acc_batch
                if i % 5 == 4:
                    print('with hidden_size {}, epoch {}, Average loss : {}, Accuracy : {:.6f}'.format(hidden_size, i, total_loss / n_batches, total_acc / mnist.train.num_examples))
                
            
            #### test the model
            
            ##number of validation batches
            n_batches = int(len(X_validation) / batch_size)
            for i in range(n_batches):
                ##validation batches
                X_batch, Y_batch = X_validation[i * batch_size:(i+1) * batch_size], Y_validation[i * batch_size:(i+1) * batch_size]
                accuracy_batch = sess.run([accuracy], feed_dict={X: X_batch, Y: Y_batch})
                total_correct_preds += accuracy_batch[0]
            
            
        print('Total time: {0} seconds'.format(time.time() - start_time))
        print('Optimization Finished!')  # should be around 0.35 after 25 epochs

        print('Accuracy batch_size {}, {}'.format(hidden_size, total_correct_preds / mnist.train.num_examples * 5 / 5))
        
        if best_acc < total_correct_preds / mnist.train.num_examples / 5 / 5 :
            best_acc = total_correct_preds / mnist.train.num_examples / 5 / 5
            best_hidden = hidden_size
        
        
#         writer.close()
    sess.close()



Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
with hidden_size 5, epoch 4, Average loss : 1.7124333408745853, Accuracy : 0.523582


KeyboardInterrupt: 

In [2]:
# test the model
tf.reset_default_graph()
#define placeholder
# all image are 28 * 28 so x has 784 dimension
X = tf.placeholder(tf.float32, [batch_size, 784], name='X_placeholder')
Y = tf.placeholder(tf.float32, [batch_size, 10], name='Y_placeholder')

## the layers
h = fullLayer(X, hidden_size, tf.nn.sigmoid)
logits = fullLayer(h, 10, tf.nn.sigmoid)

## defining loss function
## use cross entropy of softmax of logits as the loss function
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
## computes the mean over all the examples in the batch
loss = tf.reduce_mean(entropy) 
# + regulation_rate*tf.nn.l2_loss(w)  

##defining optimizer
## using gradient descent with learning rate of 0.5 to minimize loss
gradient = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = gradient.minimize(loss)

##the prediction we made
preds = tf.nn.softmax(logits)
##check how many of them are correct arg maxx is used because Y is one hat
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

#just some config for not getting whole server
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)

with tf.Session() as sess:
    ## training model on the best number of hidden size
    # to visualize using TensorBoard
    start_time = time.time()
    ## run the initializer
    sess.run(tf.global_variables_initializer())
    ##number of train batches
    n_batches = int(mnist.train.num_examples / batch_size)
    for i in range(batch_size):  # train the model n_epochs times
        total_loss = 0
        total_acc = 0
        for _ in range(n_batches):
            ##train batches
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            _, loss_batch, acc_batch = sess.run([optimizer, loss, accuracy], feed_dict={X: X_batch, Y: Y_batch})
            total_loss += loss_batch
            total_acc += acc_batch
        if i % 5 == 0:
            print('with hidden_size {}, epoch {}, Average loss : {}, Accuracy : {:.6f}'.format(best_hidden, i, total_loss / n_batches, total_acc / mnist.train.num_examples))

    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Optimization Finished!')  # should be around 0.35 after 25 epochs

    ## test the model on best number of hidden size
    ## number of test batches 
    n_batches = int(mnist.test.num_examples / batch_size)
    total_correct_preds = 0
    for i in range(n_batches):
        ## test batches
        X_batch, Y_batch = mnist.test.next_batch(batch_size)
        accuracy_batch = sess.run([accuracy], feed_dict={X: X_batch, Y: Y_batch})
        total_correct_preds += accuracy_batch[0]

    print('Validation Accuracy hidden_size {}, {}'.format(best_hidden, total_correct_preds / mnist.test.num_examples))
sess.close()

with hidden_size 50, epoch 0, Average loss : 1.786351341117512, Accuracy : 0.787164
with hidden_size 50, epoch 5, Average loss : 1.546934331330386, Accuracy : 0.921582
with hidden_size 50, epoch 10, Average loss : 1.5281841286745939, Accuracy : 0.935182
with hidden_size 50, epoch 15, Average loss : 1.5182938295060937, Accuracy : 0.944400
with hidden_size 50, epoch 20, Average loss : 1.5118392433903434, Accuracy : 0.950727
with hidden_size 50, epoch 25, Average loss : 1.5071476243842732, Accuracy : 0.954873
with hidden_size 50, epoch 30, Average loss : 1.5034445460276171, Accuracy : 0.958418
with hidden_size 50, epoch 35, Average loss : 1.500404855554754, Accuracy : 0.961364
with hidden_size 50, epoch 40, Average loss : 1.4978467944535343, Accuracy : 0.963582
with hidden_size 50, epoch 45, Average loss : 1.495626823793758, Accuracy : 0.965745
Total time: 166.80193495750427 seconds
Optimization Finished!
Validation Accuracy hidden_size 50, 0.96
