## **Import statements and set random seed**

In [None]:
# Record the wall-clock start of the run; the training loop later compares
# time.time() against t1 to stay inside its time budget.
import time
t1 = time.time()

In [None]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Draw both seeds from [0, 100] (inclusive) so every run differs but can be
# replayed from the printed values; hard-code the pair (e.g. 15 and 95) to
# reproduce an earlier run.
np_rand_seed, tf_rand_seed = random.randint(0, 100), random.randint(0, 100)

# Only NumPy is seeded in this cell; tf_rand_seed is just drawn and reported here.
np.random.seed(np_rand_seed)

print("random seed of numpy: ", np_rand_seed)
print("random seed of tensorflow: ", tf_rand_seed)

# **Loading training data**

In [None]:
# Read the training set from disk into a DataFrame.
train_data = pd.read_csv('../input/train.csv')

In [None]:
# (rows, columns) of the raw training frame
train_data.shape

In [None]:
# Preview the first 10 rows
train_data.head(10)

In [None]:
# The target column is "label"; every other column is a model input feature.
output = "label"
features = list(train_data.columns)
features.remove(output)

In [None]:
# Feature columns -> NumPy array, then reshape each flat row into a 28x28
# single-channel image.
# `.values` is used because DataFrame.as_matrix() was deprecated in pandas 0.23
# and removed in pandas 1.0; `.values` works on both old and new versions.
train_X = train_data[features].values
train_X = train_X.reshape((-1, 28,28,1))

In [None]:
# Target column -> integer class ids -> one-hot matrix (one column per distinct label).
# `.values` replaces as_matrix(), which was removed in pandas 1.0.
train_y = train_data[output].values
train_y = train_y.astype(int)
# Newer pandas returns boolean dummies; cast so the one-hot matrix stays numeric
# on every pandas version.
train_y = pd.get_dummies(train_y).values.astype(np.uint8)

In [None]:
# Expect (num_images, 28, 28, 1) after the reshape
train_X.shape

In [None]:
# (num_images, number of distinct labels) after one-hot encoding
train_y.shape

# **Creating Convolutional Neural Network**

In [None]:
import tensorflow as tf
from tensorflow.python.framework import ops

# Discard any previously built default graph before the model is defined.
ops.reset_default_graph()

# Apply the TensorFlow seed drawn in the seed cell; without this call
# tf_rand_seed is printed but never used, so weight initialisation and dropout
# cannot be replayed. The graph-level seed belongs to the default graph, so it
# must be set after the reset and before any op is created.
tf.set_random_seed(tf_rand_seed)

In [None]:
# image dimensions
# Input geometry: 28x28 images with a single channel
height, width = 28, 28
num_channels = 1

# Number of pixels in one flattened image
flat = width * height

# Number of target classes (width of the one-hot label vector)
num_classes = 10

In [None]:
# Graph inputs, supplied through feed_dict at run time:
#   x_image   - batch of images, shape (batch, height, width, num_channels)
#   labels    - one-hot targets, shape (batch, num_classes)
#   keep_prob - value handed to the dropout layer
x_image = tf.placeholder(dtype=tf.float32, shape=(None, height, width, num_channels))
labels = tf.placeholder(dtype=tf.int32, shape=(None, num_classes))
keep_prob = tf.placeholder(dtype=tf.float32)

## **Create functions for creating deep learning layers**

In [None]:
def create_weights(shape):
    '''Return a variable of the given shape, initialised from a truncated normal with stddev 0.05.'''
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)
 
def create_biases(size):
    '''Return a variable holding `size` biases, each starting at 0.05.'''
    initial_values = tf.constant(0.05, shape=[size])
    return tf.Variable(initial_values)

In [None]:
def create_convolutional_layer(input,
                               num_input_channels,
                               conv_filter_size,
                               max_pool_filter_size,
                               num_filters):
    '''Build one convolution -> bias -> ReLU -> max-pool stage and return its output.

    input: tensor fed to the convolution
    num_input_channels: channel count of `input`
    conv_filter_size: side length of the square convolution kernel
    max_pool_filter_size: side length of the square pooling window
    num_filters: number of convolution filters (output channels)
    '''
    # Kernel shape is [height, width, in_channels, out_channels].
    # The weights are created before the biases.
    kernel_shape = [conv_filter_size, conv_filter_size, num_input_channels, num_filters]
    kernel = create_weights(shape=kernel_shape)
    offsets = create_biases(num_filters)

    # Stride-1 'SAME' convolution, followed by the bias and the ReLU activation
    convolved = tf.nn.conv2d(input=input,
                             filter=kernel,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
    activated = tf.nn.relu(convolved + offsets)

    # Max pooling with a fixed stride of 2 along both spatial axes
    pool_window = [1, max_pool_filter_size, max_pool_filter_size, 1]
    return tf.nn.max_pool(value=activated,
                          ksize=pool_window,
                          strides=[1, 2, 2, 1],
                          padding='SAME')

In [None]:
def create_flatten_layer(layer):
    '''Reshape `layer` to two dimensions: an inferred leading axis and one feature axis.'''
    # Number of features = product of the static dimensions 1 to 3
    feature_count = layer.get_shape()[1:4].num_elements()
    # -1 lets the leading dimension be inferred at run time
    return tf.reshape(layer, [-1, feature_count])

In [None]:
def create_fc_layer(input,
                    num_inputs,
                    num_outputs,
                    use_relu=True,
                    dropout = False,
                    keep_prob = 0.2):
    '''Build a fully connected layer: matmul(input, weights) + biases.

    input: 2-D tensor fed to the layer
    num_inputs: number of input features (rows of the weight matrix)
    num_outputs: number of output units (columns of the weight matrix)
    use_relu: apply a ReLU activation to the result when True
    dropout: apply dropout after the optional ReLU when True
    keep_prob: value passed to tf.nn.dropout; only used when `dropout` is True
    '''
    # Trainable parameters; the weights are created before the biases
    fc_weights = create_weights(shape=[num_inputs, num_outputs])
    fc_biases = create_biases(num_outputs)

    result = tf.matmul(input, fc_weights) + fc_biases

    if use_relu:
        result = tf.nn.relu(result)

    if not dropout:
        return result
    return tf.nn.dropout(result, keep_prob)

## **Create Layers of ConvNet**

In [None]:
# First convolution stage: 64 filters of size 5x5, pooling window 2x2
conv1_features, conv1_filter_size, max_pool_size1 = 64, 5, 2

# Second convolution stage: 128 filters of size 5x5, pooling window 2x2
conv2_features, conv2_filter_size, max_pool_size2 = 128, 5, 2

# Widths of the two hidden fully connected layers
fc_layer_size1, fc_layer_size2 = 1024, 512

In [None]:
# First convolution stage, applied directly to the input image placeholder
layer_conv1 = create_convolutional_layer(
    input=x_image,
    num_input_channels=num_channels,
    conv_filter_size=conv1_filter_size,
    max_pool_filter_size=max_pool_size1,
    num_filters=conv1_features,
)
layer_conv1

In [None]:
# Second convolution stage; its input channels are the first stage's filters
layer_conv2 = create_convolutional_layer(
    input=layer_conv1,
    num_input_channels=conv1_features,
    conv_filter_size=conv2_filter_size,
    max_pool_filter_size=max_pool_size2,
    num_filters=conv2_features,
)
layer_conv2

In [None]:
# Flatten the second convolution stage's output so it can feed the fully connected layers
layer_flat = create_flatten_layer(layer_conv2)
layer_flat

In [None]:
# First hidden fully connected layer (ReLU, no dropout); its input width is
# read from the static shape of the flattened tensor
layer_fc1 = create_fc_layer(
    input=layer_flat,
    num_inputs=layer_flat.get_shape()[1:4].num_elements(),
    num_outputs=fc_layer_size1,
    use_relu=True,
    dropout=False,
)
layer_fc1

In [None]:
# Second hidden fully connected layer (ReLU + dropout); the keep probability
# comes from the keep_prob placeholder so it can differ between training and evaluation
layer_fc2 = create_fc_layer(
    input=layer_fc1,
    num_inputs=fc_layer_size1,
    num_outputs=fc_layer_size2,
    use_relu=True,
    dropout=True,
    keep_prob=keep_prob,
)
layer_fc2

In [None]:
# Final layer: one unit per class and no activation, so it emits the raw
# logits consumed by the softmax and the cross-entropy loss
output_layer = create_fc_layer(
    input=layer_fc2,
    num_inputs=fc_layer_size2,
    num_outputs=num_classes,
    use_relu=False,
)
output_layer

## **Create prediction & accuracy metric**

In [None]:
# Class probabilities from the logits
y_pred = tf.nn.softmax(output_layer)
# Predicted and true class ids: index of the largest entry along axis 1
y_pred_cls = tf.argmax(y_pred, axis=1, output_type=tf.int32)
y_true_cls = tf.argmax(labels, axis=1, output_type=tf.int32)

In [None]:
# Element-wise match between predicted and true class ids
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
# Accuracy = mean of the matches after casting them to float
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## **Create Loss and Optimizer**

In [None]:
# Per-example softmax cross-entropy computed from the raw logits
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=labels,
    logits=output_layer,
)
# Scalar training objective: mean over the batch
loss = tf.reduce_mean(cross_entropy)

In [None]:
# Adam with a fixed learning rate; train_step performs one update that minimises `loss`
learning_rate = 1e-4
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(loss)

# **Train Model**

In [None]:
# Random 95% / 5% split: sample the training rows without replacement and
# use every remaining row for validation.
num_samples = len(train_y)
num_train = round(num_samples * 0.95)

train_indices = np.random.choice(num_samples, num_train, replace=False)
held_out = set(range(num_samples)) - set(train_indices)
validation_indices = np.array(list(held_out))

image_train, labels_train = train_X[train_indices], train_y[train_indices]
image_validation, labels_validation = train_X[validation_indices], train_y[validation_indices]

In [None]:
# Per-iteration history filled in by the training loop
train_loss, valid_loss, valid_acc = [], [], []

# Mini-batch size and maximum number of training iterations
train_batch_size, max_iter = 250, 1400

In [None]:
iteration = 0
best_accuracy = 0
best_iteration = 0
# max_to_keep=1: only the latest checkpoint is kept, and a checkpoint is only
# written when validation accuracy improves
saver = tf.train.Saver(max_to_keep=1)

# The validation feed never changes, so it is built once instead of on every iteration
feed_dict_validation = {x_image: image_validation,
                        labels: labels_validation,
                        keep_prob: 1.0}

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    # Train until max_iter iterations are done or 58 minutes have elapsed since t1
    # (to finish before the kernel is killed)
    while iteration < max_iter and (time.time() - t1) < 58*60:

        # draw a random mini-batch (sampling with replacement) from the training split
        rand_index_train = np.random.choice(labels_train.shape[0], size=train_batch_size)
        image_rand_train = image_train[rand_index_train]
        labels_rand_train = labels_train[rand_index_train]

        # the same batch is fed twice: with dropout for the weight update,
        # and with keep_prob 1.0 for the reported training loss
        feed_dict_batch = {x_image: image_rand_train,
                           labels: labels_rand_train,
                           keep_prob: 0.7}

        feed_dict_train = {x_image: image_rand_train,
                           labels: labels_rand_train,
                           keep_prob: 1.0}

        sess.run(train_step, feed_dict=feed_dict_batch)

        temp_train_loss = sess.run(loss, feed_dict=feed_dict_train)
        train_loss.append(temp_train_loss)

        # a single forward pass over the validation split yields both metrics
        temp_validation_loss, temp_validation_accuracy = sess.run(
            [loss, accuracy], feed_dict=feed_dict_validation)
        valid_loss.append(temp_validation_loss)
        valid_acc.append(temp_validation_accuracy)

        # checkpoint whenever validation accuracy beats the best recorded so far
        if temp_validation_accuracy > best_accuracy:
            best_accuracy = temp_validation_accuracy
            best_iteration = iteration
            saver.save(sess, './my-model', global_step = best_iteration)

        print("iterations:",iteration,
              ", train_loss:",temp_train_loss,
              ", valid_loss:", temp_validation_loss,
              ", valid_accuracy:", temp_validation_accuracy)

        iteration += 1

In [None]:
# Report the best validation accuracy and the iteration whose checkpoint was saved for it
print("Best accuracy is",best_accuracy,"attained at iteration", best_iteration)

In [None]:
# Drop the references to the training data now that training is finished
del train_data, train_X, train_y
del image_train, image_validation, labels_train, labels_validation

# **Create submission file**

In [None]:
test_data = pd.read_csv('../input/test.csv')
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0
test_X = test_data[features].values
# same image layout as the training data: (num_images, 28, 28, 1)
test_X = test_X.reshape((-1,28,28,1))
# Submission ids are 1-based while the DataFrame index starts at 0.
# NOTE(review): the Kaggle Digit Recognizer sample submission numbers ImageId
# from 1 - confirm against ../input/sample_submission.csv.
iD = (test_data.index + 1).tolist()
print(test_X.shape)
del(test_data)

In [None]:
with tf.Session() as sess:

    # restore the best model
    model_path = "./"+"my-model-"+str(best_iteration)
    saver.restore(sess, model_path)

    # predict the test set in k chunks, otherwise the kernel runs out of memory
    k = 10

    # np.array_split covers every row even when the row count is not a multiple
    # of k; slicing with a fixed step of n//k dropped the last n % k rows and
    # left the prediction array shorter than the id list
    fold_predictions = []
    for i, fold_X in enumerate(np.array_split(test_X, k)):

        # with fewer than k rows some chunks are empty; there is nothing to predict for them
        if fold_X.shape[0] == 0:
            continue

        # feed dictionary for the fold (dropout disabled)
        feed_dict_test = {x_image: fold_X,
                          keep_prob: 1.0}

        # evaluate predictions of the fold
        fold_predictions.append(y_pred_cls.eval(session=sess, feed_dict=feed_dict_test))

        print("Finished computing prediction for fold", i)

    # join the folds once at the end instead of growing an array inside the loop;
    # the builtin int replaces np.int, which was removed in NumPy 1.24
    if fold_predictions:
        preds = np.concatenate(fold_predictions).astype(int)
    else:
        preds = np.array([], dtype=int)

    # save the submission csv file
    submission_path = "./submission.csv"
    submission = pd.DataFrame({"ImageId": iD, "Label": preds})
    submission.to_csv(submission_path, header = True, index=False)

# **Visualize Performance**

In [None]:
# x axis: 1-based iteration numbers, one per recorded loss value
iterations = list(range(1, iteration + 1))

# Training loss and validation loss on the same axes
plt.figure(figsize=(16, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(iterations, train_loss, label="train loss")
plt.plot(iterations, valid_loss, label="valid loss")

plt.title("Loss")
plt.xlabel("iter")
plt.ylabel("loss")
plt.legend()
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(16, 8), dpi= 80, facecolor='w', edgecolor='k')
# The x axis is derived from the series itself (1-based iteration numbers), so
# this cell does not depend on a variable created by the loss plot.
# The label names the plotted series, and the legend is drawn so it is shown.
plt.plot(range(1, len(valid_acc) + 1), valid_acc, label = "valid accuracy")
plt.title("Accuracy")
plt.xlabel("iter")
plt.ylabel("accuracy")
plt.legend()
plt.grid()
plt.show()