# MNIST
Learn computer vision fundamentals with the famous MNIST handwritten-digit dataset.

In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series, crosstab
from datetime import datetime, timedelta 
from collections import Counter
import tensorflow as tf

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

<br>
### Load data

In [None]:
# Read the training and test sets from CSV files (paths are relative to the
# working directory) into pandas DataFrames.
train = pd.read_csv('data/train.csv')
test= pd.read_csv("data/test.csv")

In [None]:
# Print the training-set dimensions as (rows,columns), then preview its first
# three rows (the trailing expression is what a notebook cell renders).
print('training-set shape: ({0[0]},{0[1]})'.format(train.shape))
train.head(3)

In [None]:
# Print the test-set dimensions as (rows,columns), then preview its first
# three rows (the trailing expression is what a notebook cell renders).
print('testing-set shape: ({0[0]},{0[1]})'.format(test.shape))
test.head(3)

In [None]:
# Separate the target column from the feature columns.
# NOTE: `train` is rebound to a frame without 'label', so running this cell a
# second time fails with KeyError on train['label'].
labels = train['label']
train = train.drop("label",axis=1)

# Despite its name, labels_count holds the sorted distinct label values (not
# their frequencies); its length is later used as the number of classes.
labels_count = np.unique(labels.values)
labels_count

In [None]:
def display(img):
    """Draw a flattened square grayscale image on the current matplotlib axes.

    Args:
        img: 1-D array of pixel values whose length is a perfect square,
            e.g. 784 values for a 28x28 image.

    Raises:
        ValueError: if the number of pixels is not a perfect square, so the
            array cannot be laid out as a square image.
    """
    image_size = img.shape[0]

    # Use a plain Python int for the side length: a fixed-width 8-bit integer
    # would wrap around for images wider than 255 pixels.
    side = int(round(float(np.sqrt(image_size))))
    if side * side != image_size:
        raise ValueError(
            'cannot display %d pixels as a square image' % image_size
        )

    # (784) => (28,28)
    one_image = img.reshape(side, side)

    plt.axis('off')
    plt.imshow(one_image, cmap='Greys_r')

# Draw the first ten training images side by side (a 1x10 subplot grid,
# subplot indices are 1-based) and print the matching labels underneath.
for i in range(10):
    plt.subplot(1,10,i+1)
    display(train.values[i])
plt.show()
# The one-element tuple makes %s format the whole array as a single value.
print('label: %s' % (labels.values[0:10],))

In [None]:
def dense_to_one_hot(target, num_classes):
    """Convert integer class labels into one-hot encoded rows.

    Every label ``k`` picks row ``k`` of the ``num_classes`` x ``num_classes``
    identity matrix. With ten classes::

        0 => [1 0 0 0 0 0 0 0 0 0]
        1 => [0 1 0 0 0 0 0 0 0 0]
        ...
        9 => [0 0 0 0 0 0 0 0 0 1]

    Args:
        target: integer label, or sequence/array of integer labels.
        num_classes: width of each one-hot row.

    Returns:
        Float array holding one one-hot row per label in ``target``.
    """
    identity = np.identity(num_classes)
    return identity[target]

# Rebind `labels` from the pandas Series of digits to a 2-D one-hot array with
# one column per distinct label value.
# NOTE: after this runs `labels` is a plain array without `.values`, so
# executing this line a second time fails.
labels = dense_to_one_hot(labels.values, len(labels_count))

<br>
### Logistic regression as baseline

In [None]:
# Hyper-parameters for the logistic-regression baseline
learning_rate = 0.001  # step size handed to the gradient-descent optimizer
training_epochs = 30  # number of full passes over the training set
batch_size = 64  # maximum number of rows per mini-batch
display_step = 1  # print the average cost every `display_step` epochs

In [None]:
# Build the TensorFlow 1.x graph for a softmax (multinomial logistic)
# regression: a single linear layer from flattened pixels to class scores.

# tf Graph Input
x = tf.placeholder(tf.float32, [None, 784]) # mnist data image of shape 28*28=784
y = tf.placeholder(tf.float32, [None, 10]) # one-hot labels, 0-9 digits => 10 classes

# Set model weights (all-zero start; a small random normal is an alternative)
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Construct model
logits = tf.matmul(x, W) + b    # unnormalised per-class scores
pred = tf.nn.softmax(logits)    # class probabilities, used for prediction

# Minimize error using cross entropy.
# The loss op expects raw logits and applies the softmax itself, so it must be
# fed `logits`, not the already-normalised `pred`. Softmax cross entropy is
# used because every image belongs to exactly one of the 10 classes.
cost = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [None]:
def get_batch(data, batch_size=64):
    """Yield consecutive mini-batches of ``data`` in their original order.

    Rows are not shuffled, so generators created over several arrays of the
    same length produce batches that line up row for row.

    Args:
        data: array supporting ``len()`` and integer-array indexing along its
            first axis.
        batch_size: maximum number of rows per batch; the final batch is
            shorter when ``len(data)`` is not a multiple of it.

    Yields:
        The rows of ``data`` belonging to each successive batch.
    """
    n_rows = len(data)
    row_ids = np.arange(n_rows)
    for start in range(0, n_rows, batch_size):
        # Slicing clamps at the end of the index array, so the last batch
        # simply comes out shorter.
        yield data[row_ids[start:start + batch_size]]

In [None]:
# Start training
# Runs mini-batch gradient descent for `training_epochs` epochs, reports the
# accuracy on the training data, and computes class probabilities for the
# test set (left in `classification` for the following cell).
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        # Running sum of batch costs and number of batches seen this epoch
        avg_cost = counter = 0.
        
        # Fresh generators every epoch. Both walk the rows in the same,
        # unshuffled order, which is what lets zip() pair each image batch
        # with its labels.
        # NOTE(review): pixel values are fed exactly as loaded, with no
        # scaling visible here - confirm that is intended.
        train_X_batch = get_batch(train.values, batch_size)
        train_Y_batch = get_batch(labels, batch_size)
        for batch_xs, batch_ys in zip(train_X_batch, train_Y_batch):

            # Fit training using batch data
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})

            # Accumulate the batch cost; it is averaged after the loop
            avg_cost += c
            counter += 1
        # Unweighted mean over batches (a shorter final batch counts the same
        # as a full one). Raises ZeroDivisionError if there were no batches.
        avg_cost /= counter
        
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))

    print("Optimization Finished!")

    # Test model: a row counts as correct when the most probable predicted
    # class equals the position of the 1 in its one-hot label
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Accuracy is the mean of the 0/1 correctness indicators. It is evaluated
    # on the full training set, so it is not a held-out estimate.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval({x: train.values, y: labels}))
    
    # Prediction: per-class probabilities for every test row
    classification = sess.run(pred, feed_dict={x: test.values})
    print(classification)


In [None]:
# Keep the most probable digit for each test image and write the predictions
# to result.csv, indexed by a 1-based ImageId.
result = classification.argmax(axis=1)
df = DataFrame(
    {'Label': result},
    index=range(1, result.shape[0] + 1),
)
df.index.name = 'ImageId'
df.to_csv('result.csv')