# MNIST 

## Load up the Tensorflow-gpu version of Keras

In [3]:
from __future__ import print_function
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

Using TensorFlow backend.


    batch_size is the number of samples processed before the weights are adjusted
    num_classes is the number of output classes: the 10 digits (0-9)
    epochs is how many full passes over the training data (forward and backward) we want to make
    num_channels is the number of color channels per image

In [4]:
# Training settings
batch_size = 128    # samples per gradient update
num_classes = 10    # one output class per digit
epochs = 50         # full passes over the training data
num_channels = 1    # number of color channels per image

# Input image dimensions (rows x columns, in pixels)
img_rows = 28
img_cols = 28


def convert_images(raw):
    """Reshape flat pixel rows into channels-last image arrays.

    Each row of ``raw`` is interpreted as ``num_channels`` planes of
    ``img_rows`` x ``img_cols`` pixels (sizes are read from the module-level
    settings), and the channel axis is then moved to the last position.

    Returns an array shaped (n_images, img_rows, img_cols, num_channels).
    """
    # Module-level names are only read here, so no ``global`` is required.
    planar = raw.reshape((-1, num_channels, img_rows, img_cols))
    # Axis order (image, channel, row, col) -> (image, row, col, channel).
    return planar.transpose(0, 2, 3, 1)

## Load up the csv data and reshape it

### Also normalize the pixel data by dividing it by 255.  This will make sure the pixel values are between 0 and 1.


In [5]:
train = pd.read_csv('train.csv')
# The first column of the training set is the label; the flattened pixel
# values follow it. The pixel count is derived from the configured image
# dimensions instead of a hard-coded column index.
num_pixels = img_rows * img_cols * num_channels
labels = train.iloc[:, 0]
features = train.iloc[:, 1:1 + num_pixels]

# Kaggle's unlabeled test set: scale to [0, 1] and reshape with the same
# helper used for the training data so both share one image layout.
test = pd.read_csv('test.csv')
test /= 255
kaggleTest = convert_images(test.values)

# Hold out 20% of the labeled data. Note that x_test / y_test are this
# held-out split of train.csv, not the Kaggle test set loaded above.
x_train, x_test, y_train, y_test = train_test_split(
    features.values, labels.values, test_size=0.2, random_state=1212)

# Reshape to (n, rows, cols, channels), then scale pixel values to [0, 1].
x_train = convert_images(x_train).astype('float32') / 255
x_test = convert_images(x_test).astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train shape: (33600, 28, 28, 1)
33600 train samples
8400 test samples


# One Hot Encode the Labels

The `categorical_crossentropy` loss expects the labels to be one-hot encoded.

In [6]:
# Convert the integer digit labels of both splits into one-hot rows of
# length num_classes.
y_train, y_test = (
    tf.keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)

    Sequential means the layers are executed sequentially.
    Dense will create a layer of x outputs.  I optionally specify the activation function.  Default activation is linear.
    Dropout will randomly drop some of the input units during training to help prevent overfitting.
    Flatten will take a multidimensional array and make it one dimensional.
    MaxPooling2D, pool_size will downscale the input by a factor of (x,y).

In [7]:
# Layer stack, in execution order: two 3x3 convolutions, 2x2 max pooling,
# dropout, flatten, a 128-unit dense layer, dropout, and a softmax output
# with one unit per class.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu',
           input_shape=(img_rows, img_cols, num_channels)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


## Compile the model

Using Adadelta. From Keras:

    Adadelta is a more robust extension of Adagrad that adapts learning rates based on a moving window of gradient updates, instead of accumulating all past gradients. This way, Adadelta continues learning even when many updates have been done. Compared to Adagrad, in the original version of Adadelta you don't have to set an initial learning rate. In this version, initial learning rate and decay factor can be set, as in most other Keras optimizers.

In [8]:
# Take the loss from tf.keras, the same package the model and optimizer come
# from; mixing in the standalone `keras` package is a common source of
# incompatibilities.
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(),
              metrics=['accuracy'])

## GPU Options

I need this below to help prevent my GPU from running out of memory.  Otherwise, TensorFlow might try to grab all the GPU memory it can up front, leaving too little available when it is time to make the predictions.

In [9]:
# Ask TensorFlow to grow its GPU memory allocation on demand.
# NOTE(review): `session` is never passed to Keras in this cell (for example
# via K.set_session) -- confirm the option takes effect as intended.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
session = tf.Session(config=config)

# Train, validating on the held-out split after every epoch.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)
model.fit(x_train, y_train, **fit_options)

# evaluate() returns the loss first, then the metrics given to compile().
score = model.evaluate(x_test, y_test, verbose=0)
held_out_loss, held_out_accuracy = score[0], score[1]
print('Test loss:', held_out_loss)
print('Test accuracy:', held_out_accuracy)


# Predict class probabilities for the Kaggle test images and keep the most
# likely digit for each one.
kagglePred = model.predict(kaggleTest)
results = pd.Series(np.argmax(kagglePred, axis=1), name="Label")

# ImageId is 1-based. Derive the count from the predictions rather than
# hard-coding the test-set size, so the two columns always line up.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)

submission.to_csv("submission.csv", index=False)

Train on 33600 samples, validate on 8400 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 0.05431537332541801
Test accuracy: 0.9905952
