In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path
# per line.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras 
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Conv2D, AveragePooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator               # used for data augmentation

In [None]:
# Load the labelled training set and the unlabelled test set from the
# competition's input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/digit-recognizer/train.csv",
        "../input/digit-recognizer/test.csv",
    )
)

In [None]:
# Report (rows, columns) for the training frame, then the test frame.
for frame in (train, test):
    print(frame.shape)

In [None]:
# Separate the feature columns from the 'label' target column.
x_train = train.drop(columns=['label'])
y_train = train['label']

# The combined frame is no longer needed; drop the name to clear up memory.
del train

# Show the label series as the cell output.
y_train

In [None]:
# Scale values to between 0 and 1 for faster learning.
x_train = x_train / 255

# Every row is reshaped below into a square image, so the side length is the
# square root of the number of feature columns.
image_size = int(np.sqrt(x_train.shape[1]))

# The number of input channels goes in the last index (channels-last layout)
# for this version of Keras.
ip_shape = (image_size, image_size, 1)
x_train = x_train.values.reshape(x_train.shape[0], image_size, image_size, 1)

# Convert y to one-hot encodings.
# Use the public `keras.utils.to_categorical` entry point: the private
# `keras.utils.np_utils` submodule is not reliably exposed as an attribute and
# has been removed from newer Keras releases, where the old spelling raises
# AttributeError.
y_train = keras.utils.to_categorical(y_train.values, num_classes=10)

In [None]:
# Display the labels after one-hot encoding as the cell output.
y_train

In [None]:
# Report the array shapes after scaling, reshaping and one-hot encoding.
for caption, array in (
    ('x_train.shape = ', x_train),
    ('y_train.shape = ', y_train),
):
    print(caption, array.shape)

In [None]:
# Process the test data the same way as the training features: scale by 255,
# then reshape each row into an (image_size, image_size, 1) image.
test = (test / 255).values.reshape(len(test), image_size, image_size, 1)

print(test.shape)

In [None]:
# Hold out 10000 examples as a dev set; the fixed random_state makes the split
# repeatable.
x_train, x_dev, y_train, y_dev = train_test_split(
    x_train,
    y_train,
    random_state=12,
    test_size=10000,
)

In [None]:
# Report the shapes of the training and dev partitions.
for caption, array in (
    ('x_train.shape = ', x_train),
    ('y_train.shape = ', y_train),
    ('x_dev.shape   = ', x_dev),
    ('y_dev.shape   = ', y_dev),
):
    print(caption, array.shape)

In [None]:
# Small convolutional network: two convolution + average-pooling stages, then
# three dense layers ending in a 10-way softmax.
# ReLU is used for the hidden activations even though the original paper this
# design follows did not use it; per the author's note, ReLU was simply not
# well known when that paper was written, and it works better.
model = keras.Sequential([
    Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=ip_shape),
    AveragePooling2D(),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    AveragePooling2D(),
    Flatten(),
    Dense(units=120, activation='relu'),
    Dense(units=84, activation='relu'),
    Dense(units=10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Configure training: Adam with its default settings, categorical
# cross-entropy as the loss (the labels are one-hot encoded), and accuracy as
# the reported metric.
adam_optimizer = keras.optimizers.Adam()
crossentropy_loss = keras.losses.categorical_crossentropy
model.compile(
    optimizer=adam_optimizer,
    loss=crossentropy_loss,
    metrics=['accuracy'],
)

In [None]:
# Train for 15 epochs with mini-batches of 128 images.
# The held-out dev split is passed as validation data so every epoch also
# reports validation loss and accuracy, which makes over-fitting visible while
# training runs; the dev data is only evaluated, never used for weight updates.
# The returned History object is kept so the per-epoch metrics can be inspected
# afterwards and so the cell does not end with a bare object repr.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_data=(x_dev, y_dev),
)

In [None]:
# Score the trained model on the dev split; evaluate returns the loss followed
# by the compiled metric (accuracy).
dev_scores = model.evaluate(x_dev, y_dev)
dev_loss, dev_metric = dev_scores
print('Accuracy = ', dev_metric)

In [None]:
# Predict on the test images and keep, for each row, the index of the largest
# output as the predicted digit.
results = model.predict(test).argmax(axis=1)
results

In [None]:
# Assemble the submission table: ImageId counts from 1, Label is the predicted
# digit for the corresponding test row.
results_df = pd.DataFrame({
    'ImageId': np.arange(1, len(results) + 1),
    'Label': results,
})
results_df

In [None]:
# Write the predictions to submission.csv, omitting the DataFrame index column.
results_df.to_csv('submission.csv', index = False)