In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import RMSprop


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import train_test_split

# Report the TensorFlow version this notebook is running against.
print(tf.__version__)

# Input data files are available in the "../input/" directory.
# Listing that directory shows which files are there to be loaded.

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Read the train and test CSV files from the input directory into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

In [None]:
# (rows, columns) of the training frame.
train.shape

In [None]:
# (rows, columns) of the test frame.
test.shape

In [None]:
# Column dtypes, non-null counts and memory usage of the training frame.
train.info()

In [None]:
# Column dtypes, non-null counts and memory usage of the test frame.
test.info()

In [None]:
# Peek at the first rows of the training frame.
train.head()

In [None]:
# Peek at the first rows of the test frame.
test.head()

In [None]:
# Separate the target column from the remaining (feature) columns.
Y_train = train["label"]
X_train = train.drop(columns=["label"])

In [None]:
# Turn every flat row of values into a 28x28 grid; -1 lets NumPy infer the
# number of images from the data.
X_train = np.reshape(X_train.values, (-1, 28, 28))
test = np.reshape(test.values, (-1, 28, 28))

In [None]:
# Inspect the values of the first training image.
X_train[0]

We scale these values to a range of 0 to 1 before feeding to the neural network model. For this, we divide the values by 255. It's important that the training set and the testing set are preprocessed in the same way:

In [None]:
# Divide both arrays by the same constant so that the training and test
# images are preprocessed identically.
max_pixel = 255.0
X_train = X_train / max_pixel
test = test / max_pixel

In [None]:
# Show the first digit image together with a colour scale of its values.
fig, ax = plt.subplots()
image = ax.imshow(X_train[0])
fig.colorbar(image, ax=ax)
ax.grid(False)
plt.show()

In [None]:
# Show first digit label.
# Positional access (.iloc) matches X_train[0] regardless of the Series index;
# a label-based Y_train[0] raises KeyError once the index has been shuffled
# (e.g. if this cell is re-run after the train/validation split).
Y_train.iloc[0]

In [None]:
# Preview the first 25 training digits in a 5x5 grid, each captioned with its
# label, to get a feel for the training data.
fig, axes = plt.subplots(5, 5, figsize=(10, 10))
for idx, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(X_train[idx], cmap=plt.cm.binary)
    ax.set_xlabel(Y_train.iloc[idx])
plt.show()

In [None]:
# Append one more dimension of size 1 for the single (gray) colour channel.
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_train.shape

In [None]:
# Append the same size-1 channel dimension to the test images.
test = test.reshape((len(test), 28, 28, 1))
test.shape

In [None]:
# Hold out 10% of the labelled data as a validation set; the fixed
# random_state makes the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train,
    Y_train,
    test_size=0.1,
    random_state=2,
)

**Build the model**

In [None]:

# Baseline fully connected classifier, built layer by layer:
# flatten the (28, 28, 1) input -> 128 ReLU units -> 10-way softmax.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28, 1)))
model.add(keras.layers.Dense(128, activation=tf.nn.relu))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

The first layer in this network, tf.keras.layers.Flatten, transforms the format of the images from a 2d-array (of 28 by 28 pixels), to a 1d-array of 28 * 28 = 784 pixels. Think of this layer as unstacking rows of pixels in the image and lining them up. This layer has no parameters to learn; it only reformats the data.

After the pixels are flattened, the network consists of a sequence of two tf.keras.layers.Dense layers. These are densely-connected, or fully-connected, neural layers. The first Dense layer has 128 nodes (or neurons). The second (and last) layer is a 10-node softmax layer—this returns an array of 10 probability scores that sum to 1. Each node contains a score that indicates the probability that the current image belongs to one of the 10 classes.

**Compile the model**

Before the model is ready for training, it needs a few more settings. These are added during the model's compile step:

- Loss function — This measures how accurate the model is during training. We want to minimize this function to "steer" the model in the right direction.
- Optimizer — This is how the model is updated based on the data it sees and its loss function.
- Metrics — Used to monitor the training and testing steps. The following example uses accuracy, the fraction of the images that are correctly classified.

In [None]:
# The labels are passed to the model as-is (they are never one-hot encoded in
# this notebook), so the sparse variant of categorical cross-entropy is used.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs. The held-out validation split is passed in so that each
# epoch also reports validation loss/accuracy in `history`, which makes
# overfitting visible during training rather than only afterwards.
history = model.fit(
    X_train,
    Y_train,
    epochs=5,
    validation_data=(X_val, Y_val),
)

In [None]:
# Tabulate the per-epoch training metrics, add the epoch number as a column,
# and show the last few epochs.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
# Score the model on the held-out validation split. The variable names are
# kept for compatibility, but these numbers come from (X_val, Y_val), not from
# the unlabelled Kaggle test set, so the message says "Validation".
test_loss, test_acc = model.evaluate(X_val, Y_val)
print('Validation accuracy:', test_acc)

**Using CNN**

In [None]:
# Convolutional classifier: two (conv, conv, max-pool, dropout) stages followed
# by a dense head. `keras` here is `tensorflow.keras`, so a single alias is
# used for every layer instead of mixing `keras.layers` and `tf.keras.layers`.
layers = keras.layers
model = keras.Sequential([
    # Stage 1: 32 filters
    layers.Conv2D(32, (3, 3), padding='same', activation=tf.nn.relu,
                  input_shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), padding='same', activation=tf.nn.relu),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Dropout(0.25),
    # Stage 2: 64 filters
    layers.Conv2D(64, (3, 3), padding='same', activation=tf.nn.relu),
    layers.Conv2D(64, (3, 3), padding='same', activation=tf.nn.relu),
    layers.MaxPooling2D((2, 2), strides=2),
    layers.Dropout(0.25),
    # Classification head
    layers.Flatten(),
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dropout(0.50),
    layers.Dense(10, activation=tf.nn.softmax),
])

In [None]:
# Same optimizer, loss and metric as the dense baseline.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 30 epochs. The held-out validation split is passed in so that each
# epoch also reports validation loss/accuracy in `history`; over this many
# epochs that is the only way to see when the model starts to overfit.
history = model.fit(
    X_train,
    Y_train,
    epochs=30,
    validation_data=(X_val, Y_val),
)

In [None]:
# Score the model on the held-out validation split. The variable names are
# kept for compatibility, but these numbers come from (X_val, Y_val), not from
# the unlabelled Kaggle test set, so the message says "Validation".
test_loss, test_acc = model.evaluate(X_val, Y_val)
print('Validation accuracy:', test_acc)

In [None]:
# Run the current model over the preprocessed test images.
predictions = model.predict(test)

In [None]:
# Model output for the first test image.
predictions[0]

In [None]:
# Position of the largest value in the first test image's output.
np.argmax(predictions[0])

In [None]:
# Select, for every row, the index with the maximum probability and wrap the
# result in a Series named "Label".
predictions = pd.Series(np.argmax(predictions, axis=1), name="Label")

In [None]:
# Build the submission frame: a 1-based ImageId column next to the predicted
# labels. The id range is derived from the number of predictions rather than a
# hard-coded 28000, so the two columns always have the same length and
# pd.concat cannot pad either side with NaN rows.
image_ids = pd.Series(range(1, len(predictions) + 1), name="ImageId")
submission = pd.concat([image_ids, predictions], axis=1)
submission.to_csv("mnist_submission_v6.csv", index=False)

In [None]:
# Peek at the first rows of the submission frame.
submission.head()