# MNIST Digit Recognition

In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Read the training and test tables from the local ./data directory.
train, test = (
    pd.read_csv("./data/train.csv"),
    pd.read_csv("./data/test.csv"),
)

In [3]:
# Pull the target column out first, then keep every remaining column as
# features. The test table is used as-is (it is converted without dropping
# any column).
train_y_raw = train["label"].values
train_X_raw = train.drop("label", axis="columns").values
test_X_raw = test.values

## Preprocessing

In [4]:
from sklearn.preprocessing import OneHotEncoder

In [5]:
def preprocess(data):
    """Reshape flat pixel rows into image tensors and rescale them.

    Parameters
    ----------
    data : numpy.ndarray
        Array whose total size is a multiple of 28 * 28.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 28, 28, 1)`` holding the input values divided
        by 255.0.
    """
    # Trailing axis of length 1 is the single channel expected by the
    # convolution layers' input_shape=(28, 28, 1).
    images = data.reshape((-1, 28, 28, 1))
    # NOTE(review): dividing by 255.0 presumes 8-bit pixel intensities
    # (0..255) -- confirm against the CSV contents.
    return images / 255.0

In [6]:
# Apply the same reshape-and-scale step to both feature matrices.
train_X, test_X = (preprocess(raw) for raw in (train_X_raw, test_X_raw))

In [7]:
# Sanity check on the reshape: the trailing dimensions should be (28, 28, 1).
train_X.shape

(42000, 28, 28, 1)

In [8]:
def preprocess_labels(labels, num_classes=10):
    """One-hot encode integer class labels as a dense array.

    The encoding width is fixed by ``num_classes`` rather than inferred from
    the labels that happen to be present, so the result always lines up with
    a ``num_classes``-unit softmax output even when a class is missing from
    ``labels``. The result is a dense ``numpy.ndarray`` (not a SciPy sparse
    matrix), which is the form Keras expects for training targets.

    Parameters
    ----------
    labels : array-like of int
        Class ids in ``[0, num_classes)``. Any shape; flattened first.
    num_classes : int, default 10
        Number of columns in the encoding.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(n_labels, num_classes)`` with exactly one
        1.0 per row.

    Raises
    ------
    ValueError
        If a label is not a whole number or lies outside
        ``[0, num_classes)``.
    """
    raw = np.asarray(labels).reshape(-1)
    index = raw.astype(np.intp)
    # Reject values such as 2.5 that the integer cast would silently truncate.
    if np.any(index != raw):
        raise ValueError("labels must be whole numbers")
    # Explicit range check: a negative index would otherwise wrap around and
    # silently select the wrong class column.
    if index.size and (index.min() < 0 or index.max() >= num_classes):
        raise ValueError(
            "labels must lie in [0, {}); got range [{}, {}]".format(
                num_classes, index.min(), index.max()
            )
        )
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(num_classes)[index]

In [9]:
# One-hot encode the training labels via preprocess_labels.
train_y = preprocess_labels(train_y_raw)

In [10]:
# Sanity check: one encoded row per training label.
train_y.shape

(42000, 10)

## Keras - Convolutional Neural Network

In [11]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, MaxPooling2D

In [13]:
# Build the network layer by layer: two 3x3 ReLU convolutions (64 then 32
# filters) over 28x28x1 inputs, flattened into a 10-way softmax output.
convnet = Sequential()
convnet.add(
    Convolution2D(64, kernel_size=3, input_shape=(28,28,1), activation="relu")
)
convnet.add(Convolution2D(32, kernel_size=3, activation="relu"))
convnet.add(Flatten())
convnet.add(Dense(10, activation="softmax"))

In [14]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output layer; accuracy is reported alongside the loss during training.
convnet.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [15]:
# Train on the full training set for 5 epochs in mini-batches of 128.
convnet.fit(
    x=train_X,
    y=train_y,
    batch_size=128,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x10b1800f0>

## Prepare "submission.csv"

In [25]:
# Take the highest-scoring class per test image, then pair each prediction
# with a 1-based ImageId.
predictions = convnet.predict(test_X).argmax(axis=-1)
submissions = pd.DataFrame(
    {
        "ImageId": range(1, len(predictions) + 1),
        "Label": predictions,
    }
)

In [28]:
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first (e.g. on a fresh checkout).
os.makedirs("./submissions", exist_ok=True)
submissions.to_csv("./submissions/submission.csv", index=False)

In [29]:
# Read the written file back and show its first rows as a quick check.
(
    pd.read_csv("./submissions/submission.csv", index_col="ImageId")
    .head()
)

ImageId,Label
1,2
2,0
3,9
4,9
5,3
