In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as graph
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.losses import categorical_crossentropy
from keras.callbacks import TensorBoard
from IPython.display import display

%matplotlib inline

def plot_number(xi, yi=''):
    """Draw a single flattened digit as a 28x28 greyscale image.

    Parameters
    ----------
    xi : array exposing ``reshape``
        Pixel values; must contain exactly 28 * 28 entries so that it can be
        reshaped into a square image.
    yi : object, optional
        Value formatted into the figure title. Defaults to an empty title.

    Returns
    -------
    None
        The figure is displayed through ``graph.show()``.
    """
    graph.figure(figsize=(4, 4))

    caption = '{}'.format(yi)
    graph.title(caption)

    pixels = xi.reshape(28, 28)
    graph.imshow(pixels, cmap='Greys')

    # No grid lines drawn over the image.
    graph.grid(False)
    graph.show()

In [None]:
# Load the Kaggle MNIST training table from disk.
data = pd.read_csv('data/kaggle_mnist/train.csv')

# DataFrame.info() prints its report itself and returns None, so it is called
# bare: wrapping it in print() would add a stray "None" line to the output.
data.info()

# pop() removes the 'label' column from `data` in place, so the remaining
# columns are exactly the feature matrix.
y = data.pop('label').values
x = data.values

print(y.shape, x.shape)
print(y[:5])

In [None]:
# Sanity check: draw one randomly chosen row with its label as the title.
pos = np.random.randint(len(y))
plot_number(x[pos], y[pos])

# Train Model

In [None]:
# Encode
# One-hot encode the labels (one column per distinct label value).
# The encoder is left at its default sparse output and densified with
# .toarray() instead of passing a keyword: `sparse=` was renamed to
# `sparse_output=` in scikit-learn 1.2 and removed in 1.4, so spelling out
# either name fails on some installed versions.
y_onehot = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()

# Scale by 255 (assumes 8-bit pixel intensities — TODO confirm). The float
# divisor guarantees true division regardless of the input's integer dtype.
x_minmax = x / 255.0

# Train Test Split
# 30% of the rows are held out; the split is unseeded, so it differs per run.
x_train, x_val, y_train, y_val = train_test_split(x_minmax, y_onehot, test_size=0.3)
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)

# The title here is the one-hot row, which lets the encoding be eyeballed
# against the drawn digit.
plot_number(x_train[0, :], y_train[0, :])

In [None]:
# Initialise Type
model = Sequential()

# Tensorboard Tracker
# Logs are written next to the data files; histograms are recorded every epoch.
tensor_board = TensorBoard(log_dir='./data/kaggle_mnist/', histogram_freq=1, write_graph=False)

# Layers
# Three identical hidden stages: Dense(96, relu) followed by 10% dropout.
# Only the first layer needs the input width (number of feature columns).
model.add(Dense(96, input_dim=x.shape[1], activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(96, activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(96, activation='relu'))
model.add(Dropout(0.1))

# Output layer: one softmax unit per one-hot column.
model.add(Dense(y_onehot.shape[1], activation='softmax'))

# Compile
model.summary()
model.compile(
    loss=categorical_crossentropy,
    # The learning rate is passed positionally on purpose: the keyword was
    # `lr` in older Keras and is `learning_rate` in newer releases (which
    # reject `lr`), while the first positional argument is the learning rate
    # in both.
    optimizer=Adam(1e-4),
    metrics=['accuracy']
)

# Train Model
# validation_split carves a further 25% out of x_train for per-epoch
# validation; x_val / y_val stay untouched for the final evaluation.
model.fit(
    x_train, y_train,
    epochs=1000, validation_split=0.25, batch_size=256,
    verbose=2, callbacks=[tensor_board]
)

In [None]:
# Score the trained model on the held-out split.
test_score = model.evaluate(x_val, y_val, verbose=1)

# Blank line, then one "name: value" line per reported metric.
print()
for metric, number in zip(model.metrics_names, test_score):
    line = '{}: {}'.format(metric, number)
    print(line)

# Test Set

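Let's hope the model generalises. First, create an empty answer table (`ImageId`, `Label`) and load the Kaggle test set to inspect its structure.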

In [None]:
# Empty answer table with the two columns 'ImageId' and 'Label'.
# `columns` must be passed as a keyword argument; written as
# `columns[...]` it subscripts an undefined name and raises NameError.
answer_df = pd.DataFrame(columns=['ImageId', 'Label'])
# info() prints its own report and returns None, so it is not wrapped in print().
answer_df.info()

# Load the test table and inspect its structure.
test_set = pd.read_csv('data/kaggle_mnist/test.csv')
test_set.info()
print(test_set.columns)
print(test_set.index)
print(test_set.shape)