# Kaggle Digit Recognition Problem
## MLP

## Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout, Activation
from keras.utils import np_utils

# Fix the seed of NumPy's global random number generator so repeated runs
# start from the same NumPy random state.
# NOTE(review): only NumPy is seeded here; whether this is sufficient to make
# the Keras/TensorFlow weight initialisation and training fully reproducible
# is not established by this notebook - confirm before relying on it.
seed = 7
np.random.seed(seed)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Load in train and test data

In [2]:
# Read both CSV files from the working directory; row 0 of each file is
# treated as the column header.
train_data = pd.read_csv("train.csv", header=0)
test_data = pd.read_csv("test.csv", header=0)

# Report (rows, columns) for each frame as a quick sanity check.
for frame in (train_data, test_data):
    print(frame.shape)

(42000, 785)
(28000, 784)


## Separate the training features and labels, and extract the test features

In [3]:
# The first column of the training frame is used as the target; every
# remaining column is used as an input feature. The test frame has no target
# column, so all of its columns are features.
label_column = train_data.iloc[:, 0]
pixel_columns = train_data.iloc[:, 1:]

X_train = pixel_columns.values.astype(np.float32)
Y_train = label_column.values.astype(np.int32)
X_test = test_data.values.astype(np.float32)

print(X_train.shape)
print(Y_train)
print(X_test)

(42000, 784)
[1 0 1 ... 7 6 9]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


## Normalize pixel values to the range [0, 1]

In [4]:
# Divide every feature by the same constant so train and test share one scale.
# Assumes the raw pixel intensities lie in 0..255 - TODO confirm against the data.
PIXEL_SCALE = 255
X_train = X_train / PIXEL_SCALE
X_test = X_test / PIXEL_SCALE

## One hot encode target variable (outputs)

In [5]:
# Replace the integer class labels with their one-hot (binary matrix) form,
# which is what the categorical cross-entropy loss used below consumes.
# This rebinds Y_train, so the cell should be run once per kernel session.
one_hot_labels = np_utils.to_categorical(Y_train)
Y_train = one_hot_labels
print(Y_train)

[[0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


In [6]:
# Derive the network's input and output widths from the prepared arrays:
# one input per feature column and one output per one-hot label column.
input_dim, num_classes = X_train.shape[1], Y_train.shape[1]

## Define baseline MLP model

In [7]:
def baseline_model():
    """Build and compile the baseline multi-layer perceptron.

    The network has one hidden Dense layer of 784 ReLU units followed by a
    softmax output layer with ``num_classes`` units. Both layers use the
    'normal' kernel initializer. The input width (``input_dim``) and the
    output width (``num_classes``) are read from module-level variables, so
    those must be defined before this function is called.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer, and accuracy as the reported metric.
    """
    hidden_layer = Dense(
        784,
        input_dim=input_dim,
        kernel_initializer='normal',
        activation='relu',
    )
    output_layer = Dense(
        num_classes,
        kernel_initializer='normal',
        activation='softmax',
    )

    model = Sequential()
    for layer in (hidden_layer, output_layer):
        model.add(layer)

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

## Fit and evaluate the model using the training data

In [8]:
# Build a fresh baseline network and train it on the full training set for
# 10 epochs in mini-batches of 200 samples; verbose=2 prints one line per epoch.
model = baseline_model()
model.fit(
    x=X_train,
    y=Y_train,
    batch_size=200,
    epochs=10,
    verbose=2,
)

Epoch 1/10
 - 3s - loss: 0.3218 - acc: 0.9082
Epoch 2/10
 - 3s - loss: 0.1307 - acc: 0.9630
Epoch 3/10
 - 3s - loss: 0.0861 - acc: 0.9754
Epoch 4/10
 - 3s - loss: 0.0602 - acc: 0.9830
Epoch 5/10
 - 3s - loss: 0.0441 - acc: 0.9875
Epoch 6/10
 - 3s - loss: 0.0331 - acc: 0.9907
Epoch 7/10
 - 3s - loss: 0.0242 - acc: 0.9942
Epoch 8/10
 - 3s - loss: 0.0176 - acc: 0.9960
Epoch 9/10
 - 3s - loss: 0.0130 - acc: 0.9977
Epoch 10/10
 - 3s - loss: 0.0096 - acc: 0.9985


<keras.callbacks.History at 0x22484ddc1d0>

In [12]:
# Score the fitted model on the same data it was trained on. Because this is
# not held-out data, the figure is a training accuracy and says nothing about
# how well the model generalises.
scores = model.evaluate(x=X_train, y=Y_train, verbose=0)
train_accuracy = scores[1]  # index 0 is the loss, index 1 the accuracy metric
print('Accuracy:', train_accuracy)

Accuracy: 0.9993571428571428


In [10]:
# Predict a digit for every test image and write the Kaggle submission file.
#
# The predicted class is the index of the largest softmax probability in each
# row. This is what Sequential.predict_classes computed for a softmax output;
# that method no longer exists in current Keras releases, so the argmax is
# taken explicitly here.
class_probabilities = model.predict(X_test, verbose=0)
preds = np.argmax(class_probabilities, axis=1)
print(preds.shape)
print(preds)

# The Digit Recognizer competition expects exactly two columns, ImageId and
# Label, where ImageId counts the test rows starting from 1. The DataFrame's
# own 0-based index must therefore not be written to the file.
my_csv = pd.DataFrame(
    {
        'ImageId': np.arange(1, len(preds) + 1),
        'Label': preds,
    },
    columns=['ImageId', 'Label'],
)
my_csv.to_csv('first-submission.csv', index=False)

(28000,)
[2 0 9 ... 3 9 2]
