In [1]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential

# "Sequential" models let us define a stack of neural network layers
from keras.models import Sequential

# import the core layers:
from keras.layers import Dense, Dropout, Activation, Flatten

import numpy as np
# import some utilities to transform/preprocess our data:

from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
import pandas as pd

# Read the training CSV into a DataFrame and preview its first rows.
train_df = pd.read_csv('train.csv')
train_df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Read the test CSV into a DataFrame and show its (rows, columns) shape.
test_df = pd.read_csv('test.csv')
test_df.shape

(28000, 784)

In [29]:
# Select every column of the test set as model input and apply the SAME
# preprocessing the training features receive (cast to float32, divide by 255).
# Without this the network, which is fitted on inputs scaled by 1/255, would be
# asked to predict on raw, unscaled values.
feature_cols = test_df.columns.values
test = test_df[feature_cols].astype('float32') / 255
test.shape

(28000, 784)

# data processing

In [4]:
# Feature matrix: all columns of the training frame except the target column.
feature_cols = train_df.columns.values
X = train_df[feature_cols].drop('label', axis=1)

# Target vector: the `label` column of the training frame.
y = train_df['label']

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=5,
)

In [6]:
# Cast both splits to float32 and divide by 255 so the features lie in [0, 1]:
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [7]:
# One-hot encode the labels of both splits into 10-column indicator matrices:
y_train, y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [8]:
# Inspect the one-hot encoded labels: overall shape, then the first ten rows.
print(y_train.shape)
print(y_train[:10])

(31500, 10)
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]


#  Keras

In [21]:
# Start from an empty Sequential container; the layers are added to it below.
model = Sequential()

In [22]:
# Layer widths used when the network is assembled: input features,
# hidden units, and output units.
input_size, hidden_neurons, out_size = 784, 100, 10

In [23]:
## Network structure: input_size -> hidden_neurons (sigmoid) -> out_size (softmax)

# The input layer does no processing, so it is not declared on its own;
# `input_dim` on the first Dense layer tells Keras how many features to expect.
network_layers = [
    # hidden layer: neurons followed by their activation
    Dense(hidden_neurons, input_dim=input_size),
    Activation('sigmoid'),
    # output layer: neurons followed by their activation
    Dense(out_size, input_dim=hidden_neurons),
    Activation('softmax'),
]
for layer in network_layers:
    model.add(layer)

# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy reported as a metric.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# more info about loss functions: https://keras.io/losses
# more info about optimizers: https://keras.io/optimizers

In [24]:
from keras.callbacks import ReduceLROnPlateau

# Learning-rate annealer: multiply the learning rate by 0.5 (never going below
# 1e-5) when the monitored quantity has stopped improving for 3 epochs.
# NOTE(review): newer Keras releases log this metric as 'val_accuracy' rather
# than 'val_acc' -- confirm against the installed version before upgrading.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [25]:
# Fit for up to 100 epochs in batches of 50, keeping 33% of the training rows
# aside for validation so the learning-rate callback has a metric to monitor.
fitted_model = model.fit(
    X_train,
    y_train,
    batch_size=50,
    epochs=100,
    validation_split=0.33,
    callbacks=[learning_rate_reduction],
    verbose=1,
)

Train on 21104 samples, validate on 10396 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100

Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 30/100
Epoch 31/100
Epoch 32/100

Epoch 00032: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 33/100
Epoch 34/100
Epoch 35/100

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100

Epoch 00040: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 41/100
Epoch 42/100
Epoch 43/100

Epoch 00043: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-

In [26]:
# Predict on the held-out split and show the shape of the returned array.
y_pridict = model.predict(X_test, verbose=1)
print(y_pridict.shape)

(10500, 10)


In [27]:
# Evaluate on the held-out split; the accuracy metric requested at compile
# time is read from position 1 of the returned scores.
score = model.evaluate(X_test, y_test, verbose=1)
holdout_accuracy = score[1]
print('The accuracy is: ', holdout_accuracy)

The accuracy is:  0.9647619047619047


# Output csv file

In [32]:
# Prediction:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the argmax
# over the softmax outputs of `predict` yields the same class indices and works
# on both old and new Keras versions.
predicted_classes = np.argmax(model.predict(test), axis=-1)
print(predicted_classes.shape)

(28000,)


In [34]:
# Display the array of predicted class indices for a quick visual check.
predicted_classes

array([2, 0, 9, ..., 3, 9, 2], dtype=int64)

In [35]:
# Build the submission table (1-based image ids paired with the predicted
# labels) and write it to CSV with a header row and no index column.
n_images = len(predicted_classes)
submissions = pd.DataFrame({
    "ImageId": list(range(1, n_images + 1)),
    "Label": predicted_classes,
})
submissions.to_csv("mnistSubmission.csv", index=False, header=True)