In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# Running this cell (by clicking run or pressing Shift+Enter) lists the files in that directory.
import pickle
import os
# Print the names of the entries found in ../input
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the Kaggle CSV data with pandas, then cache the arrays as pickle
# files, which are faster to read back than re-parsing the CSV.

OriginalData = pd.read_csv('../input/train.csv')

# Column 0 of train.csv is taken as the label; columns 1..784 as the pixels.
label = np.array(OriginalData.iloc[:,0].values)
OriginalData = np.array(OriginalData.iloc[:,1:785].values)

# test.csv is read as pixels only (columns 0..783).
OriginalTestData = pd.read_csv('../input/test.csv')
OriginalTestData = np.array(OriginalTestData.iloc[:,0:784].values)

# Context managers guarantee each file is flushed and closed before it is
# read again (the previous `f.close` without parentheses never closed them).
with open("train.pickle","wb") as f:
    pickle.dump(OriginalData, f)

with open("trainLabel.pickle","wb") as f:
    pickle.dump(label, f)

with open("test.pickle","wb") as f:
    pickle.dump(OriginalTestData, f)

In [None]:
# Load the Kaggle training data and labels from the pickle cache.
# NOTE: pickle is only safe for trusted files; these are expected to be the
# ones written by this notebook.

with open('train.pickle',"rb") as f:
    train_data = pickle.load(f)

with open('trainLabel.pickle',"rb") as f:
    train_label = pickle.load(f)

In [None]:
#modules need to import

import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Dropout, Activation,Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator

In [None]:
#function to display the digit in image form with its label
def digit_plot(data,label):
    """Draw the first image of ``data`` on a new figure titled with ``label``.

    data:  indexable collection of images; only ``data[0]`` is drawn.
    label: value inserted into the "class ..." figure title.
    """
    plt.figure()
    plt.imshow(data[0], cmap='binary')
    plt.title("class {}".format(label))
    # Empty tick tuples remove the tick marks from both axes.
    plt.yticks(())
    plt.xticks(())

In [None]:
# Model training parameters and preprocessing of the training data.

nb_classes = 10
input_shape = (28, 28, 1)
batch_size = 1024
epochs = 20

# Hold out 30% of the samples; the fixed random_state keeps the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    train_data, train_label, test_size=0.3, random_state=1)

# Divide by 255 (pixel values are presumably 0-255) and reshape every
# sample to a 28x28 single-channel image.
train_x = (train_x.astype('float64') / 255).reshape(-1, 28, 28, 1)
test_x = (test_x.astype('float64') / 255).reshape(-1, 28, 28, 1)

# One-hot encode the labels for the categorical cross-entropy loss.
train_y_1 = np_utils.to_categorical(train_y, nb_classes)
test_y_1 = np_utils.to_categorical(test_y, nb_classes)

In [None]:
#CNN model building function
#Using Conv2D, MaxPooling2D, Dropout, and Flatten layers
#the layers before the output use the relu activation
#the output layer uses the softmax activation
#Using the RMSprop optimizer

def model_build():
    """Create and compile the CNN used for digit classification.

    Reads the module-level ``input_shape`` and ``nb_classes``.
    Returns a compiled ``Sequential`` model (categorical cross-entropy loss,
    RMSprop optimizer, accuracy metric).
    """
    layer_stack = [
        Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
               input_shape=input_shape),
        Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.4),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(nb_classes, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.RMSprop(),
        metrics=['accuracy'],
    )
    return model

In [None]:
#Data Augmentation function
#which is used to expand the dataset by adding data variability (rotation, shift, zoom, ...)
def DataAugmentation(data):
    """Return an ``ImageDataGenerator`` configured for augmentation and fitted on ``data``.

    The generator is set up with random rotation, width/height shift and zoom,
    using the ranges listed below.
    """
    augmentation_options = dict(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
    )
    generator = ImageDataGenerator(**augmentation_options)
    generator.fit(data)
    return generator

In [None]:
# Generate the augmented data, then build, train and save the model.
DataExpand = DataAugmentation(train_x)
model = model_build()

# Augmented mini-batches drawn from the training split.
augmented_batches = DataExpand.flow(train_x, train_y_1, batch_size=batch_size)

history = model.fit_generator(
    augmented_batches,
    steps_per_epoch=train_x.shape[0] // batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(test_x, test_y_1),
)
model.save('digital_recognizer.h5')

In [None]:
# Evaluate the model on the held-out split.
# score[0] is the loss and score[1] the accuracy metric set in model.compile.
score = model.evaluate(test_x,test_y_1,verbose=0)
# Use str.format (a dot, not a comma) so the value replaces the {} placeholder.
print("Test loss:{}".format(score[0]))
print("Test accuracy:{}".format(score[1]))

In [None]:
# Plot the training and validation curves recorded during fitting.

# Older Keras releases record accuracy under 'acc', newer ones under
# 'accuracy'; pick whichever key this History object contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

for metric_key, y_label in ((acc_key, 'Accuracy'), ('loss', 'Loss')):
    fig = plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(history.history[metric_key])
    # Validation metrics are stored under the same key with a 'val_' prefix.
    plt.plot(history.history['val_' + metric_key])
    plt.legend(['training','validation'],loc='lower right')
    plt.show()

In [None]:
# Train the model on all of the Kaggle training data.
# Run this once the model parameters have been tuned.
final_train_data = train_data.reshape(-1,28,28,1)
final_train_data = final_train_data.astype('float64')/255
final_train_label = np_utils.to_categorical(train_label,nb_classes)

DataExpand = DataAugmentation(final_train_data)
model = model_build()
model.fit_generator(DataExpand.flow(final_train_data,final_train_label,batch_size=batch_size),
                   epochs=30,verbose=1,steps_per_epoch=final_train_data.shape[0]//batch_size)

model.save('final_model.h5')

# NOTE: test_x was split from train_data, so the final model has already seen
# these samples; this score is a sanity check, not an unbiased validation.
score = model.evaluate(test_x,test_y_1,verbose=2)
# Use str.format (a dot, not a comma) so the value replaces the {} placeholder.
print('validation loss:{}'.format(score[0]))
print('validation accuracy:{}'.format(score[1]))

In [None]:
#import the module for model loading
from keras.models import load_model 

In [None]:
# Load the trained model and the Kaggle test data,
# then write the predictions to submission.csv
current_model = load_model('final_model.h5')
with open("test.pickle","rb") as f:
    test_data = pickle.load(f)

# Apply the same preprocessing as for training: reshape to 28x28x1 images
# and scale by 1/255. The model was fitted on scaled inputs, so raw pixel
# values would give unreliable predictions.
test_data = test_data.reshape(-1,28,28,1).astype('float64') / 255

prediction = current_model.predict(test_data,verbose=2)
# Take the argmax of the raw class probabilities. Rounding first would turn
# every row whose best probability is below 0.5 into all zeros, and argmax
# would then silently report class 0.
prediction = prediction.argmax(1)

# Check some prediction results visually with the digit_plot function
for index,currentData in enumerate(test_data[0:20]):
    currentData = currentData.reshape(1,28,28)
    digit_plot(currentData,prediction[index])

# Kaggle's Digit Recognizer submission expects 1-based ImageId values.
ids = np.arange(1,prediction.shape[0] + 1,1)
output = pd.DataFrame({'ImageId':ids,"Label":prediction})
output.to_csv("submission.csv",index=False)

