In [1]:
import pandas as pd
import numpy as np
import csv

import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation, MaxPool2D
from keras.layers.core import Dense, Flatten, Dropout
from keras.optimizers import Adam, RMSprop
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.utils.np_utils import to_categorical
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import itertools
import matplotlib.pyplot as plt
%matplotlib inline

root = '/Users/schwalmdaniel/github/kaggle/mnist'
#root = 'd:/dev/python/kaggle/titanic'

train=pd.read_csv(root + "/train.csv")
test=pd.read_csv(root + "/test.csv")



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
X = train.drop(['label'], axis=1)
# create our feature matrix by removing the response variable
print ("learning from {} rows".format(X.shape[0]))
y = train['label']

learning from 42000 rows


In [3]:
# Normalize the data. Each pixel comes with a value between 0-255. CNN works better with values between 0-1.
X /= 255.0
test /= 255.0

In [4]:
# Reshape image in 3 dimensions (height = 28px, width = 28px , canal = 1)
X = X.values.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)


In [5]:
# Encode labels to one hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
y = to_categorical(y, num_classes = 10)

In [6]:
model= Sequential()
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
model.add(BatchNormalization())
model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'Same', 
                 activation ='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.2))


model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 64, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(filters = 128, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 128, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(512, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))
#model.compile(Adam(lr=.0001), loss = "categorical_crossentropy", metrics=["accuracy"])
model.compile(RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0), loss = "categorical_crossentropy", metrics=["accuracy"])

#history = model.fit(X, y, batch_size = 100, epochs = 10, validation_split=0.1,verbose = 2)

datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0.10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.15, # Randomly zoom image 
        shear_range = 0.15, # Shear Intensity (Shear angle in counter-clockwise direction in degrees)
        width_shift_range=0.15,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.15,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False,  # randomly flip images
)

X_train, X_val, Y_train, Y_val = train_test_split(X, y, test_size = 0.1)

datagen.fit(X_train)

epochs = 5
batch_size=64

history = model.fit_generator(datagen.flow(X_train,Y_train, batch_size=batch_size),epochs = epochs, validation_data=(X_val,Y_val),
                              verbose = 2, steps_per_epoch=X_train.shape[0] // batch_size)

Epoch 1/5
 - 447s - loss: 0.4848 - acc: 0.8797 - val_loss: 0.1178 - val_acc: 0.9733
Epoch 2/5
 - 489s - loss: 0.1585 - acc: 0.9605 - val_loss: 0.0735 - val_acc: 0.9810
Epoch 3/5
 - 448s - loss: 0.1284 - acc: 0.9710 - val_loss: 0.0559 - val_acc: 0.9874
Epoch 4/5
 - 444s - loss: 0.1108 - acc: 0.9760 - val_loss: 0.0423 - val_acc: 0.9890
Epoch 5/5
 - 496s - loss: 0.1035 - acc: 0.9780 - val_loss: 0.0717 - val_acc: 0.9871


In [7]:
preds = model.predict_classes(test,batch_size=batch_size,verbose=2)

In [8]:
test=pd.read_csv(root + "/test.csv")

predicted = pd.DataFrame()
predicted['ImageId'] = test.index + 1
predicted['Label'] = preds
predicted[['ImageId', 'Label']] = predicted[['ImageId', 'Label']].astype(int)
predicted.to_csv(root + '/submission_keras.csv', index=False,quoting=csv.QUOTE_NONNUMERIC)

predicted.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
