In [1]:
from data import Data
# Instantiate the project-local Data helper. Later cells read data.train,
# data.train_target and data.test from it and call its reporting and
# submission methods.
data = Data()

In [2]:
# Report the (rows, columns) shape of the train and test sets.
data.data_size()

Train size is (42000, 784).
Test size is (28000, 784)


In [3]:
# Print a per-dataset summary (index range, column count, dtypes, memory usage)
# for the train and test sets.
data.data_info()


Train:
--------------------------------------------------

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 251.2 MB

Test:
--------------------------------------------------

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB


In [71]:
import numpy as np
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
def pd_to_tensor(data_set, image_shape=(28, 28, 1)):
    """Convert a table of flattened images into an image tensor.

    Parameters
    ----------
    data_set : array-like (e.g. a pandas DataFrame)
        One flattened image per row; each row must hold exactly as many
        values as the product of ``image_shape``.
    image_shape : tuple of int, optional
        Shape given to every image. Defaults to ``(28, 28, 1)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows,) + image_shape`` holding a copy of the
        input values (the input object is not modified).

    Raises
    ------
    ValueError
        If the number of values cannot be divided into whole images of
        ``image_shape``.
    """
    pixels = np.array(data_set)
    # A single vectorised reshape replaces the per-row Python loop and, unlike
    # the loop, keeps the trailing image dimensions when there are zero rows.
    return pixels.reshape((-1,) + tuple(image_shape))

def data_preprocess(train, train_target, test, num_classes=10, test_size=0.25, random_state=1):
    """Turn the raw tables into tensors and split off a hold-out set.

    Parameters
    ----------
    train : array-like
        Flattened training images, one per row (passed to ``pd_to_tensor``).
    train_target : array-like
        Integer class label for every row of ``train``.
    test : array-like
        Flattened images without labels (passed to ``pd_to_tensor``).
    num_classes : int, optional
        Width of the one-hot label encoding. Defaults to 10.
    test_size : float or int, optional
        Share of the labelled data held out by ``train_test_split``.
        Defaults to 0.25.
    random_state : int, optional
        Seed for the split so it is reproducible. Defaults to 1.

    Returns
    -------
    tuple
        ``(xtrain, xtest, ytrain, ytest, test)``. Note that ``xtest`` and
        ``ytest`` are the hold-out part of the *labelled* data, while the
        last element is the converted ``test`` argument.
    """
    train = pd_to_tensor(train)
    test = pd_to_tensor(test)
    # One-hot encode the integer labels for use with a softmax output.
    labels = np_utils.to_categorical(np.array(train_target), num_classes)
    xtrain, xtest, ytrain, ytest = train_test_split(
        train, labels, test_size=test_size, random_state=random_state)
    return xtrain, xtest, ytrain, ytest, test


In [72]:
# Build the training / hold-out tensors and the converted test tensor.
xtrain, xtest, ytrain, ytest, test = data_preprocess(data.train, data.train_target, data.test)
# print(...) with a single argument produces the same output on Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print(xtrain.shape)
print(ytest.shape)

(31500, 28, 28, 1)
(10500, 10)


In [89]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense
from keras.models import Sequential
from keras import regularizers

# Small CNN for 28x28x1 inputs: two conv + max-pool stages, an L2-regularised
# Dense layer, global average pooling, then a 10-way softmax classifier.
model = Sequential([
    Conv2D(filters=16, kernel_size=3, padding='same', activation='relu',
           input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=64, kernel_size=2, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    # This Dense layer receives the 4-D feature map, so it is applied along
    # the last (channel) axis at every spatial position.
    Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.02)),
    GlobalAveragePooling2D(),
    Dense(10, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_30 (Conv2D)           (None, 28, 28, 16)        160       
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 14, 14, 64)        4160      
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 7, 7, 64)          0         
_________________________________________________________________
dense_31 (Dense)             (None, 7, 7, 512)         33280     
_________________________________________________________________
global_average_pooling2d_16  (None, 512)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 10)                5130      
Total params: 42,730

In [90]:
# Targets are one-hot encoded, hence categorical cross-entropy; accuracy is
# reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)

In [91]:
from keras.callbacks import ModelCheckpoint  

import os

epochs = 40

# The checkpoint is written into a sub-directory; create it up front so the
# first save cannot fail on a fresh checkout where the folder does not exist.
checkpoint_path = 'saved_models/weights.best.hdf5'
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# save_best_only=True: the file is only overwritten when the monitored
# quantity improves, so it always holds the best weights seen so far.
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

# xtest / ytest are the hold-out split of the labelled data and serve as the
# validation set here.
model.fit(xtrain, ytrain,
          validation_data=(xtest, ytest),
          epochs=epochs, batch_size=20, callbacks=[checkpointer], verbose=1)

Train on 31500 samples, validate on 10500 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x115f47ad0>

In [81]:
# Restore the weights stored at the path the training cell's ModelCheckpoint writes to.
# NOTE(review): this cell's recorded execution count is lower than the training
# cell's, so the saved outputs may stem from an earlier model; re-run the
# notebook top to bottom to confirm.
model.load_weights('saved_models/weights.best.hdf5')

In [82]:
# Predict the whole test tensor in one batched call instead of issuing a
# separate predict() call per image, then pick the most probable class per row.
# list(...) keeps `predictions` a plain list of integer labels, as before.
predictions = list(np.argmax(model.predict(test), axis=1))

In [84]:
# Pass the predicted labels and the target file name to the Data helper.
# NOTE(review): the submission format is defined in the `data` module, which is not shown here.
data.write_submission(predictions, 'cnn_kernel_k001_a001.csv')