In [1]:
import pandas as pd
import numpy as np

In [2]:
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
train = pd.read_csv("train.csv")
# easy way to shuffle the dataset
train = train.sample(frac=1)

In [4]:
y_train = np.array(train["label"])
x_train = np.array(train.drop(labels = ["label"],axis = 1))
del train

In [6]:
count = np.zeros(10).astype('int64')

for i in y_train:
    count[i] += 1

min_val = np.min(count)
print(count)
print(min_val)

[4132 4684 4177 4351 4072 3795 4137 4401 4063 4188]
3795


In [7]:
count = np.zeros(10).astype('int64')
index = []
for i in range(len(y_train)):
    if count[y_train[i]] < min_val:
        count[y_train[i]] += 1
    else:
        index.append(i)

print(index)
        
x_train = np.delete(x_train, index,0)
y_train = np.delete(y_train, index,0)

[33592, 33597, 33607, 33614, 33617, 33639, 33642, 33646, 33656, 33663, 33665, 33673, 33676, 33701, 33702, 33737, 33745, 33749, 33751, 33752, 33774, 33792, 33820, 33832, 33833, 33851, 33869, 33870, 33871, 33890, 33924, 33929, 33939, 33942, 33943, 33954, 33979, 33992, 33997, 33999, 34002, 34005, 34012, 34020, 34021, 34029, 34039, 34049, 34061, 34071, 34077, 34079, 34085, 34110, 34123, 34132, 34151, 34152, 34177, 34179, 34180, 34185, 34202, 34210, 34224, 34228, 34234, 34247, 34266, 34283, 34297, 34304, 34316, 34317, 34329, 34336, 34339, 34361, 34380, 34389, 34396, 34403, 34407, 34408, 34409, 34430, 34437, 34447, 34471, 34477, 34481, 34484, 34498, 34501, 34506, 34524, 34533, 34551, 34557, 34567, 34584, 34600, 34601, 34613, 34623, 34643, 34687, 34691, 34694, 34702, 34703, 34708, 34715, 34721, 34731, 34734, 34738, 34752, 34776, 34778, 34818, 34824, 34827, 34842, 34860, 34871, 34880, 34881, 34894, 34898, 34906, 34912, 34915, 34936, 34941, 34942, 34953, 34978, 34991, 34993, 35004, 35008, 35020

In [24]:
y_train = to_categorical(y_train, num_classes = 10)

x_train = x_train.astype('float32')/255
x_train = x_train.reshape([-1,28,28,1])

In [25]:
epochs = 50
batch_size = 128

In [26]:
model = Sequential()

model.add(Conv2D(filters = 128, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu', input_shape = (28,28,1)))
model.add(Conv2D(filters = 128, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))


model.add(Conv2D(filters = 256, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(Conv2D(filters = 256, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

model.add(Conv2D(filters = 512, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(Conv2D(filters = 512, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(Conv2D(filters = 512, kernel_size = (3,3),padding = 'Same', 
                 activation ='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(1024, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(10, activation = "softmax"))

In [27]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 128)       1280      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 256)       295168    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 14, 256)       590080    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 256)         0         
__________

In [28]:
model.compile(optimizer = Adam() , loss = "categorical_crossentropy", metrics=["accuracy"])

In [29]:
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)

filepath="weights-improvement-{epoch:02d}-{val_acc:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

callbacks_list = [checkpoint, learning_rate_reduction]

In [35]:
history = model.fit(x_train,y_train, batch_size=batch_size,
                              epochs = epochs,
                              verbose = 1, validation_split=0.1,
                              callbacks=callbacks_list)

Train on 34155 samples, validate on 3795 samples
Epoch 1/50
Epoch 00001: val_acc improved from -inf to 0.98524, saving model to weights-improvement-01-0.9852.hdf5
Epoch 2/50
Epoch 00002: val_acc did not improve
Epoch 3/50
Epoch 00003: val_acc improved from 0.98524 to 0.98735, saving model to weights-improvement-03-0.9874.hdf5
Epoch 4/50
Epoch 00004: val_acc did not improve
Epoch 5/50
Epoch 00005: val_acc improved from 0.98735 to 0.98841, saving model to weights-improvement-05-0.9884.hdf5
Epoch 6/50
Epoch 00006: val_acc did not improve
Epoch 7/50
Epoch 00007: val_acc improved from 0.98841 to 0.98946, saving model to weights-improvement-07-0.9895.hdf5
Epoch 8/50
Epoch 00008: val_acc did not improve
Epoch 9/50
Epoch 00009: val_acc did not improve
Epoch 10/50
Epoch 00010: val_acc did not improve
Epoch 11/50
Epoch 00011: val_acc did not improve

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/50
Epoch 00012: val_acc improved from 0.98946 to 0.99209, 

In [36]:
# Convert model into JSON Format
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
    
model.save_weights("finished_{}.hdf5".format(epochs))