In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard

In [2]:
print("[*] data preprocessing")

# Read the training and test tables with pandas' default CSV settings.
train_df, test_df = map(pd.read_csv, ('./data/train.csv', './data/test.csv'))

[*] data preprocessing


In [3]:
# Drop the first CSV column of each frame and convert the rest to float32.
# NOTE(review): the first column is presumably a row id -- confirm against the CSV header.
train_data = train_df.iloc[:, 1:].to_numpy().astype('float32')
test_data = test_df.iloc[:, 1:].to_numpy().astype('float32')

# Show both arrays, one after the other (numpy abbreviates large arrays).
print(train_data, test_data, sep="\n")

[[5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [6. 0. 0. ... 0. 0. 0.]
 [8. 0. 0. ... 0. 0. 0.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [4]:
# Column 0 of train_data is used as the target; every other column is a feature.
y_train = train_data[:, 0]
x_train = train_data[:, 1:] / 255
# test_data has no target column, so all of its columns are features.
# NOTE(review): dividing by 255 presumably maps 8-bit pixel values into [0, 1] -- confirm.
x_test = test_data / 255

print(x_train, "----------", y_train, "-----------", x_test, sep="\n")

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
----------
[5. 0. 4. ... 5. 6. 8.]
-----------
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [5]:
# Hold out 20% of the training rows for validation; the fixed seed keeps the
# split reproducible.
# NOTE: x_train / y_train are overwritten by their own split, so re-running this
# cell without re-running the cell that created them shrinks the training set again.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=12345,
)

print(x_train, x_validate, y_train, y_validate, sep="\n")

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[4. 7. 6. ... 1. 8. 4.]
[9. 0. 9. ... 4. 8. 7.]


In [9]:
# Give every sample the (28, 28, 1) layout that the first Conv2D layer declares
# as its input_shape; the leading axis keeps one entry per sample.
x_train, x_test, x_validate = (
    np.reshape(images, (len(images), 28, 28, 1))
    for images in (x_train, x_test, x_validate)
)

print(x_train, x_validate, y_train, y_validate, sep="\n")

[[[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]]


 [[[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  ...

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.]
   [0.]
   [0.]
   ...
   [0.]
   [0.]
   [0.]]

  [[0.

In [10]:
print("[*] model generation")

# Three conv + max-pool stages (32 -> 64 -> 128 filters) followed by a small
# dense classifier head with dropout and a 10-way softmax output.
cnn_model = Sequential()

cnn_model.add(Conv2D(filters=32, kernel_size=3, activation='relu', padding='same',
                     input_shape=(28, 28, 1)))
cnn_model.add(MaxPooling2D(pool_size=2))

cnn_model.add(Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'))
cnn_model.add(MaxPooling2D(pool_size=2))

cnn_model.add(Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'))
cnn_model.add(MaxPooling2D(pool_size=2))

cnn_model.add(Flatten())
cnn_model.add(Dense(units=100, activation='relu'))
cnn_model.add(Dropout(rate=0.4))
cnn_model.add(Dense(units=10, activation='softmax'))



[*] model generation


In [11]:
print("[*] model training..")
cnn_model.compile(optimizer='NAdam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

[*] model training..


In [12]:
# Write 'best-cnn-model.h5' only when the validation loss improves.
# Without save_best_only=True the file is overwritten after every epoch, so it
# would hold the weights of the LAST epoch (up to `patience` epochs past the
# best one) rather than the best model its name promises.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    'best-cnn-model.h5',
    monitor='val_loss',
    save_best_only=True,
)

# Stop after 5 epochs without a val_loss improvement and roll the in-memory
# model back to its best weights. Both callbacks watch the same metric, so the
# file on disk and the in-memory model agree.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 20 epochs; `history` keeps the per-epoch metrics.
history = cnn_model.fit(
    x_train,
    y_train,
    epochs=20,
    validation_data=(x_validate, y_validate),
    callbacks=[checkpoint_cb, early_stopping_cb],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


In [13]:
# The model outputs one softmax score per class for each test sample;
# the index of the largest score along the last axis is the predicted label.
y_pred = np.argmax(cnn_model.predict(x_test), axis=-1)



In [14]:
# Start from the sample submission file, overwrite its 'label' column with the
# predictions, and write the result without the DataFrame index.
submission = (
    pd.read_csv('./data/sample_submission.csv', encoding='utf-8')
    .assign(label=y_pred)
)
submission.to_csv('mnist_submission.csv', index=False)