In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization, Activation
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
def _prepare_images(pixels):
    """Normalize pixel intensities and reshape them into Keras image tensors.

    Args:
        pixels: array-like of pixel intensities (assumed to be in the 0-255
            range) whose total size is a multiple of 28 * 28, e.g. flat
            784-value rows or 28x28 images.

    Returns:
        A float32 ndarray of shape (N, 28, 28, 1) with values divided by 255.
    """
    # Cast to float32 *before* dividing: dividing integer data by 255.0
    # would otherwise yield a float64 array and double the memory footprint.
    return (np.asarray(pixels, dtype="float32") / 255.0).reshape(-1, 28, 28, 1)


# Load the Kaggle dataset
train = pd.read_csv("/digit-recognizer/train.csv")
test = pd.read_csv("/digit-recognizer/test.csv")

# Preprocess the Kaggle dataset: normalize + reshape the pixels, one-hot the labels
X = _prepare_images(train.drop("label", axis=1).values)
y = to_categorical(train["label"], num_classes=10)  # digits 0-9, one-hot encoded
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


# Load the external MNIST dataset bundled with TensorFlow
(X_mnist_train, y_mnist_train), (X_mnist_val, y_mnist_val) = mnist.load_data()

# Apply the same preprocessing as for the Kaggle data
X_mnist_train = _prepare_images(X_mnist_train)
y_mnist_train = to_categorical(y_mnist_train, num_classes=10)

X_mnist_val = _prepare_images(X_mnist_val)
y_mnist_val = to_categorical(y_mnist_val, num_classes=10)

# Concatenate the Kaggle and MNIST datasets
# NOTE(review): the Kaggle digit-recognizer data is reportedly derived from
# MNIST, so the same images may end up in both the training and validation
# arrays below, making validation accuracy optimistic -- confirm before
# relying on it for model selection.
X_train_combined = np.concatenate((X_train, X_mnist_train), axis=0)
y_train_combined = np.concatenate((y_train, y_mnist_train), axis=0)

X_val_combined = np.concatenate((X_val, X_mnist_val), axis=0)
y_val_combined = np.concatenate((y_val, y_mnist_val), axis=0)

In [None]:
def _conv_group(filters, kernel_size, **first_conv_kwargs):
    """Return the layers of one convolutional group.

    The group is: an L2-regularized Conv2D with ReLU, a bias-free Conv2D
    followed by BatchNormalization + ReLU, 2x2 max pooling, and 25% dropout.

    Args:
        filters: number of filters for both Conv2D layers.
        kernel_size: kernel size for both Conv2D layers.
        **first_conv_kwargs: extra keyword arguments forwarded to the first
            Conv2D only (used to pass `input_shape` for the first group).
    """
    return [
        Conv2D(filters, kernel_size=kernel_size, strides=1, activation='relu',
               kernel_regularizer=l2(0.0005), **first_conv_kwargs),
        # The bias is dropped because BatchNormalization follows immediately.
        Conv2D(filters, kernel_size=kernel_size, strides=1, use_bias=False),
        BatchNormalization(),
        Activation('relu'),
        MaxPooling2D(pool_size=2, strides=2),
        Dropout(0.25),
    ]


def _dense_group(units):
    """Return a bias-free Dense layer followed by BatchNormalization + ReLU."""
    return [
        Dense(units, use_bias=False),
        BatchNormalization(),
        Activation('relu'),
    ]


# Layers are created in the same order as they appear in the network:
# two convolutional groups, a flatten step, three dense groups, and the
# 10-way softmax classifier.
model = Sequential([
    # Conv2D group 1 (5x5 kernels, 32 filters) -- also declares the input shape
    *_conv_group(32, 5, input_shape=(28, 28, 1)),

    # Conv2D group 2 (3x3 kernels, 64 filters)
    *_conv_group(64, 3),

    # Flatten the feature maps into a 1D vector
    Flatten(),

    # Fully connected head
    *_dense_group(256),
    *_dense_group(128),
    *_dense_group(84),
    Dropout(0.25),

    # One probability per digit class
    Dense(10, activation='softmax'),
])

# Author's guidance: to push accuracy beyond 99.8%, use a very small
# learning rate (1e-6), as configured here.
model.compile(optimizer=Adam(learning_rate=1e-6), loss='categorical_crossentropy', metrics=['accuracy'])

# ModelCheckpoint callback: keep only the weights of the epoch with the best
# validation accuracy.
# Keras 3 rejects weights-only checkpoint paths that do not end in
# ".weights.h5"; this suffix is also accepted by older tf.keras releases,
# which pick the HDF5 format from the trailing ".h5".
checkpoint_path = "best_m.weights.h5"
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', verbose=1, save_weights_only=True, save_best_only=True)

# EarlyStopping callback: stop once validation loss has not improved for
# 200 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=200, verbose=1)

# Train the model.
# Author's guidance: with a learning rate below 1e-5, set `epochs` to more
# than 1000, since convergence is very slow.
history = model.fit(X_train_combined, y_train_combined, batch_size=64,
                    epochs=2000, verbose=1,
                    validation_data=(X_val_combined, y_val_combined),
                    callbacks=[checkpoint, early_stop])

# Restore the weights saved by the checkpoint callback (best val_accuracy),
# which are not necessarily those of the final epoch.
model.load_weights(checkpoint_path)

In [None]:
# Predict test data
# The preprocessed pixels go into a new name instead of overwriting `test`:
# the raw DataFrame stays intact, so this cell can be re-run without failing
# on `.values` or dividing by 255 a second time.
X_test = (test / 255.0).values.reshape(-1, 28, 28, 1)
predictions = model.predict(X_test)
# The index of the highest softmax output is the predicted digit.
predicted_labels = np.argmax(predictions, axis=1)

# Save submission.csv with 1-based ImageId values in prediction order
submission = pd.DataFrame({'ImageId': range(1, len(predicted_labels) + 1), 'Label': predicted_labels})
submission.to_csv('submission.csv', index=False)