In [3]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Input, concatenate, GlobalAveragePooling2D, AveragePooling2D, Flatten, Dense
from tensorflow.keras.models import Model

Using TensorFlow backend.


In [13]:
# Main training table: the 'label' column holds the targets; every remaining
# column of a row is viewed as one 28x28 single-channel image.
df = pd.read_csv('train.csv')
y = df['label'].to_numpy()
X = df.drop(columns=['label']).to_numpy().reshape(len(df), 28, 28, 1)

# Additional labelled table, converted in exactly the same way.
df_extra = pd.read_csv('Dig-MNIST.csv')
y_extra = df_extra['label'].to_numpy()
X_extra = df_extra.drop(columns=['label']).to_numpy().reshape(len(df_extra), 28, 28, 1)

# No train/validation split is made in this cell; splitting is left to the
# cells that train models.

# Test table: it carries an 'id' column instead of 'label'; the remaining
# columns are reshaped to images like the training data.
df_test = pd.read_csv('test.csv')
X_test = df_test.drop(columns=['id']).to_numpy().reshape(len(df_test), 28, 28, 1)

In [5]:
# Generator used for training batches: scales pixel values by 1/255 and applies
# random geometric augmentation (rotation, shifts, shear, zoom). Horizontal
# flipping is explicitly disabled.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=10,
    width_shift_range=0.25,
    height_shift_range=0.25,
    shear_range=0.1,
    zoom_range=0.25,
    horizontal_flip=False,
)

# Generator for held-out data: only the 1/255 rescaling, no augmentation.
valid_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [14]:
from tensorflow.keras.callbacks import LearningRateScheduler



nets = 15  # number of CNNs in the ensemble, each trained on its own random split


def _build_cnn():
    """Return a freshly initialised, compiled CNN for 28x28x1 inputs and 10 classes.

    Three convolutional stages (64 -> 128 -> 256 filters), each ending in
    max-pooling and dropout, are followed by two batch-normalised dense layers
    and a 10-way softmax. The model is compiled with Adam and sparse
    categorical cross-entropy, so targets must be integer class ids.
    """
    net = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(64, kernel_size=5, padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv2D(128, kernel_size=5, padding='same', activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.2),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    net.compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return net


models = [_build_cnn() for _ in range(nets)]

# Exponential decay: the learning rate used in epoch x is 1e-3 * 0.95 ** x.
lrs = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)


history = [0] * nets

for j in range(nets):
    # A new unseeded 90/10 split per network, so every member sees different data.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.1)

    # Training batches are rescaled by 1/255 inside train_datagen; the
    # validation images must be put on the same scale, otherwise the
    # validation metrics are computed on raw 0-255 inputs the network never saw.
    # Plain arrays are passed (rather than a second generator) so that no
    # validation_steps value is needed.
    X_val_scaled = X_val / 255.0

    history[j] = models[j].fit_generator(train_datagen.flow(X_train, y_train, batch_size=64),
                              steps_per_epoch=X_train.shape[0] // 64,
                              epochs=45,
                              validation_data=(X_val_scaled, y_val),
                              callbacks=[lrs],
                              verbose=0)

    # The metric is recorded as 'acc' by older tf.keras releases and as
    # 'accuracy' by newer ones; pick whichever key this run produced so the
    # summary does not fail with a KeyError after training has finished.
    run_log = history[j].history
    acc_key = 'accuracy' if 'accuracy' in run_log else 'acc'
    print("CNN ", j + 1, 'Epochs=45 Train Accuracy=', max(run_log[acc_key]), 'Validation Accuracy=', max(run_log['val_' + acc_key]))

KeyboardInterrupt: 

In [4]:
# Single, wider CNN: three convolutional stages (up to 512 filters), each closed
# by max-pooling and dropout, then two batch-normalised dense layers and a
# 10-way softmax over integer class labels.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128, kernel_size=5, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(512, kernel_size=5, padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Conv2D(512, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


# All three callbacks watch val_loss: lower the learning rate after 5 epochs
# without improvement, stop after 10, and keep only the best weights on disk.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                            verbose=1, patience=5)

es = EarlyStopping(monitor='val_loss', verbose=1, patience=10)

cp = ModelCheckpoint('best_model.h5', monitor='val_loss', verbose=1, save_best_only=True)


# Create the split here instead of relying on X_train / X_val left over from a
# loop in another cell, so this cell also works on a fresh kernel. The seed
# makes the held-out set (and therefore the checkpoint choice) reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.10, random_state=777)

# Validate on the complete held-out set, scaled like the training batches.
# Drawing a fixed number of shuffled generator batches would score each epoch
# on a different random subset and add noise to the val_loss that the
# callbacks above depend on.
X_val_scaled = X_val / 255.0

history = model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=256),
                              steps_per_epoch=100,
                              epochs=50,
                              validation_data=(X_val_scaled, y_val),
                              callbacks=[learning_rate_reduction, es, cp],
                              shuffle=True)

Epoch 1/50
Epoch 00001: val_loss improved from inf to 4.22267, saving model to best_model.h5
Epoch 2/50
Epoch 00002: val_loss did not improve from 4.22267
Epoch 3/50
Epoch 00003: val_loss did not improve from 4.22267
Epoch 4/50
Epoch 00004: val_loss did not improve from 4.22267
Epoch 5/50
Epoch 00005: val_loss improved from 4.22267 to 3.33556, saving model to best_model.h5
Epoch 6/50
Epoch 00006: val_loss improved from 3.33556 to 0.23634, saving model to best_model.h5
Epoch 7/50
Epoch 00007: val_loss improved from 0.23634 to 0.10475, saving model to best_model.h5
Epoch 8/50
Epoch 00008: val_loss improved from 0.10475 to 0.02348, saving model to best_model.h5
Epoch 9/50
Epoch 00009: val_loss did not improve from 0.02348
Epoch 10/50
Epoch 00010: val_loss did not improve from 0.02348
Epoch 11/50
Epoch 00011: val_loss did not improve from 0.02348
Epoch 12/50
Epoch 00012: val_loss did not improve from 0.02348
Epoch 13/50
Epoch 00013: val_loss improved from 0.02348 to 0.02137, saving model t

Epoch 29/50
Epoch 00029: val_loss did not improve from 0.01739
Epoch 30/50
Epoch 00030: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.

Epoch 00030: val_loss did not improve from 0.01739
Epoch 00030: early stopping


In [5]:
from tensorflow.keras.models import load_model

# Restore the checkpoint written by ModelCheckpoint (lowest val_loss seen).
model = load_model('best_model.h5')
model.summary()

# Scale the test images by 1/255 exactly like the training batches, then take
# the most probable class per row. predict(...).argmax(axis=-1) is what
# Sequential.predict_classes did for a softmax output, but it also works on
# TensorFlow releases where predict_classes has been removed.
y_pred = model.predict(X_test / 255.0).argmax(axis=-1)

# One row per test image; the positional index is written out as the 'id' column.
answer = pd.DataFrame(y_pred, columns=['label'])
answer.index.name = 'id'
answer.to_csv('submission.csv')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 24, 24, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 128)       204928    
_________________________________________________________________
batch_normalization_2 (Batch (None, 24, 24, 128)       512       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 128)       0

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_298 (Conv2D)          (None, 26, 26, 64)        640       
_________________________________________________________________
conv2d_299 (Conv2D)          (None, 26, 26, 64)        36928     
_________________________________________________________________
max_pooling2d_46 (MaxPooling (None, 13, 13, 64)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 13, 13, 64)        0         
_________________________________________________________________
conv2d_300 (Conv2D)          (None, 11, 11, 128)       73856     
_________________________________________________________________
conv2d_301 (Conv2D)          (None, 11, 11, 128)       147584    
_________________________________________________________________
max_pooling2d_47 (MaxPooling (None, 5, 5, 128)         0

In [None]:
# Class predictions for the 1/255-scaled test images. argmax over the softmax
# output replaces Sequential.predict_classes, which newer TensorFlow releases
# no longer provide.
y_pred = model.predict(X_test / 255.0).argmax(axis=-1)

In [13]:
# Build the submission table: a single 'label' column whose positional index
# is named 'id', then write it (index included) to disk.
answer = pd.DataFrame(y_pred, columns=['label']).rename_axis('id')
answer.to_csv('submission.csv')