In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [2]:
#Normalize the data between -1 and 1
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
X = train.drop('label', axis=1)/255.0
testX = test/255.0
y = train.label

In [4]:
np.random.seed(42)
tf.random.set_seed(42)

In [5]:
k = keras.backend

In [6]:
k.clear_session()

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_val, Y_train, Y_val = train_test_split(X,y, test_size=0.3, random_state=42)

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.1, random_state=42)

In [10]:
X_train, X_val, X_test = tf.Variable(X_train), tf.Variable(X_val), tf.Variable(X_test)

In [11]:
Xtrainpp = tf.reshape(X_train,[X_train.shape[0],28,28,1])
Xvalpp = tf.reshape(X_val,[X_val.shape[0],28,28,1])
Xtestpp = tf.reshape(X_test,[X_test.shape[0],28,28,1])

In [12]:
testX = tf.reshape(testX, [testX.shape[0],28,28,1])

In [13]:
my_callbacks = [
    keras.callbacks.EarlyStopping(patience=5),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.0001),
]

In [14]:
#Simple CNN -> VGG

from functools import partial

Default2D = partial(keras.layers.Conv2D, kernel_size=3, activation = 'relu', padding='SAME')
MaxPool2D = partial(keras.layers.MaxPool2D, pool_size=2)

In [15]:
model_vgg = keras.models.Sequential([Default2D(filters=32, kernel_size=5, input_shape=[28,28,1]),
                                    MaxPool2D(),
                                    Default2D(filters=64),
                                    MaxPool2D(),
                                    Default2D(filters=128),
                                    MaxPool2D(),
                                    Default2D(filters=256, kernel_size=2),
                                    MaxPool2D(),
                                    
                                    keras.layers.Flatten(),
                                    keras.layers.Dense(50, activation='relu'),
                                    keras.layers.Dropout(0.2),
                                    keras.layers.Dense(25, activation='relu'),
                                    keras.layers.Dropout(0.2),
                                    keras.layers.Dense(10,activation='softmax')
                                    ]
)

In [16]:
model_vgg.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 128)         73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 128)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 3, 256)         1

In [19]:
model_vgg.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'],optimizer='nadam')

In [20]:
history_vgg = model_vgg.fit(Xtrainpp,Y_train,epochs=20, validation_data=(Xvalpp,Y_val), callbacks=my_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


In [21]:
model_vgg.evaluate(Xtestpp,Y_test)



[0.09133099764585495, 0.9887754917144775]

In [22]:
model_vgg.save('mnist_cnn_vgg_9887_009.h5')

In [23]:
#Inception CNN -> GoogleNet

In [24]:
class Inception2d(keras.layers.Layer):
    def __init__(self,f11,f311,f333,f511,f555,fMP11,**kwargs):
        super().__init__(**kwargs)
        self.f11 = f11
        self.f311 = f311
        self.f333 = f333
        self.f511 = f511
        self.f555 = f555
        self.fMP11 = fMP11
        
        self.Conv1x1 = keras.layers.Conv2D(filters=self.f11,kernel_size=1, activation='relu', padding= 'same')
        self.Conv3SL1x1 = keras.layers.Conv2D(filters=self.f333, kernel_size=3, activation='relu', padding ='same')
        self.Conv3SL3x3 = keras.layers.Conv2D(filters=self.f333,kernel_size=3,activation='relu',padding='same')
        self.Conv5SL1x1 = keras.layers.Conv2D(filters=self.f511,kernel_size=1,activation='relu',padding='same')
        self.Conv5SL5x5 = keras.layers.Conv2D(filters=self.f555,kernel_size=5,activation='relu',padding='same')
        self.MaxPool = keras.layers.MaxPooling2D(pool_size=3,strides=1,padding='same')
        self.ConvMP1x1 = keras.layers.Conv2D(filters=self.fMP11,kernel_size=1,activation='relu',padding='same')
        
    def call(self,inputs):
        #1x1 Input
        out11 = self.Conv1x1(inputs)
        
        #Smart Layer inputs (1x1) and (3x3)
        x=self.Conv1x1(inputs)
        out33=self.Conv3SL1x1(x)
        
        #Smart Layer inputs (1x1) and (5x5)
        x=self.Conv1x1(inputs)
        out55=self.Conv5SL1x1(x)
        
        #Input via Max Pool
        x=self.MaxPool(inputs)
        outMP11=self.ConvMP1x1(x)
        
        #outputs
        output=keras.layers.Concatenate(axis=-1)([out11,out33,out55,outMP11])
        
        return output
    
    def get_config(self):
        base_config = super().get_config()
        return {**base_config, 'f11':self.f11, 'f311':self.f311, 'f333':self.f333,
                'f511': self.f511, 'f555': self.f555, 'fMP11': self.fMP11
            
        }
                                         

In [27]:
from functools import partial
Default2D=partial(keras.layers.Conv2D,kernel_size=3,activation='relu',padding='same')
MaxPool2D=partial(keras.layers.MaxPool2D,pool_size=2,padding='same')
model_gnet=keras.models.Sequential([
    Default2D(filters=64,kernel_size=7,input_shape=[28,28,1]),
    MaxPool2D(),
    Default2D(filters=32,kernel_size=1),
    Default2D(filters=128),
    MaxPool2D(),
    Inception2d(f11=32,f311=16,f333=64,f511=16,f555=32,fMP11=16),
    Inception2d(f11=64,f311=32,f333=96,f511=32,f555=64,fMP11=32),
    MaxPool2D(),
    Inception2d(f11=96,f311=64,f333=108,f511=64,f555=96,fMP11=64),
    Inception2d(f11=108,f311=96,f333=128,f511=64,f555=108,fMP11=64),
    MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(90,activation='relu'),
    keras.layers.Dense(45,activation='relu'),
    keras.layers.Dense(10,activation='softmax')
])

In [28]:
model_gnet.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 28, 28, 64)        3200      
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 14, 14, 32)        2080      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 14, 14, 128)       36992     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
inception2d (Inception2d)    (None, 7, 7, 128)         25216     
_________________________________________________________________
inception2d_1 (Inception2d)  (None, 7, 7, 224)        

In [29]:
model_gnet.compile(loss='sparse_categorical_crossentropy',metrics=['accuracy'],optimizer='nadam')

In [31]:
history_gnet=model_gnet.fit(Xtrainpp,Y_train,epochs=20,validation_data=(Xvalpp,Y_val),callbacks=my_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


In [32]:
model_gnet.evaluate(Xtestpp,Y_test)



[0.041687507182359695, 0.99217689037323]

In [33]:
model_gnet.save('mnist_cnn_inception_992_004.h5')

In [34]:
#Residual Module -> ResNet

In [35]:
class ResidualBlock(keras.layers.Layer):
    def __init__(self,filters,strides=1,activation='relu',**kwargs):
        super().__init__(**kwargs)
        self.filters=filters
        self.strides=strides
        self.activation=keras.activations.get(activation)
        self.main_layers=[
            keras.layers.Conv2D(filters,2,strides=strides,padding='same',use_bias=False),
            keras.layers.BatchNormalization(),
            self.activation,
            keras.layers.Conv2D(filters,2,strides=1,padding='same',use_bias=False),
            keras.layers.BatchNormalization()
        ]
        self.skip_layers=[]
        if strides>1:
            self.skip_layers=[
                keras.layers.Conv2D(filters,1,strides=strides,padding='same',use_bias=False),
                keras.layers.BatchNormalization()
            ]
    def call(self,inputs):
        Z=inputs
        for layer in self.main_layers:
            Z=layer(Z)
        skip_Z=inputs
        for layer in self.skip_layers:
            skip_Z=layer(skip_Z)
        return self.activation(Z+skip_Z)
    
    def get_config(self):
        base_config=super().get_config()
        return {**base_config,"filters":self.filters,"strides":self.strides,"activation":keras.activations.serialize(self.activation)}

In [36]:
Default2D=partial(keras.layers.Conv2D,kernel_size=3,activation='relu',padding='same')
MaxPool2D=partial(keras.layers.MaxPool2D,pool_size=2,padding='same')
model_rnet=keras.models.Sequential([
    Default2D(filters=32,kernel_size=5,input_shape=[28,28,1]),
    MaxPool2D(),
    ResidualBlock(filters=64,strides=2),
    ResidualBlock(filters=64),
    ResidualBlock(filters=128,strides=2),
    ResidualBlock(filters=128),
    MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(50,activation='relu'),
    keras.layers.Dense(25,activation='relu'),
    keras.layers.Dense(10,activation='softmax')
])

In [37]:
model_rnet.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
residual_block (ResidualBloc (None, 7, 7, 64)          27392     
_________________________________________________________________
residual_block_1 (ResidualBl (None, 7, 7, 64)          33280     
_________________________________________________________________
residual_block_2 (ResidualBl (None, 4, 4, 128)         108032    
_________________________________________________________________
residual_block_3 (ResidualBl (None, 4, 4, 128)         132096    
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 2, 2, 128)        

In [38]:
model_rnet.compile(loss='sparse_categorical_crossentropy',metrics=['accuracy'],optimizer='nadam')

In [39]:
history_rnet=model_rnet.fit(Xtrainpp,Y_train,epochs=20,validation_data=(Xvalpp,Y_val),callbacks=my_callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


In [40]:
model_rnet.evaluate(Xtestpp,Y_test)



[0.0454375334084034, 0.9901360273361206]

In [41]:
model_rnet.save('mnist_cnn_resnet_990_004.h5')