In [4]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Flatten, BatchNormalization, Activation
from keras.optimizers import Adam, Nadam, Adadelta, RMSprop, SGD
from keras.utils import to_categorical
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [5]:
def load_data():
    from mnist import MNIST
    mnist = MNIST('C:/Users/My/Desktop/OneShot/mnist/')
    x_train, y_train = mnist.load_training() 
    x_test, y_test = mnist.load_testing()
    x_train = np.asarray(x_train).astype(np.float32)
    y_train = np.asarray(y_train).astype(np.int32)
    x_test = np.asarray(x_test).astype(np.float32)
    y_test = np.asarray(y_test).astype(np.int32)
    
    return x_train/255, y_train, x_test/255, y_test

x_train, y_train, x_test, y_test = load_data()

In [6]:
y_test = to_categorical(y_test,10)
y_train = to_categorical(y_train, 10)

y_test.shape

(10000, 10)

In [7]:
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

In [15]:
def build_model():
    
    model = Sequential()
    
    ########################## Version v1 (2*Conv-pool-drop-2*Conv-pool-drop-flat-dense-drop-dense ##########################
    
    filters = [32,64]
    kernel_sizes = [5,3]
    
    '''model.add(Conv2D(filters[0], kernel_sizes[0], input_shape=(28,28,1), strides=(1, 1), padding='same', activation='relu', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros', kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01), activity_regularizer=None))
    model.add(Conv2D(filters[0], kernel_sizes[0], input_shape=(28,28,1), strides=(1, 1), padding='same', activation='relu', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros', kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01), activity_regularizer=None))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.25))
    
    model.add(Conv2D(filters[1], kernel_sizes[1],  strides=(2, 2), padding='same', activation='relu', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros', kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01), activity_regularizer=None))
    model.add(Conv2D(filters[1], kernel_sizes[1],  strides=(1, 1), padding='same', activation='relu', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros', kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01), activity_regularizer=None))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax', use_bias=True, kernel_initializer='he_uniform', bias_initializer='zeros'))'''
    
    ###########################################################################################################
    
    #### Version v2 (Conv+batch+Conv+batch+Conv+batch+drop+Conv+batch+Conv+batch+Conv+batch+drop+Conv+batch+flat+drop+dense ####
    
    '''model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())    
    model.add(Conv2D(32, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())   
    model.add(Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))
    
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'))
    model.add(BatchNormalization())   
    model.add(Dropout(0.25))
    
    model.add(Conv2D(128, kernel_size=4, activation='relu'))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))'''
    
    ######### 0.98 - test ################### 0.992 validation on train #################################
    ######### with data augmentation 0.995 test; 0.992 train
     
    ################### V3 0.995;  30 epochs ############################
    '''  
    model.add(Conv2D(32, kernel_size=3,input_shape=(28,28,1)))
    model.add(BatchNormalization()) 
    model.add(Activation('relu'))
    model.add(MaxPooling2D(2,2))
    
    model.add(Conv2D(64, kernel_size=3,input_shape=(28,28,1)))
    model.add(BatchNormalization()) 
    model.add(Activation('relu'))
    model.add(MaxPooling2D(2,2))
    
    model.add(Flatten())
    model.add(Dense(256))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Dense(10, activation='softmax'))'''
    
    ################### V3 0.995 ############################
    
    ################### V4 ##################################
    
    '''model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    '''
    ################### V4 ##################################
    
    ################### V5 ##################################
    '''
    model.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    
    model.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    
    model.add(Conv2D(128, kernel_size=1, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    
    model.add(Conv2D(128, kernel_size=1, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))
    
    model.add(Conv2D(128, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(2,2))'''
    
    ################### V5 ##################################
    ################### V6 ##################################
    
    '''model.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=(28,28,1)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    
    model.add(Conv2D(128, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Conv2D(128, kernel_size=3, activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2,2)))
    
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))'''
    
    ################### V6 ##################################
    
    
    
    model.add(Conv2D(64, (3,3), activation='relu', input_shape=(28,28,1), kernel_initializer='random_uniform'))
    
    model.add(MaxPooling2D((2,2)))
    model.add(Conv2D(128, (3,3), activation='relu', kernel_initializer='random_uniform'))
    
    model.add(MaxPooling2D())
    model.add(Dropout(.25))
    model.add(Conv2D(128, (2,2), activation='relu', kernel_initializer='random_uniform'))
    
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout(.25))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))  
    
    
    return model

model = build_model()
adm = Adam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, amsgrad=False)
ndm = Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
# 0.002, beta_1=0.8, beta_2=0.9 - 0.9657
# 0.002, beta_1=0.8, beta_2=0.8 - 0.9658
# 0.002, beta_1=0.8, beta_2=0.5 - 0.9662
# 0.002, beta_1=0.8, beta_2=0.7 - 0.9664

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)


ada = Adadelta()
rms = RMSprop(learning_rate=0.001, rho=0.9)

model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy'])

In [16]:
data_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3, 
                              height_shift_range=0.08, zoom_range=0.08)
data_gen.fit(x_train)



In [73]:
model.fit_generator(data_gen.flow(x_train, y_train, batch_size=64),steps_per_epoch=len(x_train)//32, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x2ac924760f0>

In [17]:
model.compile(loss = 'categorical_crossentropy', optimizer = ndm, metrics=['accuracy'])
model.fit_generator(data_gen.flow(x_train, y_train, batch_size=16),steps_per_epoch=len(x_train)//64, epochs=5)
model.compile(loss = 'categorical_crossentropy', optimizer = adm, metrics=['accuracy'])
model.fit_generator(data_gen.flow(x_train, y_train, batch_size=16),steps_per_epoch=len(x_train)//65, epochs=5)
model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics=['accuracy'])
model.fit_generator(data_gen.flow(x_train, y_train, batch_size=16),steps_per_epoch=len(x_train)//64, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1bd2c848a90>

In [None]:
'''epochs = 10
h = model.fit(x_train[:1000], y_train[:1000], validation_split=.25, batch_size=32, verbose=2, epochs=epochs)'''

In [18]:
score = model.evaluate(x_test, y_test)
score



[1.1920930376163597e-07, 0.1005999967455864]

In [44]:
model.save('mnist_augmentation.h5')