# [Deep Learning for Vision Systems](https://www.manning.com/books/deep-learning-for-vision-systems?a_aid=compvisionbookcom&a_bid=90abff15) Book


## AlexNet implementation with Keras


<img src="./images/alexnet_architecture.png" width="900" height="600">

In [1]:
from keras.models import Sequential
from keras.layers import Conv2D, AveragePooling2D, Flatten, Dense,Activation,MaxPool2D, BatchNormalization, Dropout
from keras.regularizers import l2

In [2]:
# Load the MNIST handwritten-digit dataset
from keras.datasets import mnist
import numpy as np

# use Keras to import the pre-shuffled MNIST database,
# which arrives already split into (images, labels) pairs for train and test
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# report how many examples each split holds (first axis = number of images)
print("The MNIST database has a training set of %d examples." % X_train.shape[0])
print("The MNIST database has a test set of %d examples." % X_test.shape[0])

The MNIST database has a training set of 60000 examples.
The MNIST database has a test set of 10000 examples.


In [3]:
# Standardize the pixel values to accelerate learning.
# The statistics come from the training set only and are reused for the test set.
mean = X_train.mean()
std = X_train.std()

# (translated note) Goal for this semester: being able to use batch norm well counts as a success.
# The small constant in the denominator guards against division by zero.
X_train = (X_train - mean) / (std + 1e-7)
X_test = (X_test - mean) / (std + 1e-7)

print('X_train shape:', X_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

X_train shape: (60000, 28, 28)
60000 train samples
10000 test samples


In [4]:
# `keras.utils.np_utils` is a private module that newer Keras releases no longer
# expose; `to_categorical` is the public entry point for the same function.
from keras.utils import to_categorical

num_classes = 10
# print first ten (integer-valued) training labels
print('Integer-valued labels:')
print(y_train[:10])

# one-hot encode the labels:
# convert integer class vectors to binary class matrices
# (translated note) I used to use to_dummy for this; here the Keras utility is used instead.
# NOTE: this cell overwrites y_train / y_test, so run it once per kernel session.
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

Integer-valued labels:
[5 0 4 1 9 2 1 3 1 4]


In [5]:
# input image dimensions: 28x28 pixel images with a single channel
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# add an explicit trailing channel axis -> (samples, rows, cols, 1)
X_train = X_train.reshape(len(X_train), *input_shape)
X_test = X_test.reshape(len(X_test), *input_shape)

In [22]:
# AlexNet-style network scaled down for the 28x28x1 MNIST images.
# The input shape and the number of output classes are taken from the
# preprocessing cells (`input_shape`, `num_classes`) instead of being repeated
# here as literals, so the model stays in sync with the data preparation.

# L2 penalty applied to the kernel of every convolutional layer
weight_decay = 0.0005

# Instantiate an empty sequential model
model = Sequential(name="Alexnet")

# 1st layer (conv + pool + batchnorm)
# Reference AlexNet configuration: 96 filters, 11x11 kernel, stride 4 on a
# 227x227x3 input, followed by 3x3 / stride-2 max pooling.
# Adapted here: 64 filters, 3x3 kernel, stride 1, and 2x2 / stride-2 pooling.
model.add(Conv2D(filters=64, kernel_size=(3,3), strides=(1,1), padding='valid',
                 kernel_regularizer=l2(weight_decay), input_shape=input_shape))
model.add(Activation('relu'))  #<---- activation function can be added on its own layer or within the Conv2D function
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2), padding='valid'))
model.add(BatchNormalization())

# 2nd layer (conv + pool + batchnorm)
# Reference AlexNet configuration: 256 filters, 5x5 kernel, 3x3 / stride-2 pooling.
# Adapted here: 128 filters, 3x3 kernel, and 2x2 / stride-2 pooling.
model.add(Conv2D(filters=128, kernel_size=(3,3), strides=(1,1), padding='same',
                 kernel_regularizer=l2(weight_decay)))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=(2,2), padding='valid'))
model.add(BatchNormalization())

# layer 3 (conv + batchnorm)      <--- note that the authors did not add a POOL layer here
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same',
                 kernel_regularizer=l2(weight_decay)))
model.add(Activation('relu'))
model.add(BatchNormalization())

# layer 4 (conv + batchnorm)      <--- similar to layer 3
model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same',
                 kernel_regularizer=l2(weight_decay)))
model.add(Activation('relu'))
model.add(BatchNormalization())

# layer 5 (conv + batchnorm + pool)
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same',
                 kernel_regularizer=l2(weight_decay)))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(3,3), strides=(2,2), padding='valid'))

# Flatten the CNN output to feed it with fully connected layers
model.add(Flatten())

# layer 6 (Dense layer + dropout)
model.add(Dense(units=4096, activation='relu'))
model.add(Dropout(0.5))

# layer 7 (Dense layer + dropout)
model.add(Dense(units=4096, activation='relu'))
model.add(Dropout(0.5))

# layer 8 (softmax output layer): one unit per class
model.add(Dense(units=num_classes, activation='softmax'))

# print the model summary
model.summary()


Model: "Alexnet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_44 (Conv2D)          (None, 26, 26, 64)        640       
                                                                 
 activation_44 (Activation)  (None, 26, 26, 64)        0         
                                                                 
 max_pooling2d_28 (MaxPoolin  (None, 13, 13, 64)       0         
 g2D)                                                            
                                                                 
 batch_normalization_42 (Bat  (None, 13, 13, 64)       256       
 chNormalization)                                                
                                                                 
 conv2d_45 (Conv2D)          (None, 13, 13, 128)       73856     
                                                                 
 activation_45 (Activation)  (None, 13, 13, 128)       0   

In [23]:
# Multi-class problem (10 classes) -> categorical cross entropy as the loss.
# Compiling fixes the loss function, the optimizer and the metric reported
# during training and evaluation.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [24]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

# set the learning rate schedule as created in the original paper
def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    Piecewise-constant (step) schedule:

        epoch <= 2       -> 5e-4
        2 < epoch <= 5   -> 2e-4
        5 < epoch <= 9   -> 5e-5
        epoch > 9        -> 1e-5

    Args:
        epoch: epoch index as supplied by the LearningRateScheduler callback.

    Returns:
        The learning rate (float) for that epoch.
    """
    # (last epoch index of the step, learning rate for that step), ascending
    steps = ((2, 5e-4), (5, 2e-4), (9, 5e-5))
    for last_epoch, rate in steps:
        if epoch <= last_epoch:
            return rate
    # past the final threshold: smallest rate for the rest of training
    return 1e-5

# wrap the schedule function in a callback so Keras queries it for each epoch
lr_scheduler = LearningRateScheduler(lr_schedule)

# set the checkpointer: only overwrite the file when the monitored quantity improves
# (monitor='val_loss' is the Keras default, spelled out here for clarity)
checkpointer = ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# train the model
# NOTE(review): the test set is used as validation data, so the checkpoint is
# selected on the same data that is later used to report test accuracy;
# consider holding out a separate validation split.
hist = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=20,
    shuffle=True,
    verbose=2,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer, lr_scheduler],
)

Epoch 1/20

Epoch 1: val_loss improved from inf to 0.36095, saving model to model.weights.best.hdf5
1875/1875 - 565s - loss: 0.6231 - accuracy: 0.9495 - val_loss: 0.3609 - val_accuracy: 0.9827 - lr: 5.0000e-04 - 565s/epoch - 301ms/step
Epoch 2/20

Epoch 2: val_loss improved from 0.36095 to 0.27696, saving model to model.weights.best.hdf5
1875/1875 - 541s - loss: 0.3123 - accuracy: 0.9765 - val_loss: 0.2770 - val_accuracy: 0.9747 - lr: 5.0000e-04 - 541s/epoch - 288ms/step
Epoch 3/20

Epoch 3: val_loss improved from 0.27696 to 0.26122, saving model to model.weights.best.hdf5
1875/1875 - 590s - loss: 0.2636 - accuracy: 0.9763 - val_loss: 0.2612 - val_accuracy: 0.9828 - lr: 5.0000e-04 - 590s/epoch - 315ms/step
Epoch 4/20

Epoch 4: val_loss improved from 0.26122 to 0.13076, saving model to model.weights.best.hdf5
1875/1875 - 580s - loss: 0.1688 - accuracy: 0.9891 - val_loss: 0.1308 - val_accuracy: 0.9895 - lr: 2.0000e-04 - 580s/epoch - 309ms/step
Epoch 5/20

Epoch 5: val_loss improved from 

In [25]:
# load the best checkpointed weights, i.e. those with the lowest validation loss
# (the checkpoint callback tracked val_loss, not validation accuracy)
model.load_weights('model.weights.best.hdf5')

In [26]:
# evaluate on the test set; with metrics=['accuracy'] at compile time the
# result is [loss, accuracy], so index 1 is the accuracy
score = model.evaluate(X_test, y_test, verbose=0)
accuracy = score[1] * 100

# print test accuracy as a percentage
print('Test accuracy: %.4f%%' % accuracy)

Test accuracy: 99.5000%
