## Import the dependencies

In [1]:
import keras

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint
from keras import regularizers, optimizers

import numpy as np
from matplotlib import pyplot

## Create the model

In [12]:
def get_alexnet_model():
    """Build an (uncompiled) AlexNet-style network for 227x227x3 inputs.

    Layer order:
      * CONV_1 -> max pool -> batch norm
      * CONV_3 -> max pool -> batch norm
      * CONV_5, CONV_6, CONV_7, each followed by batch norm
      * max pool -> flatten
      * two 4096-unit ReLU dense layers, each followed by 50% dropout
      * 1000-unit softmax output

    CONV_3, CONV_5, CONV_6 and CONV_7 use an L2 kernel penalty of 0.0005.

    Returns:
        The assembled ``Sequential`` model. Every layer, including the
        classifier head, is attached to this returned object.
    """
    md = Sequential()

    # CONV_1 (input 227x227x3), depth=96, kernel=11x11, strides=4 (output 55x55x96)
    #        activation relu
    # POOL_2 pool_size=3, stride=2 (output 27x27x96)
    # batchnorm
    md.add(Conv2D(filters=96, kernel_size=11, strides=4, padding='valid',
                  activation='relu',
                  input_shape=(227, 227, 3)))
    md.add(MaxPooling2D(pool_size=3, strides=2))
    md.add(BatchNormalization())

    # CONV_3 depth=256, kernel=5, padding=2, strides=1 (output 27x27x256)
    # POOL_4 pool_size=3, stride=2 (output 13x13x256)
    # batchnorm
    md.add(Conv2D(filters=256, kernel_size=5, strides=1, padding='same',
                  kernel_regularizer=regularizers.l2(0.0005),
                  activation='relu'))
    md.add(MaxPooling2D(pool_size=3, strides=2))
    md.add(BatchNormalization())

    # CONV_5 depth=384, kernel=3, padding=1, stride=1 (output 13x13x384)
    # batchnorm
    md.add(Conv2D(filters=384, kernel_size=3, strides=1, padding='same',
                  kernel_regularizer=regularizers.l2(0.0005),
                  activation='relu'))
    md.add(BatchNormalization())

    # CONV_6 depth=384, kernel=3, padding=1, stride=1 (output 13x13x384)
    # batchnorm
    md.add(Conv2D(filters=384, kernel_size=3, strides=1, padding='same',
                  kernel_regularizer=regularizers.l2(0.0005),
                  activation='relu'))
    md.add(BatchNormalization())

    # CONV_7 depth=256, kernel=3, padding=1, stride=1 (output 13x13x256)
    # batchnorm
    md.add(Conv2D(filters=256, kernel_size=3, strides=1, padding='same',
                  kernel_regularizer=regularizers.l2(0.0005),
                  activation='relu'))
    md.add(BatchNormalization())

    # POOL_8 pool_size=3, stride=2 (output 6x6x256)
    md.add(MaxPooling2D(pool_size=3, strides=2))

    # The classifier head must be added to the local `md`, never to a
    # notebook-global `model`: otherwise the function either fails on a fresh
    # kernel or silently grows a previously built model and returns a network
    # without its dense layers.

    # FLATTEN
    md.add(Flatten())

    # FC_9 n=4096
    # dropout
    md.add(Dense(units=4096, activation='relu'))
    md.add(Dropout(0.5))

    # FC_10 n=4096
    # dropout
    md.add(Dense(units=4096, activation='relu'))
    md.add(Dropout(0.5))

    # SOFT_MAX n=1000
    md.add(Dense(units=1000, activation='softmax'))

    return md

# Build the network with get_alexnet_model() and print its layer-by-layer summary.
# This rebinds the notebook-global `model`, which the later compile/fit cells use.
model = get_alexnet_model()
model.summary()

ResourceExhaustedError: failed to allocate memory [Op:AddV2]

## Create the model by the book

In [9]:
def get_alexnet_model2():
    """Assemble the "by the book" AlexNet variant as an uncompiled Sequential model.

    The network takes 227x227x3 inputs and consists of five convolutional
    stages (ReLU applied through separate Activation layers, batch
    normalization in every stage, 3x3/stride-2 max pooling in stages 1, 2
    and 5), a Flatten layer, two 4096-unit dense layers with 50% dropout,
    and a 1000-unit softmax output. Convolutions 2-5 use an L2 kernel
    penalty of 0.0005.

    Returns:
        The assembled ``Sequential`` model.
    """
    def weight_decay():
        # A fresh L2 penalty object for each regularized convolution.
        return regularizers.l2(0.0005)

    net = Sequential()

    # Layers are listed (and therefore constructed) in the exact order in
    # which they are appended to the model below.
    layer_stack = [
        # Stage 1: conv -> relu -> pool -> batchnorm
        Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4),
               padding='valid', input_shape=(227, 227, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
        BatchNormalization(),

        # Stage 2: conv -> relu -> pool -> batchnorm
        Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1),
               padding='same', kernel_regularizer=weight_decay()),
        Activation('relu'),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'),
        BatchNormalization(),

        # Stage 3: conv -> relu -> batchnorm
        Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
               padding='same', kernel_regularizer=weight_decay()),
        Activation('relu'),
        BatchNormalization(),

        # Stage 4: conv -> relu -> batchnorm
        Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
               padding='same', kernel_regularizer=weight_decay()),
        Activation('relu'),
        BatchNormalization(),

        # Stage 5: conv -> relu -> batchnorm -> pool
        Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1),
               padding='same', kernel_regularizer=weight_decay()),
        Activation('relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid'),

        # Classifier head: flatten, two dense+dropout blocks, softmax output
        Flatten(),
        Dense(units=4096, activation='relu'),
        Dropout(0.5),
        Dense(units=4096, activation='relu'),
        Dropout(0.5),
        Dense(units=1000, activation='softmax'),
    ]

    for layer in layer_stack:
        net.add(layer)

    return net

# Build the "by the book" network with get_alexnet_model2() and print its summary.
# This rebinds the notebook-global `model`, which the later compile/fit cells use.
model = get_alexnet_model2()
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 55, 55, 96)        34944     
_________________________________________________________________
activation_12 (Activation)   (None, 55, 55, 96)        0         
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 27, 27, 96)        0         
_________________________________________________________________
batch_normalization_11 (Batc (None, 27, 27, 96)        384       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 27, 27, 256)       614656    
_________________________________________________________________
activation_13 (Activation)   (None, 27, 27, 256)       0         
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 13, 13, 256)      

## Set up the training hyperparameters

In [18]:
import tensorflow.keras.callbacks as callbacks


In [21]:
# Multiply the learning rate by sqrt(0.1) whenever the validation loss stops improving.
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1))

# Use the `optimizers` module imported from keras at the top of the notebook.
# The bare name `tensorflow` is never bound here: only
# `tensorflow.keras.callbacks as callbacks` is imported, so
# `tensorflow.keras.optimizers` would raise NameError on a fresh kernel.
# `learning_rate` replaces the deprecated `lr` keyword.
optimizer = optimizers.SGD(learning_rate=0.01, momentum=0.9)

# Multi-class classification: categorical cross-entropy loss, accuracy as the metric.
model.compile(loss='categorical_crossentropy', optimizer=optimizer,
              metrics=['accuracy'])

In [None]:
# Train for 90 epochs with mini-batches of 128, using (X_test, y_test) as validation
# data; the reduce_lr callback adjusts the learning rate based on val_loss.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the visible
# cells — confirm they are loaded (with labels one-hot encoded to match the
# categorical_crossentropy loss) before this cell runs.
model.fit(X_train, y_train, batch_size=128, epochs=90,
          validation_data=(X_test, y_test), verbose=2, 
          callbacks=[reduce_lr])