# Model 3: Improve model with a better optimizer


## Load modules

In [1]:
from keras.models import Sequential, clone_model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.regularizers import L1L2

import helpers

: 

## Load dataset

In [55]:
# Fetch the CIFAR-10 train/test splits through the project-local helper.
# NOTE(review): array shapes and label encoding are decided inside
# helpers.load_dataset(), which is not shown here — confirm before relying on them.
(
    x_train, y_train,
    x_test, y_test,
) = helpers.load_dataset()

## Model 3 architecture

Let's recompile the same model with Adam, which is usually preferred over SGD for image classification. Reasons:

- Adaptive Learning Rate: Adam adapts the learning rate for each parameter, making it more efficient in handling sparse gradients or noisy data, while SGD applies one global learning rate to all parameters, even when momentum or a schedule is added.

- Faster Convergence: Adam combines momentum and adaptive learning rates, helping it converge faster and more efficiently, especially in complex models.

- Less Hyperparameter Tuning: Adam works well with default settings, whereas SGD typically requires more careful tuning of learning rates and momentum.



In [57]:
# Per-sample input shape: every axis of the training array after the batch axis.
input_shape = x_train.shape[1:]

model = Sequential()
model.add(Input(shape=input_shape))

# Feature extractor: three convolutional stages of increasing width (64, 128, 256).
# Each stage is (3x3 Conv + ReLU -> BatchNorm) twice, followed by 2x2 max pooling.
# Layers are added in exactly the same order as the hand-written version.
for n_filters in (64, 128, 256):
    for _ in range(2):
        model.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))

# Classifier head: flatten the feature maps, one hidden dense layer,
# then a 10-way softmax output.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(10, activation='softmax'))

# summary() prints the table itself and returns None; wrapping it in print()
# would emit a stray "None" line after the table, so call it directly.
model.summary()

In [21]:
# Configure training: Adam optimizer (selected by name, library defaults),
# categorical cross-entropy loss, accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


def decayed_lr(epoch):
    """Exponentially decayed learning rate: 1e-3 at epoch 0, 10x smaller every 20 epochs."""
    return 1e-3 * 10**(-epoch / 20)


# Callback that applies the schedule above during training.
lr_schedule = LearningRateScheduler(decayed_lr)

# Fit for 15 epochs with large mini-batches; validation_split=0.1 asks Keras to
# hold out a tenth of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    epochs=15,
    batch_size=512,
    validation_split=0.1,
    callbacks=[lr_schedule],
)


In [22]:
# plot results
# NOTE(review): the three helpers below live in the project-local `helpers` module,
# which is not shown here; the descriptions are inferred from their names and
# arguments — confirm against helpers.py.
# Presumably reports the trained model's metrics on the test split.
helpers.evaluate_model(model, x_test, y_test)
# Presumably plots the per-epoch curves stored in `history` (returned by model.fit).
helpers.plot_model_history(history)
# Presumably draws a confusion matrix of the model's predictions on the test split.
helpers.plot_confusion_matrix(model, x_test, y_test)