In [1]:
import warnings
warnings.filterwarnings("ignore")

## Optimization Algorithms

In [2]:
from keras.optimizers.legacy import SGD, Adam, RMSprop
# from keras.optimizers import SGD, Adam, RMSprop ## use this line if you don't use M1/M2 mac

# Build one instance of each optimizer in a single statement. Every
# hyperparameter is passed by keyword so it can be tuned in place:
#   - SGD:     learning rate and momentum
#   - RMSprop: learning rate and rho
#   - Adam:    learning rate, beta_1 and beta_2
sgd_optimizer, rmsprop_optimizer, adam_optimizer = (
    SGD(learning_rate=0.01, momentum=0.0),
    RMSprop(learning_rate=0.001, rho=0.9),
    Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
)


Source code of the Keras optimizers (v3.0.5), for reference: https://github.com/keras-team/keras/tree/v3.0.5/keras/optimizers

### Example on MNIST Dataset

In [3]:
from keras.datasets import mnist
from keras.utils import to_categorical

# Load the MNIST train/test splits as (images, labels) pairs.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten each 28x28 image into a 784-long float32 vector and scale the pixel
# values to the 0-1 range. The -1 lets NumPy infer the number of samples, so
# this cell keeps working if a subset of the data is used instead of the full
# 60000/10000 split.
train_images = train_images.reshape((-1, 28 * 28)).astype('float32') / 255
test_images = test_images.reshape((-1, 28 * 28)).astype('float32') / 255

# One-hot encode labels. The class count is passed explicitly (it matches the
# 10-unit softmax output layer used below) instead of being inferred from the
# largest label present, so both splits always get 10 columns.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)


In [9]:
from keras.models import Sequential
from keras.layers import Dense
from keras.initializers import glorot_uniform, he_normal, lecun_normal

def create_model(initializer, optimizer):
    """Build and compile a one-hidden-layer classifier for flattened 28x28 inputs.

    The hidden layer's activation is chosen from the initializer name:
    'glorot_uniform' -> 'tanh', 'he_normal' -> 'relu',
    'lecun_normal' -> 'selu'; anything else falls back to 'relu'.

    Args:
        initializer: Kernel initializer for the hidden layer; passed straight
            through to ``Dense(kernel_initializer=...)``.
        optimizer: Optimizer passed straight through to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (512-unit hidden layer followed by a
        10-unit softmax layer), using categorical cross-entropy loss and
        tracking accuracy.
    """
    # Ordered (initializer name, activation) pairings; compared with == so any
    # kind of initializer value can be passed without needing to be hashable.
    pairings = (
        ('glorot_uniform', 'tanh'),
        ('he_normal', 'relu'),
        ('lecun_normal', 'selu'),
    )
    hidden_activation = next(
        (act for name, act in pairings if initializer == name),
        'relu',
    )

    hidden = Dense(
        512,
        activation=hidden_activation,
        kernel_initializer=initializer,
        input_shape=(28 * 28,),
    )
    output = Dense(10, activation='softmax')

    network = Sequential([hidden, output])
    network.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [10]:
import time
from keras.optimizers.legacy import SGD, Adam, RMSprop

# Define initializers and optimizers to test
initializers = {
    'Glorot': 'glorot_uniform',
    'He': 'he_normal',
    'LeCun': 'lecun_normal'
}

# These instances act as templates only: each training run below gets its own
# fresh copy, so no optimizer state is carried over from one model to the next.
optimizers = {
    'SGD': SGD(learning_rate=0.01),
    'Adam': Adam(learning_rate=0.001),
    'RMSprop': RMSprop(learning_rate=0.001)
}

# Train one model per (initializer, optimizer) pair and record, under the key
# '<init_name>_<opt_name>', a tuple of (test accuracy, training time in seconds).
results = {}
for init_name, initializer in initializers.items():
    for opt_name, optimizer_template in optimizers.items():
        # Rebuild the optimizer from its config so every model starts with a
        # brand-new optimizer instead of sharing one instance across models.
        optimizer = optimizer_template.__class__.from_config(
            optimizer_template.get_config()
        )
        model = create_model(initializer, optimizer)
        # perf_counter is a monotonic clock intended for measuring durations.
        # Note: the timed span includes the validation pass that fit() runs on
        # the test set because validation_data is supplied.
        start_time = time.perf_counter()
        history = model.fit(train_images, train_labels, epochs=5, batch_size=128, validation_data=(test_images, test_labels), verbose=0)
        training_time = time.perf_counter() - start_time
        test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=0)
        results[f'{init_name}_{opt_name}'] = (test_acc, training_time)


In [11]:
# Print one summary line per configuration. The duration is unpacked into
# `elapsed` rather than `time` so the `time` module imported by the training
# cell is not overwritten by a float in the notebook namespace.
for config, (acc, elapsed) in results.items():
    print(f"{config} - Accuracy: {acc:.4f}, Training Time: {elapsed:.2f} seconds")


Glorot_SGD - Accuracy: 0.9062, Training Time: 4.32 seconds
Glorot_Adam - Accuracy: 0.9750, Training Time: 4.52 seconds
Glorot_RMSprop - Accuracy: 0.9754, Training Time: 6.02 seconds
He_SGD - Accuracy: 0.9136, Training Time: 4.12 seconds
He_Adam - Accuracy: 0.9808, Training Time: 4.43 seconds
He_RMSprop - Accuracy: 0.9790, Training Time: 5.77 seconds
LeCun_SGD - Accuracy: 0.9125, Training Time: 4.73 seconds
LeCun_Adam - Accuracy: 0.9741, Training Time: 5.01 seconds
LeCun_RMSprop - Accuracy: 0.9706, Training Time: 6.68 seconds
