In [3]:
!pip install tensorflow



In [9]:
import time
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

def create_model(activation='relu', optimizer='adam'):
    """Build and compile a dense classifier over 784-feature input vectors.

    Args:
        activation: Activation applied by both hidden Dense layers
            (512 and 256 units).
        optimizer: Optimizer handed to ``compile``.

    Returns:
        A compiled ``Sequential`` model whose last layer is a 10-unit softmax,
        using categorical cross-entropy loss and reporting accuracy.
    """
    hidden_units = (512, 256)

    # Layers are created in network order: input, hidden stack, output.
    layers = [Input(shape=(784,))]
    for units in hidden_units:
        layers.append(Dense(units, activation=activation))
    layers.append(Dense(10, activation='softmax'))  # one output per class

    network = Sequential(layers)
    network.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Fetch the MNIST train and test splits as (images, labels) pairs
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32, flatten each image to a 784-element row, and divide by 255
# so the inputs match the model's Input(shape=(784,)) layer.
x_train = x_train.astype('float32').reshape(-1, 784) / 255
x_test = x_test.astype('float32').reshape(-1, 784) / 255

# One-hot encode the labels over 10 classes, as categorical_crossentropy expects
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Hidden-layer activation functions to compare
activation_functions = ['linear', 'relu', 'sigmoid']

# Maps each activation name to its test accuracy and training time
results = {}

# Train one freshly built model per activation and record how it performs
for activation in activation_functions:
    model = create_model(activation=activation)

    # Time only the training call. perf_counter() is a monotonic, high-resolution
    # clock intended for measuring intervals; time.time() follows the wall clock
    # and can jump if the system time is adjusted during a long run.
    start_time = time.perf_counter()
    history = model.fit(x_train, y_train, epochs=10, batch_size=128, verbose=0, validation_split=0.1)
    exec_time = time.perf_counter() - start_time

    # Score on the held-out test set (not the validation split used during fit)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    # Store and report the outcome for this activation
    results[activation] = {'accuracy': test_acc, 'execution_time': exec_time}
    print(f"Activation: {activation}, Accuracy: {test_acc:.4f}, Execution Time: {exec_time:.2f} seconds")

Activation: linear, Accuracy: 0.9216, Execution Time: 39.92 seconds
Activation: relu, Accuracy: 0.9812, Execution Time: 42.89 seconds
Activation: sigmoid, Accuracy: 0.9781, Execution Time: 43.90 seconds


In [11]:
# Optimizers to compare: display name -> Keras optimizer identifier
optimizers = {
    'SGD': 'sgd',
    'RMSprop': 'rmsprop',
    'Adagrad': 'adagrad'
}

# Maps each optimizer's display name to its test accuracy and training time
results = {}

# Train one freshly built model per optimizer and record how it performs
for opt_name, optimizer in optimizers.items():
    model = create_model(optimizer=optimizer)

    # Time only the training call. perf_counter() is a monotonic, high-resolution
    # clock intended for measuring intervals; time.time() follows the wall clock
    # and can jump if the system time is adjusted during a long run.
    start_time = time.perf_counter()
    history = model.fit(x_train, y_train, epochs=10, batch_size=128, verbose=0, validation_split=0.1)
    exec_time = time.perf_counter() - start_time

    # Score on the held-out test set (not the validation split used during fit)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    # Store and report the outcome for this optimizer
    results[opt_name] = {'accuracy': test_acc, 'execution_time': exec_time}
    print(f"Optimizer: {opt_name}, Accuracy: {test_acc:.4f}, Execution Time: {exec_time:.2f} seconds")

Optimizer: SGD, Accuracy: 0.9380, Execution Time: 38.34 seconds
Optimizer: RMSprop, Accuracy: 0.9847, Execution Time: 40.16 seconds
Optimizer: Adagrad, Accuracy: 0.9110, Execution Time: 37.19 seconds


# Regularization

In [12]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.initializers import HeNormal, GlorotUniform

def create_model(use_dropout=False, dropout_rate=0.5, use_batchnorm=False,
                 weight_initializer='glorot_uniform', activation='relu', optimizer='adam'):
    """Build and compile a dense classifier with optional regularization.

    This definition replaces the earlier ``create_model(activation, optimizer)``
    in the notebook namespace. It accepts those two keywords as well, so cells
    that call ``create_model(activation=...)`` or ``create_model(optimizer=...)``
    keep working when re-run after this cell.

    Args:
        use_dropout: If true, add a ``Dropout`` layer after each hidden layer.
        dropout_rate: Rate passed to each ``Dropout`` layer.
        use_batchnorm: If true, each hidden layer is built as
            Dense (no activation) -> BatchNormalization -> Activation, so the
            normalization is applied before the non-linearity.
        weight_initializer: ``kernel_initializer`` for the two hidden Dense
            layers. The output layer keeps the Dense default.
        activation: Activation of the two hidden layers.
        optimizer: Optimizer handed to ``compile``.

    Returns:
        A compiled ``Sequential`` model with a 784-feature input, hidden layers
        of 512 and 256 units, and a 10-unit softmax output, using categorical
        cross-entropy loss and reporting accuracy.
    """
    model = Sequential()
    model.add(Input(shape=(784,)))

    # Both hidden layers share the same structure and differ only in width.
    for units in (512, 256):
        if use_batchnorm:
            model.add(Dense(units, kernel_initializer=weight_initializer))
            model.add(BatchNormalization())
            model.add(keras.layers.Activation(activation))
        else:
            model.add(Dense(units, activation=activation, kernel_initializer=weight_initializer))

        if use_dropout:
            model.add(Dropout(dropout_rate))

    # Output layer: one softmax unit per class
    model.add(Dense(10, activation='softmax'))

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [15]:
# Experiment name -> keyword arguments forwarded to create_model
experiments = {
    "Baseline": {"use_dropout": False, "use_batchnorm": False, "weight_initializer": "glorot_uniform"},
    "Dropout": {"use_dropout": True, "dropout_rate": 0.2, "use_batchnorm": False, "weight_initializer": "glorot_uniform"},
    "BatchNorm": {"use_dropout": False, "use_batchnorm": True, "weight_initializer": "glorot_uniform"},
    "HeNormal Initialization": {"use_dropout": False, "use_batchnorm": False, "weight_initializer": HeNormal()},
}

# Maps each experiment name to its test accuracy and training time
results = {}

# Train one freshly built model per configuration and record how it performs
for name, config in experiments.items():
    model = create_model(**config)

    # Time only the training call. perf_counter() is a monotonic, high-resolution
    # clock intended for measuring intervals; time.time() follows the wall clock
    # and can jump if the system time is adjusted during a long run.
    start_time = time.perf_counter()
    history = model.fit(x_train, y_train, epochs=10, batch_size=128, verbose=0, validation_split=0.1)
    exec_time = time.perf_counter() - start_time

    # Score on the held-out test set (not the validation split used during fit)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)

    # Store and report the outcome for this configuration
    results[name] = {'accuracy': test_acc, 'execution_time': exec_time}
    print(f"{name}: Accuracy = {test_acc:.4f}, Execution Time = {exec_time:.2f} seconds")

Baseline: Accuracy = 0.9785, Execution Time = 37.99 seconds
Dropout: Accuracy = 0.9830, Execution Time = 47.93 seconds
BatchNorm: Accuracy = 0.9761, Execution Time = 48.50 seconds
HeNormal Initialization: Accuracy = 0.9806, Execution Time = 44.20 seconds
