In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.datasets import mnist #to import our dataset
from tensorflow.keras.models import Sequential, Model # imports our type of network
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, BatchNormalization # imports our layers we want to use

from tensorflow.python.keras.losses import categorical_crossentropy #loss function
from tensorflow.keras.optimizers import Adam, SGD #optimisers
from tensorflow.keras.utils import to_categorical #some function for data preparation

from tensorflow.keras.callbacks import ModelCheckpoint #checkpoints used to keep track of best model

from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('float32') 

In [14]:
batch_size = 128
num_classes = 10
epochs = 20

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()


x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28)
y_train shape: (60000,)
60000 train samples
10000 test samples


In [None]:
input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = Dense(200, activation='relu')(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model.summary()

2025-11-17 21:21:03.000696: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-11-17 21:21:03.000827: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-11-17 21:21:03.001035: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-11-17 21:21:03.001453: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-11-17 21:21:03.001964: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
history1=model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20


2025-11-17 21:21:04.125489: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - accuracy: 0.8800 - loss: 0.4233 - val_accuracy: 0.8902 - val_loss: 0.4074
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.8437 - loss: 1.0070 - val_accuracy: 0.8005 - val_loss: 1.8105
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8110 - loss: 2.6784 - val_accuracy: 0.8031 - val_loss: 4.1457
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.8064 - loss: 4.4114 - val_accuracy: 0.7515 - val_loss: 8.5616
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8069 - loss: 6.6509 - val_accuracy: 0.8597 - val_loss: 4.8640
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8014 - loss: 8.7267 - val_accuracy: 0.8232 - val_loss: 7.9509
Epoch 7/20
[1m469/469[0m [32m

In [5]:
# Let's add some regularisation:

from tensorflow.keras.regularizers import l1, l2

#dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
y = Dense(num_classes, activation='softmax')(x)

model= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy',optimizer=opt, metrics=['accuracy'])
model.summary()

In [6]:
# Dropout

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu',activity_regularizer=l1(0.1))(x)
x = Dropout(rate=dropout_rate)(x)
x = Dense(200, activation='relu',activity_regularizer=l1(0.2))(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_dropout= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_dropout.compile(loss='mse',optimizer=opt, metrics=['accuracy'])
model_dropout.summary()


In [7]:
history4=model_dropout.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.1419 - loss: 18120.5449 - val_accuracy: 0.4867 - val_loss: 20225.0098
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2028 - loss: 20675.4238 - val_accuracy: 0.5324 - val_loss: 21636.6934
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2498 - loss: 21758.3711 - val_accuracy: 0.5546 - val_loss: 22471.2129
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2742 - loss: 22411.0332 - val_accuracy: 0.5691 - val_loss: 23030.3789
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2843 - loss: 23010.9043 - val_accuracy: 0.5771 - val_loss: 23716.5801
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2846 - loss: 23600.0703 - val_accuracy: 0.53

In [8]:
tf.__version__

'2.16.2'

In [9]:
# Batch-Normalisation

dropout_rate = 0.2

input_network = Input(shape=(28,28))
x = Flatten()(input_network)
x = Dense(200, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(200, activation='relu')(x)
x = Dropout(rate=dropout_rate)(x)
y = Dense(num_classes, activation='softmax')(x)

model_batch_normalisation= Model(input_network,outputs=y)
opt = Adam(learning_rate=0.001)
model_batch_normalisation.compile(loss='mse',optimizer=opt, metrics=['accuracy'])
model_batch_normalisation.summary()

In [10]:
history4=model_batch_normalisation.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

Epoch 1/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - accuracy: 0.8585 - loss: 0.0213 - val_accuracy: 0.9022 - val_loss: 0.0161
Epoch 2/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8684 - loss: 0.0222 - val_accuracy: 0.9062 - val_loss: 0.0163
Epoch 3/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step - accuracy: 0.8661 - loss: 0.0241 - val_accuracy: 0.9043 - val_loss: 0.0174
Epoch 4/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8633 - loss: 0.0252 - val_accuracy: 0.9010 - val_loss: 0.0186
Epoch 5/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8680 - loss: 0.0249 - val_accuracy: 0.9041 - val_loss: 0.0181
Epoch 6/20
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.8632 - loss: 0.0261 - val_accuracy: 0.9034 - val_loss: 0.0184
Epoch 7/20
[1m469/469

In [11]:
# Saving a model

# Save the model
model_batch_normalisation.save('mnist_model_batch_normalisation.keras')


# Load the model
loaded_model = tf.keras.models.load_model('mnist_model_batch_normalisation.keras')
print("Model loaded from 'mnist_model_batch_normalisation.keras")

# Evaluate the loaded model
loss, accuracy = loaded_model.evaluate(x_test, y_test)

Model loaded from 'mnist_model_batch_normalisation.keras
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8841 - loss: 0.0230


In [12]:
# keeping the best model on the test/validation data.

# Define a ModelCheckpoint callback
checkpoint = ModelCheckpoint('best_model.keras', 
                             monitor='val_accuracy',  # You can monitor 'val_loss' or any other metric
                             save_best_only=True, 
                             mode='max',  # Use 'max' if monitoring accuracy; 'min' if monitoring loss
                             verbose=1)

history4=model_batch_normalisation.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=[checkpoint],
          validation_data=(x_test, y_test))

best_model = tf.keras.models.load_model('best_model.keras')

Epoch 1/20
[1m467/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 15ms/step - accuracy: 0.8595 - loss: 0.0279
Epoch 1: val_accuracy improved from None to 0.87270, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step - accuracy: 0.8600 - loss: 0.0278 - val_accuracy: 0.8727 - val_loss: 0.0252
Epoch 2/20
[1m467/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 14ms/step - accuracy: 0.8529 - loss: 0.0291
Epoch 2: val_accuracy improved from 0.87270 to 0.88180, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.8555 - loss: 0.0286 - val_accuracy: 0.8818 - val_loss: 0.0235
Epoch 3/20
[1m465/469[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.8565 - loss: 0.0285
Epoch 3: val_accuracy improved from 0.88180 to 0.89750, saving model to best_model.keras
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14m

In [13]:
import optuna

# Optuna objective function
def objective(trial):
    # Suggest hyperparameters
    num_layers = trial.suggest_int('num_layers', 1, 3)
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid'])
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    units = trial.suggest_int('units', 32, 128)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    
    # Build the model
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    
    for _ in range(num_layers):
        model.add(Dense(units=units, activation=activation))
        model.add(Dropout(rate=dropout_rate))
    
    model.add(Dense(10, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    # Train the model
    history = model.fit(x_train, y_train, 
                        validation_split=0.2,
                        epochs=5,  # To keep it quick for demonstration; you can increase it
                        batch_size=128, 
                        verbose=0)
    
    # Evaluate the model
    score = model.evaluate(x_test, y_test, verbose=0)
    accuracy = score[1]
    return accuracy

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)  # Set the number of trials

# Print the best trial
print('Best trial:')
print(f' Value: {study.best_trial.value}')
print(' Params: ')
for key, value in study.best_trial.params.items():
    print(f'    {key}: {value}')

ModuleNotFoundError: No module named 'optuna'