## Importing libraries

In [1]:
import numpy as np
import keras.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report



## Data preparation

In [2]:
# Data loading and spliting
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
(X_train, y_train), (X_test, y_test)

((array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         ...,
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0

In [3]:
# Normalize pixel values
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [4]:
# Convert labels to encoding
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

### Now you have preprocessed the data and split it into training, validation, and test sets, which can be used for training and evaluating your deep learning models for handwritten digit recognition.

In [5]:
# Split the data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (48000, 28, 28)
Validation set shape: (12000, 28, 28)
Test set shape: (10000, 28, 28)


In [6]:
(X_train, y_train), (X_test, y_test)

((array([[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         ...,
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
         

## A baseline model using traditional machine learning algorithms


In [7]:
# Flatten the images
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Initialize the logistic regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train_flat, np.argmax(y_train, axis=1))

# Make predictions on the validation set
val_predictions = model.predict(X_val_flat)

# Calculate accuracy on the validation set
val_accuracy = accuracy_score(np.argmax(y_val, axis=1), val_predictions)
print("Validation Accuracy:", val_accuracy)

# Make predictions on the test set
test_predictions = model.predict(X_test_flat)

# Calculate accuracy on the test set
test_accuracy = accuracy_score(np.argmax(y_test, axis=1), test_predictions)
print("Test Accuracy:", test_accuracy)

Validation Accuracy: 0.9219166666666667
Test Accuracy: 0.9239


### Evaluate the baseline model's performance.


In [8]:
# Calculate accuracy on the test set
test_accuracy = accuracy_score(np.argmax(y_test, axis=1), test_predictions)
print("Test Accuracy:", test_accuracy)

# Calculate the confusion matrix
confusion_mat = confusion_matrix(np.argmax(y_test, axis=1), test_predictions)
print("Confusion Matrix:")
print(confusion_mat)

# Generate the classification report
class_report = classification_report(np.argmax(y_test, axis=1), test_predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.9239
Confusion Matrix:
[[ 962    0    1    3    1    4    4    4    1    0]
 [   0 1113    2    2    0    1    4    1   12    0]
 [   7   10  922   18    9    4   13   11   34    4]
 [   3    1   17  921    1   23    3   10   20   11]
 [   1    3    5    3  919    0   10    6    7   28]
 [   8    2    3   39   11  773   14    7   30    5]
 [   9    3    8    2    7   17  909    2    1    0]
 [   0    7   25    9    6    1    0  945    2   33]
 [   9   10    7   25    8   26    9   10  859   11]
 [   8    8    1    9   27    7    1   23    9  916]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.96      0.98      0.97      1135
           2       0.93      0.89      0.91      1032
           3       0.89      0.91      0.90      1010
           4       0.93      0.94      0.93       982
           5       0.90      0.87      0.88       892
           6       0.94    

##  Ensemble of Machine Learning Algorithms

In [9]:
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.svm import SVC
# from sklearn.ensemble import VotingClassifier
# from sklearn.tree import DecisionTreeClassifier

# # Shrinking traning data set
# X_train_small, _, y_train_small, _ = train_test_split(X_train_flat, y_train, test_size=0.1, random_state=42)

# # Initialize individual models
# decision_tree = DecisionTreeClassifier()
# random_forest = RandomForestClassifier(n_estimators=100)
# svm_model = SVC(kernel='linear', probability=True)

# # Initialize the voting classifier with the individual models
# voting_classifier = VotingClassifier(estimators=[('decision_tree', decision_tree),
#                                                  ('random_forest', random_forest),
#                                                  ('svm', svm_model)],voting='soft', n_jobs=-1) 

# # Fit the classifier using the smaller training set
# voting_classifier.fit(X_train_small, np.argmax(y_train_small, axis=1))

# # Predict te test data
# test_predictions_ensemble = voting_classifier.predict(X_test_flat)

In [10]:
# class_report_ensemble = classification_report(np.argmax(y_test, axis=1), test_predictions_ensemble)
# print("Classification Report (Ensemble): \n", class_report_ensemble)

<img src='class_report_ensemble.jpg' alt='classifier report img'>

## Neural Network Model Design
• Design and implement multiple neural network architectures for handwritten digit recognition.

In [11]:
from tensorflow import keras
from tensorflow.keras import layers

### Multi-Layer Perceptron (MLP)

In [12]:
#Creating the model MLP 
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))


  super().__init__(**kwargs)


### Deep Convolutional Neural Network (CNN)

In [13]:
##Creating the model deep CNN 
model_deep_cnn = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(10, activation="softmax")
])

  super().__init__(


In [14]:
# Compile the models
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model_deep_cnn.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [15]:
# Train the models
model.fit(X_train, y_train, batch_size=128, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8458 - loss: 0.5501 - val_accuracy: 0.9571 - val_loss: 0.1458
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9642 - loss: 0.1216 - val_accuracy: 0.9668 - val_loss: 0.1077
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9777 - loss: 0.0758 - val_accuracy: 0.9732 - val_loss: 0.0838
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9834 - loss: 0.0540 - val_accuracy: 0.9711 - val_loss: 0.0922
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9886 - loss: 0.0379 - val_accuracy: 0.9775 - val_loss: 0.0747
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9929 - loss: 0.0257 - val_accuracy: 0.9758 - val_loss: 0.0834
Epoch 7/10
[1m375/375[0m 

<keras.src.callbacks.history.History at 0x23f81ebbd60>

In [16]:
model_deep_cnn.fit(X_train, y_train, batch_size=128, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 52ms/step - accuracy: 0.8341 - loss: 0.5266 - val_accuracy: 0.9772 - val_loss: 0.0739
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 51ms/step - accuracy: 0.9696 - loss: 0.0992 - val_accuracy: 0.9842 - val_loss: 0.0525
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 60ms/step - accuracy: 0.9785 - loss: 0.0693 - val_accuracy: 0.9881 - val_loss: 0.0404
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 71ms/step - accuracy: 0.9817 - loss: 0.0561 - val_accuracy: 0.9889 - val_loss: 0.0354
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 102ms/step - accuracy: 0.9867 - loss: 0.0440 - val_accuracy: 0.9893 - val_loss: 0.0362
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 78ms/step - accuracy: 0.9868 - loss: 0.0399 - val_accuracy: 0.9901 - val_loss: 0.0350
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x23f820a5ee0>

### regularization techniques.

In [20]:
from tensorflow.keras.regularizers import l2

# Creating the model MLP
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dropout(0.5))  # Dropout regularization with 50% dropout 
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))

  super().__init__(**kwargs)


## Hyperparameter Tuning and Overfitting Mitigation

In [21]:
# Creating the model deep CNN
model_deep_cnn = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),  # Dropout regularization with 25% dropout 
    layers.Flatten(),
    layers.Dense(128, activation="relu", kernel_regularizer=keras.regularizers.l2(0.001)),  # L2 regularization with regularization of 0.001
    layers.Dropout(0.25),  # Dropout regularization with 50% dropout 
    layers.Dense(10, activation="softmax")
])

  super().__init__(


In [22]:
# Compile the models
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_deep_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the models
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8312 - loss: 0.5458 - val_accuracy: 0.9553 - val_loss: 0.1473
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9428 - loss: 0.1830 - val_accuracy: 0.9641 - val_loss: 0.1180
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9527 - loss: 0.1506 - val_accuracy: 0.9707 - val_loss: 0.1074
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9632 - loss: 0.1238 - val_accuracy: 0.9740 - val_loss: 0.0881
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9651 - loss: 0.1150 - val_accuracy: 0.9755 - val_loss: 0.0843
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9691 - loss: 0.0975 - val_accuracy: 0.9753 - val_loss: 0.0911
Epoch 7/10
[1m1

<keras.src.callbacks.history.History at 0x23f822caf10>

In [23]:
model_deep_cnn.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 28ms/step - accuracy: 0.8987 - loss: 0.4958 - val_accuracy: 0.9800 - val_loss: 0.2145
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 34ms/step - accuracy: 0.9689 - loss: 0.2428 - val_accuracy: 0.9819 - val_loss: 0.1983
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 34ms/step - accuracy: 0.9746 - loss: 0.2151 - val_accuracy: 0.9844 - val_loss: 0.1809
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 34ms/step - accuracy: 0.9764 - loss: 0.2036 - val_accuracy: 0.9793 - val_loss: 0.1833
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 33ms/step - accuracy: 0.9790 - loss: 0.1846 - val_accuracy: 0.9862 - val_loss: 0.1628
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 33ms/step - accuracy: 0.9800 - loss: 0.1815 - val_accuracy: 0.9847 - val_loss: 0.1645
Epoc

<keras.src.callbacks.history.History at 0x23f8248da90>

In [24]:
# Evaluate the models 
mlp_test_loss, mlp_test_accuracy = model.evaluate(X_test, y_test)
deep_cnn_test_loss, deep_cnn_test_accuracy = model_deep_cnn.evaluate(X_test, y_test)

print("MLP Test Loss:", mlp_test_loss)
print("MLP Test Accuracy:", mlp_test_accuracy)
print("Deep CNN Test Loss:", deep_cnn_test_loss)
print("Deep CNN Test Accuracy:", deep_cnn_test_accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 751us/step - accuracy: 0.9776 - loss: 0.0827
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9859 - loss: 0.1506
MLP Test Loss: 0.07006821036338806
MLP Test Accuracy: 0.9800999760627747
Deep CNN Test Loss: 0.14251644909381866
Deep CNN Test Accuracy: 0.989300012588501


In [13]:
from sklearn.model_selection import RandomizedSearchCV
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from scikeras.wrappers import KerasClassifier


# Define a function to create the model
def create_model(optimizer='adam'):
    model = Sequential([
        Input(shape=[28, 28]),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


model_wrapper = KerasClassifier(model=create_model, epochs=10, batch_size=32,verbose=0)

param_grid = {
    'optimizer': ['adam', 'sgd'],  # Tune the optimizer
    'epochs': [5, 10, 15]  # Tune the number of epochs
}

# Perform random search
random_search = RandomizedSearchCV(estimator=model_wrapper, param_distributions=param_grid, n_iter=5, cv=3, verbose=0)
random_search.fit(X_train, y_train)

print("Best parameters found: ", random_search.best_params_)

Best parameters found:  {'optimizer': 'adam', 'epochs': 15}
