In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import numpy as np

In [6]:
# 1. Data Preparation
# Fetch MNIST (28x28 grayscale digit images with integer class labels).
# load_data() returns a (train, test) pair, each being an (images, labels) tuple.
train_split, test_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [7]:
# Normalize the pixel values from the integer 0-255 range to float32 in 0-1.
# The scaling is applied only while an array still holds integer pixels, so
# re-running this cell cannot divide already-normalized data by 255 again.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype("float32") / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype("float32") / 255.0

In [8]:
# Collapse each 28x28 image into a single 784-element row vector.
# The leading -1 lets NumPy infer the number of samples in each split.
x_train, x_test = (
    images.reshape(-1, 28 * 28) for images in (x_train, x_test)
)

In [9]:
# Convert the integer digit labels into one-hot rows, one column per class.
num_classes = 10
y_train_oh, y_test_oh = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [10]:
# 2. Model Architecture
# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialization and the batch shuffling done by fit() repeat on a fresh
# kernel. (Some GPU kernels may still introduce small nondeterminism.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Fully connected classifier: 784 inputs -> 128 -> 64 -> num_classes outputs.
model = models.Sequential([
    layers.Input(shape=(784,)),               # Input layer (784)
    layers.Dense(128, activation="relu"),     # Hidden layer 1
    layers.Dense(64, activation="relu"),      # Hidden layer 2
    layers.Dense(num_classes, activation="softmax")  # Output layer: class probabilities
])




In [11]:
# 3. Model Training
# Configure the training setup: Adam updates the weights, categorical
# cross-entropy is the loss (targets are one-hot), and accuracy is tracked.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)




In [12]:
# Fit the network for 8 epochs (anywhere in the 5-10 range is reasonable).
# 10% of the training data is held out for validation, mini-batches contain
# 128 samples, and verbose=2 prints one summary line per epoch.
history = model.fit(
    x=x_train,
    y=y_train_oh,
    batch_size=128,
    epochs=8,
    validation_split=0.1,
    verbose=2,
)

Epoch 1/8


422/422 - 2s - loss: 0.3567 - accuracy: 0.8990 - val_loss: 0.1360 - val_accuracy: 0.9632 - 2s/epoch - 4ms/step
Epoch 2/8
422/422 - 1s - loss: 0.1462 - accuracy: 0.9579 - val_loss: 0.1100 - val_accuracy: 0.9687 - 848ms/epoch - 2ms/step
Epoch 3/8
422/422 - 1s - loss: 0.1021 - accuracy: 0.9700 - val_loss: 0.0899 - val_accuracy: 0.9733 - 860ms/epoch - 2ms/step
Epoch 4/8
422/422 - 1s - loss: 0.0782 - accuracy: 0.9765 - val_loss: 0.0889 - val_accuracy: 0.9738 - 850ms/epoch - 2ms/step
Epoch 5/8
422/422 - 1s - loss: 0.0615 - accuracy: 0.9813 - val_loss: 0.0879 - val_accuracy: 0.9713 - 851ms/epoch - 2ms/step
Epoch 6/8
422/422 - 1s - loss: 0.0503 - accuracy: 0.9845 - val_loss: 0.0770 - val_accuracy: 0.9767 - 885ms/epoch - 2ms/step
Epoch 7/8
422/422 - 1s - loss: 0.0387 - accuracy: 0.9883 - val_loss: 0.0730 - val_accuracy: 0.9792 - 851ms/epoch - 2ms/step
Epoch 8/8
422/422 - 1s - loss: 0.0319 - accuracy: 0.9903 - val_loss: 0.0746 - val_accuracy: 0.9773 - 866ms/epoch - 2ms/step


In [13]:
# 4. Evaluation
# Score the trained model on the test split; evaluate() returns the loss
# followed by each compiled metric (here only accuracy). verbose=0 keeps it silent.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test_oh, verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

Test accuracy: 0.9761
