In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import numpy as np




In [2]:
# 1. Data Preparation
# Fetch MNIST (28x28 grayscale images) as a (train, test) pair of
# (images, labels) tuples, then unpack each split into its own names.
train_split, test_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Normalize the pixel values to the 0-1 range, stored as float32.
# Each array is rebound in place, so without a guard a second run of this
# cell would divide the already-scaled data by 255 again. Only arrays that
# still have an integer dtype (i.e. not yet scaled) are converted, which
# makes the cell safe to re-run.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype("float32") / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype("float32") / 255.0

In [4]:
# Collapse every 28x28 image into one row of 784 values; -1 lets NumPy
# infer the number of rows (one per image).
n_pixels = 28 * 28
x_train = np.reshape(x_train, (-1, n_pixels))
x_test = np.reshape(x_test, (-1, n_pixels))

In [5]:
# One-hot encode the labels: each label becomes a vector with
# `num_classes` (10) entries.
num_classes = 10
y_train_oh, y_test_oh = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [6]:
# 2. Model Architecture
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that weight initialisation (and the later shuffling in fit) is repeatable
# on a fresh run and on every re-run of this cell.
# NOTE: requires TF >= 2.7; some GPU kernels are non-deterministic, so
# metrics may still vary slightly between runs on GPU.
tf.keras.utils.set_random_seed(42)

# Fully connected classifier: 784 inputs -> 128 -> 64 -> num_classes.
model = models.Sequential([
    layers.Input(shape=(784,)),               # Input layer (flattened 28x28 image)
    layers.Dense(128, activation="relu"),     # Hidden layer 1
    layers.Dense(64, activation="relu"),      # Hidden layer 2
    layers.Dense(num_classes, activation="softmax")  # Output layer: one probability per class
])




In [7]:
# 3. Model Training
# Gather the loss, optimizer and metrics in one place, then compile.
compile_options = {
    "loss": "categorical_crossentropy",  # multi-class cross-entropy on one-hot targets
    "optimizer": "adam",                 # Adam with its default settings
    "metrics": ["accuracy"],
}
model.compile(**compile_options)




In [8]:
# Train the model for a reasonable number of epochs (e.g., 5-10).
fit_options = dict(
    validation_split=0.1,  # hold out 10% of the training data for validation
    epochs=8,              # anywhere in 5-10 is fine; 8 sits in the middle
    batch_size=128,
    verbose=2,             # one summary line per epoch
)
history = model.fit(x_train, y_train_oh, **fit_options)

Epoch 1/8


422/422 - 2s - loss: 0.3488 - accuracy: 0.9005 - val_loss: 0.1489 - val_accuracy: 0.9585 - 2s/epoch - 6ms/step
Epoch 2/8
422/422 - 1s - loss: 0.1456 - accuracy: 0.9577 - val_loss: 0.1060 - val_accuracy: 0.9712 - 1s/epoch - 2ms/step
Epoch 3/8
422/422 - 1s - loss: 0.1010 - accuracy: 0.9700 - val_loss: 0.0872 - val_accuracy: 0.9738 - 950ms/epoch - 2ms/step
Epoch 4/8
422/422 - 1s - loss: 0.0754 - accuracy: 0.9775 - val_loss: 0.0865 - val_accuracy: 0.9730 - 988ms/epoch - 2ms/step
Epoch 5/8
422/422 - 1s - loss: 0.0578 - accuracy: 0.9828 - val_loss: 0.0741 - val_accuracy: 0.9790 - 925ms/epoch - 2ms/step
Epoch 6/8
422/422 - 1s - loss: 0.0473 - accuracy: 0.9853 - val_loss: 0.0747 - val_accuracy: 0.9787 - 997ms/epoch - 2ms/step
Epoch 7/8
422/422 - 1s - loss: 0.0366 - accuracy: 0.9888 - val_loss: 0.0746 - val_accuracy: 0.9782 - 922ms/epoch - 2ms/step
Epoch 8/8
422/422 - 1s - loss: 0.0298 - accuracy: 0.9908 - val_loss: 0.0780 - val_accuracy: 0.9768 - 940ms/epoch - 2ms/step


In [9]:
# 4. Evaluation
# evaluate() returns the loss followed by the compiled metrics; the model
# was compiled with a single "accuracy" metric, so that is two values.
eval_results = model.evaluate(x_test, y_test_oh, verbose=0)
test_loss, test_acc = eval_results
print(f"Test accuracy: {test_acc:.4f}")

Test accuracy: 0.9758


In [10]:
# Final evaluation on the test set: report both loss and accuracy.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test_oh, verbose=0)
print(
    f"Test loss: {test_loss:.4f}",
    f"Test accuracy: {test_acc:.4f}",
    sep="\n",
)


Test loss: 0.0818
Test accuracy: 0.9758


In [15]:
# 5. Single-sample prediction
# (numpy is already imported as `np` in the notebook's import cell, so it is
# not imported again here.)

# Pick a random test index
idx = np.random.randint(0, x_test.shape[0])

# Get the image and label; reshape to a batch of one row for predict()
x_input = x_test[idx].reshape(1, -1)
true_label = int(y_test[idx])

# Predict: take the first (only) row of the batch output
probs = model.predict(x_input, verbose=0)[0]
pred_label = int(np.argmax(probs))

print(f"Random test index: {idx}")
print(f"True label: {true_label}")
print(f"Predicted label: {pred_label}")

# Show top-3 class probabilities.
# A stable argsort on the negated scores orders classes by descending
# probability; ties keep ascending class order, as the previous
# sorted(..., reverse=True) did.
top3 = [(cls, probs[cls]) for cls in np.argsort(-probs, kind="stable")[:3]]
print("\nTop-3 probabilities:")
for cls, p in top3:
    print(f"  digit {cls}: {p:.3f}")

Random test index: 7619
True label: 2
Predicted label: 2

Top-3 probabilities:
  digit 2: 0.960
  digit 3: 0.037
  digit 7: 0.003
