In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
import matplotlib.pyplot as plt
import numpy as np

# Load and preprocess data
(train_images, train_labels), (test_images, test_labels) = (
    tf.keras.datasets.mnist.load_data()
)

# Reshape for CNN input (add channel dimension)
train_images = train_images.reshape((60000, 28, 28, 1)).astype("float32") / 255.0
test_images = test_images.reshape((10000, 28, 28, 1)).astype("float32") / 255.0

# Convert labels to one-hot encoding (optional, but common for CNNs with softmax)
# For sparse_categorical_crossentropy, this step is not strictly necessary if labels are integers.
# train_labels = tf.keras.utils.to_categorical(train_labels, 10)
# test_labels = tf.keras.utils.to_categorical(test_labels, 10)

# 1. Model Definition: Using a Convolutional Neural Network (CNN)
model = models.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", input_shape=(28, 28, 1)),
        layers.BatchNormalization(),  # Added Batch Normalization
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation="relu"),
        layers.BatchNormalization(),  # Added Batch Normalization
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation="relu"),  # Added another Conv layer
        layers.BatchNormalization(),  # Added Batch Normalization
        layers.Flatten(),
        layers.Dense(
            128, activation="relu", kernel_regularizer=regularizers.l2(0.0001)
        ),  # L2 Regularization
        layers.Dropout(0.3),  # Added Dropout
        layers.Dense(10, activation="softmax"),
    ]
)

# 2. Compile the Model
# Experiment with different optimizers and learning rates
optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001
)  # Adam is generally good, try tuning learning rate
model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",  # Use this if labels are integers (0-9)
    # loss='categorical_crossentropy', # Use this if labels are one-hot encoded
    metrics=["accuracy"],
)

model.summary()  # Print model summary to see layers and parameters

# 3. Train the Model
# EarlyStopping helps prevent overfitting and saves training time
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=5, restore_best_weights=True
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.2, patience=3, min_lr=0.00001
    ),  # Reduce learning rate on plateau
]

history = model.fit(
    train_images,
    train_labels,
    epochs=30,  # Increased epochs
    batch_size=128,
    validation_data=(test_images, test_labels),
    callbacks=callbacks,
)

# 4. Evaluate the Model
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print(f"\nTest accuracy: {test_acc:.4f}")

# 5. Plotting Training History (for analysis)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="accuracy")
plt.plot(history.history["val_accuracy"], label="val_accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.ylim([0.9, 1])
plt.legend(loc="lower right")
plt.title("Training and Validation Accuracy")

plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="loss")
plt.plot(history.history["val_loss"], label="val_loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(loc="upper right")
plt.title("Training and Validation Loss")
plt.show()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 63ms/step - accuracy: 0.9133 - loss: 0.3052 - val_accuracy: 0.9202 - val_loss: 0.2963 - learning_rate: 0.0010
Epoch 2/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 74ms/step - accuracy: 0.9872 - loss: 0.0666 - val_accuracy: 0.9850 - val_loss: 0.0735 - learning_rate: 0.0010
Epoch 3/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 94ms/step - accuracy: 0.9907 - loss: 0.0552 - val_accuracy: 0.9919 - val_loss: 0.0482 - learning_rate: 0.0010
Epoch 4/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 72ms/step - accuracy: 0.9930 - loss: 0.0462 - val_accuracy: 0.9908 - val_loss: 0.0521 - learning_rate: 0.0010
Epoch 5/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 98ms/step - accuracy: 0.9939 - loss: 0.0393 - val_accuracy: 0.9898 - val_loss: 0.0564 - learning_rate: 0.0010
Epoch 6/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

KeyboardInterrupt: 