## CNN image classification (MNIST-as-JPG)

This notebook loads grayscale digit images from a directory, builds a small CNN in Keras, trains it with a `tf.data` pipeline, then runs a single-image prediction.


In [None]:
import os

os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as pltimg

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D, Dropout


## Quick image preview

Optional sanity check: render a couple of sample images from the dataset and print the image shape.


In [None]:
# Two hand-picked files, one from the "6" class folder and one from the "4" class folder.
sample_img_1 = pltimg.imread("/kaggle/input/mnistasjpg/trainingSet/trainingSet/6/img_10129.jpg")
sample_img_2 = pltimg.imread("/kaggle/input/mnistasjpg/trainingSet/trainingSet/4/img_10034.jpg")

# Render both samples side by side; axes carry no information for raw pixels, so hide them.
fig, axes = plt.subplots(1, 2, figsize=(6, 3))
for panel, sample in zip(axes, (sample_img_1, sample_img_2)):
    panel.imshow(sample, cmap="gray")
    panel.axis("off")
plt.show()
print("sample_img_2.shape:", sample_img_2.shape)

## Data pipeline (tf.data)

Build a standard, fast input pipeline with `tf.data`: load images from disk, normalize to `[0, 1]`, cache decoded batches, shuffle the training split, apply augmentation **only** on the training split, then `prefetch()` to overlap input work with GPU compute.

We also create a `train_eval_ds` (same training split, but **no augmentation** and no shuffle) for an apples-to-apples comparison against validation when sanity-checking metrics.


In [None]:
# --- Reproducibility -------------------------------------------------------
SEED = 42

# --- Data ------------------------------------------------------------------
DATA_DIR = "/kaggle/input/mnistasjpg/trainingSet/trainingSet"
IMG_SIZE = (28, 28)       # size every image is resized to by the dataset loader
BATCH_SIZE = 32
SHUFFLE_BUFFER = 1000     # buffer size of the tf.data shuffle on the training split
AUTOTUNE = tf.data.AUTOTUNE

# --- Output ----------------------------------------------------------------
MODEL_PATH = "model2.keras"

# Seed Python's `random`, NumPy and TensorFlow in one call so that dataset
# shuffling, augmentation, weight initialisation and dropout masks are repeatable.
tf.keras.utils.set_random_seed(SEED)

# Random geometric perturbations for training images:
#   - rotation by up to 10/360 of a full turn (i.e. +/- 10 degrees),
#   - translation by up to 10% of the height / width,
#   - zoom by up to 10%.
_augmentation_steps = [
    tf.keras.layers.RandomRotation(10 / 360.0),
    tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    tf.keras.layers.RandomZoom(0.1),
]
augmentation_layer = tf.keras.Sequential(_augmentation_steps)

# Multiplies pixel values by 1/255, i.e. maps the [0, 255] range onto [0, 1].
rescale_layer = tf.keras.layers.Rescaling(scale=1.0 / 255.0)

def normalize(images, labels):
    """Rescale pixel values with ``rescale_layer``; labels pass through unchanged.

    Takes and returns an ``(images, labels)`` pair so it can be handed directly
    to ``Dataset.map`` on a dataset of such pairs.
    """
    scaled_images = rescale_layer(images)
    return scaled_images, labels

def augment(images, labels):
    """Apply ``augmentation_layer`` to the images; labels pass through unchanged.

    ``training=True`` is passed explicitly so the random transforms are active
    whenever this function is called. Only map it over the training pipeline.
    """
    return augmentation_layer(images, training=True), labels

In [None]:
# Fraction of the files held out for validation.
VALIDATION_SPLIT = 0.2


def _load_split(subset):
    """Load one subset ("training" or "validation") of DATA_DIR as a batched dataset.

    Every subset MUST be loaded with the same ``shuffle`` and ``seed``. Keras
    shuffles the file list first and only then slices off the trailing
    ``validation_split`` fraction. A loader with a different ``shuffle``
    setting therefore cuts its subset out of a differently ordered list: with
    ``shuffle=False`` the "validation" subset is the tail of the class-ordered
    file list and overlaps the shuffled training subset (data leakage).

    Args:
        subset: "training" or "validation".

    Returns:
        A ``tf.data.Dataset`` of ``(images, one_hot_labels)`` batches with
        grayscale images resized to ``IMG_SIZE``.
    """
    return tf.keras.utils.image_dataset_from_directory(
        DATA_DIR,
        labels="inferred",
        label_mode="categorical",
        color_mode="grayscale",
        batch_size=BATCH_SIZE,
        image_size=IMG_SIZE,
        shuffle=True,
        seed=SEED,
        validation_split=VALIDATION_SPLIT,
        subset=subset,
    )


train_ds_raw = _load_split("training")

# Exactly the same files as `train_ds_raw`, loaded as a separate dataset object
# so it can feed an augmentation-free evaluation pipeline. Batch order does not
# affect evaluation metrics.
train_eval_ds_raw = _load_split("training")

val_ds_raw = _load_split("validation")

# Fail fast if the splits ever drift apart again.
assert set(train_ds_raw.file_paths).isdisjoint(val_ds_raw.file_paths), (
    "training and validation subsets share files"
)
assert set(train_eval_ds_raw.file_paths) == set(train_ds_raw.file_paths), (
    "train_eval subset differs from the training subset"
)

def _normalize_and_cache(raw_ds):
    """Shared front half of every pipeline: rescale pixels, then cache the result."""
    return raw_ds.map(normalize, num_parallel_calls=AUTOTUNE).cache()


# Training: normalize -> cache -> shuffle -> augment -> prefetch.
# Augmentation sits after the cache so every epoch draws fresh random transforms.
# The raw dataset is already batched, so the shuffle reorders whole batches.
train_ds = (
    _normalize_and_cache(train_ds_raw)
    .shuffle(SHUFFLE_BUFFER, seed=SEED, reshuffle_each_iteration=True)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .prefetch(AUTOTUNE)
)

# Evaluation pipelines: normalize -> cache -> prefetch (no shuffle stage, no augmentation).
train_eval_ds = _normalize_and_cache(train_eval_ds_raw).prefetch(AUTOTUNE)
val_ds = _normalize_and_cache(val_ds_raw).prefetch(AUTOTUNE)

## Model

Define a small CNN for 10-class classification and compile with Adam + categorical cross-entropy.

Training uses common callbacks:
- `ModelCheckpoint`: saves the **best** model (by `val_accuracy`) to `MODEL_PATH`
- `EarlyStopping`: may stop before the maximum epoch count if validation stops improving
- `ReduceLROnPlateau`: lowers the learning rate when validation loss plateaus


In [None]:
# Single-channel 28x28 input.
model_input = Input(shape=(28, 28, 1))

# Feature extractor: two conv + max-pool stages. "same" padding keeps the spatial
# size through each convolution, and each 2x2 pool halves it (28 -> 14 -> 7).
conv_stack = [
    Conv2D(32, (3, 3), activation="relu", padding="same"),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation="relu", padding="same"),
    MaxPooling2D(pool_size=(2, 2)),
]

# Classifier head: flatten, one hidden dense layer with dropout, 10-way softmax.
classifier_head = [
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(10, activation="softmax"),
]

model = Sequential([model_input, *conv_stack, *classifier_head])

# Categorical cross-entropy matches the one-hot labels and the softmax output.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Write the model to MODEL_PATH only when val_accuracy reaches a new maximum.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    MODEL_PATH, monitor="val_accuracy", mode="max", save_best_only=True
)

# Stop once val_accuracy has not improved for 3 epochs and roll back to the best weights.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy", mode="max", patience=3, restore_best_weights=True
)

# Halve the learning rate after 2 epochs without val_loss improvement (floor: 1e-5).
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=2, min_lr=1e-5
)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]

# 20 is an upper bound on epochs; early stopping may end training sooner.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=callbacks,
)

In [None]:
# Accuracy curves: training vs validation, one point per completed epoch.
acc = history.history.get("accuracy")
val_acc = history.history.get("val_accuracy")

# Fail with a helpful message if either metric was not recorded under the expected key.
curve_missing = acc is None or val_acc is None
if curve_missing:
    available_keys = ", ".join(sorted(history.history.keys()))
    raise KeyError("Missing accuracy history. Available keys: " + available_keys)

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(acc) + 1)

acc_fig, acc_ax = plt.subplots(figsize=(7, 4))
acc_ax.plot(epochs, acc, label="train accuracy")
acc_ax.plot(epochs, val_acc, label="val accuracy")
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.grid(True, alpha=0.3)
acc_ax.legend()
acc_fig.tight_layout()
plt.show()


## Inference example

Load the saved model and run prediction on a single image using the same normalization as training.


In [None]:
# Load one image as single-channel and resize it to the resolution the network
# was trained on. Without `target_size`, any file that is not already IMG_SIZE
# would fail with a shape mismatch at predict time. Bilinear interpolation is
# the dataset loader's default resize method.
# NOTE: this file lives under the training directory, so the model may have seen
# it during training. This is a demo of the inference path, not an evaluation.
pil_img = tf.keras.utils.load_img(
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/3/img_57.jpg",
    color_mode="grayscale",
    target_size=IMG_SIZE,
    interpolation="bilinear",
)
img_array = tf.keras.utils.img_to_array(pil_img)  # (height, width, 1)
plt.imshow(img_array.squeeze(), cmap="gray")
plt.axis("off")
plt.show()

# Add a leading batch axis and apply the same [0, 1] rescaling used in training.
input_batch = rescale_layer(np.expand_dims(img_array, axis=0))

# Reload the best checkpoint written during training. `compile=False` skips
# restoring the optimizer and loss, which are not needed for prediction.
model = load_model(MODEL_PATH, compile=False)
pred_probs = model.predict(input_batch)

# The predicted class is the index of the highest softmax probability.
pred_class = int(np.argmax(pred_probs, axis=1)[0])
print("pred_class:", pred_class)