## CNN digit classifier (MNIST-as-JPG)

Train a small Keras CNN to classify digits **0–9** from grayscale JPGs on disk.

Expected layout:

```
DATA_DIR/0/*.jpg
DATA_DIR/1/*.jpg
...
DATA_DIR/9/*.jpg
```

In [None]:
import os

os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as pltimg

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D, Dropout


## Quick image preview

Optional sanity check: display 3 sample images and print their shapes (paths are hard-coded; adjust if needed).


In [None]:
# Hard-coded files used only for a visual sanity check; adjust to your dataset.
sample_paths = [
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/8/img_14050.jpg",
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/4/img_10034.jpg",
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/3/img_57.jpg",
]

# Read every file before creating the figure.
sample_imgs = list(map(pltimg.imread, sample_paths))
n_samples = len(sample_imgs)

# squeeze=False always returns a 2-D array of Axes, so flattening it handles
# the single-image case without a special branch.
fig, axes = plt.subplots(1, n_samples, figsize=(3 * n_samples, 3), squeeze=False)
axes = axes.ravel()

for i, img in enumerate(sample_imgs, start=1):
    panel = axes[i - 1]
    panel.imshow(img, cmap="gray")
    panel.axis("off")
    print(f"sample_img_{i}.shape:", img.shape)

plt.tight_layout()
plt.show()

## Data pipeline (tf.data)

`tf.data` pipeline: decode grayscale JPG → resize to `IMG_SIZE` → rescale to **[0,1]** → one-hot labels.

Datasets:
- `train_ds`: shuffled + augmented
- `val_ds`: no augmentation

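Uses `cache()` + `prefetch()` for speed. The cell below only defines the constants and the augmentation/rescaling layers; the datasets themselves are built after the train/validation split.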


In [None]:
# Settings shared by the split, the tf.data pipeline, training and inference.
SEED = 42
IMG_SIZE = (28, 28)  # target size passed to tf.image.resize
BATCH_SIZE = 32
SHUFFLE_BUFFER = 1000  # buffer_size handed to Dataset.shuffle
AUTOTUNE = tf.data.AUTOTUNE
DATA_DIR = "/kaggle/input/mnistasjpg/trainingSet/trainingSet"
MODEL_PATH = "model2.keras"  # checkpoint file written during training

# Seed the random number generators once, before any layer is created.
tf.keras.utils.set_random_seed(SEED)

# Random geometric jitter applied to training images only.
# RandomRotation takes its factor as a fraction of a full turn, so 10 / 360
# corresponds to rotations of up to 10 degrees in either direction.
_random_rotation = tf.keras.layers.RandomRotation(10 / 360.0)
_random_shift = tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1)
_random_zoom = tf.keras.layers.RandomZoom(0.1)
augmentation_layer = tf.keras.Sequential([_random_rotation, _random_shift, _random_zoom])

# Maps 8-bit pixel values from [0, 255] to [0, 1].
rescale_layer = tf.keras.layers.Rescaling(1.0 / 255.0)

def augment(images, labels):
    """Run ``images`` through ``augmentation_layer`` and pass ``labels`` through unchanged.

    ``training=True`` is passed explicitly so the random layers stay active
    when this function is mapped over a ``tf.data`` pipeline.

    Returns:
        A ``(augmented_images, labels)`` tuple.
    """
    return augmentation_layer(images, training=True), labels

## Train/validation split

Create a **stratified 80/20** split per digit folder (`0..9`) under `DATA_DIR`, then shuffle within each split.


In [None]:
from pathlib import Path

# Stratified 80/20 split per digit folder.
# Each class is shuffled with its own seeded generator (SEED + digit), so the
# split is reproducible and independent of the other classes.
train_paths, train_labels, val_paths, val_labels = [], [], [], []

for d in range(10):
    cls_dir = Path(DATA_DIR) / str(d)
    # Sort first so the seeded shuffle starts from a filesystem-independent order.
    cls_paths = sorted(str(p) for p in cls_dir.glob("*.jpg"))
    if not cls_paths:
        # Fail early with a clear message instead of silently building a
        # dataset that is missing a class (or is completely empty).
        raise FileNotFoundError(
            f"No .jpg files found for digit {d} in {cls_dir}; check DATA_DIR."
        )

    rng = np.random.default_rng(SEED + d)
    rng.shuffle(cls_paths)

    n_train = int(round(0.8 * len(cls_paths)))
    n_val = len(cls_paths) - n_train
    train_paths.extend(cls_paths[:n_train])
    train_labels.extend([d] * n_train)
    val_paths.extend(cls_paths[n_train:])
    val_labels.extend([d] * n_val)

# Shuffle within each split so the classes are interleaved rather than grouped.
# Paths and labels share one permutation per split to stay aligned.
train_perm = np.random.default_rng(SEED).permutation(len(train_paths))
val_perm = np.random.default_rng(SEED).permutation(len(val_paths))
train_paths = [train_paths[i] for i in train_perm]
train_labels = [train_labels[i] for i in train_perm]
val_paths = [val_paths[i] for i in val_perm]
val_labels = [val_labels[i] for i in val_perm]

print("Dataset split:")
print(f"- total: {len(train_paths) + len(val_paths)}")
print(f"- train: {len(train_paths)}")
print(f"- val:   {len(val_paths)}")

In [None]:
NUM_CLASSES = 10


def load_example(path, label):
    """Load one JPG from ``path`` and pair it with a one-hot encoded ``label``.

    The file is decoded as a single-channel image, resized to ``IMG_SIZE``,
    cast to float32 and passed through ``rescale_layer``.

    Returns:
        An ``(image, one_hot_label)`` tuple, the label having depth ``NUM_CLASSES``.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=1)
    pixels = tf.cast(tf.image.resize(pixels, IMG_SIZE), tf.float32)
    return rescale_layer(pixels), tf.one_hot(label, depth=NUM_CLASSES)

train_raw = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
val_raw = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))

# Decoded, unbatched training examples. Caching here (before shuffle/augment)
# means files are read and decoded only once, while shuffling and augmentation
# are still re-drawn every epoch.
train_base_ds = (
    train_raw
    .map(load_example, num_parallel_calls=AUTOTUNE)
    .cache()
)

# Shuffle individual examples BEFORE batching. Shuffling after batch() would
# only reorder whole batches, so every batch would contain the same examples
# in every epoch. Augmentation runs on whole batches after batching.
train_ds = (
    train_base_ds
    .shuffle(SHUFFLE_BUFFER, seed=SEED, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .prefetch(AUTOTUNE)
)

# Validation data: same loading path, but no shuffling and no augmentation.
val_ds = (
    val_raw
    .map(load_example, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(AUTOTUNE)
)


## Split sanity check

Verify the train/val split is **disjoint** (no overlapping file paths) and print per-class counts (should be roughly **80/20**).


In [None]:
from collections import Counter

# Sanity check: the two splits must not share any file, and each digit should
# be divided roughly 80/20 between them.
def _digit_of(path):
    """Return the digit encoded in the name of the folder that contains ``path``."""
    return int(Path(path).parent.name)


train_set = set(train_paths)
val_set = set(val_paths)
overlap = train_set.intersection(val_set)

print(f"train: {len(train_paths)}")
print(f"val:   {len(val_paths)}")
print(f"overlap: {len(overlap)}")

train_counts = Counter(map(_digit_of, train_paths))
val_counts = Counter(map(_digit_of, val_paths))
print("per-class (train/val):")
for d in range(10):
    print(f"- {d}: {train_counts[d]}/{val_counts[d]}")

# List at most ten offending paths, in sorted order; nothing is printed when
# the splits are disjoint.
leaked = sorted(overlap)[:10]
if leaked:
    print("examples:")
    for p in leaked:
        print(f"- {p}")



## Model

A small CNN that learns to map each 28×28 grayscale image to one of the 10 digit classes (0–9).

Trained with Adam and categorical cross-entropy; learning rate is reduced automatically if validation loss stops improving.


In [None]:
# Small CNN: two conv + max-pool stages followed by a dense head with dropout.
# The input shape and the output width are derived from IMG_SIZE and
# NUM_CLASSES so the model cannot drift out of sync with the data pipeline,
# which resizes to IMG_SIZE and one-hot encodes labels with depth NUM_CLASSES.
model = Sequential(
    [
        Input(shape=(*IMG_SIZE, 1)),  # single grayscale channel
        Conv2D(32, (3, 3), activation="relu", padding="same"),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation="relu", padding="same"),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.3),
        Dense(NUM_CLASSES, activation="softmax"),
    ]
)

# categorical_crossentropy matches the one-hot labels produced by the loader.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()

In [None]:
# Write the model to MODEL_PATH whenever validation accuracy reaches a new best.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    MODEL_PATH,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)

# Stop after 3 epochs without a validation-accuracy improvement and roll the
# in-memory model back to its best weights.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=3,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without a validation-loss improvement,
# never going below 1e-5.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    min_lr=1e-5,
)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=callbacks,
)

## Training curve

Train vs validation **accuracy** over epochs; a growing gap usually means **overfitting**.


In [None]:
# Plot training vs validation accuracy
metrics = history.history
acc = metrics.get("accuracy")
val_acc = metrics.get("val_accuracy")

# Both series are required; report which keys were actually recorded if not.
if any(series is None for series in (acc, val_acc)):
    available = ", ".join(sorted(metrics.keys()))
    raise KeyError("Missing accuracy history. Available keys: " + available)

# Epochs are numbered from 1 on the x-axis.
epochs = range(1, len(acc) + 1)

acc_fig, acc_ax = plt.subplots(figsize=(7, 4))
acc_ax.plot(epochs, acc, label="train accuracy")
acc_ax.plot(epochs, val_acc, label="val accuracy")
acc_ax.set_title("Training vs Validation Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.grid(True, alpha=0.3)
acc_ax.legend()
acc_fig.tight_layout()
plt.show()


## Inference example

Load best model from `MODEL_PATH` and predict on 3 images (apply the same `rescale_layer` as training).


In [None]:
test_paths = [
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/2/img_7169.jpg",
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/6/img_16973.jpg",
    "/kaggle/input/mnistasjpg/trainingSet/trainingSet/7/img_23407.jpg",
]

# Reload the checkpoint saved at MODEL_PATH; compile=False because only
# forward passes are needed here.
model = load_model(MODEL_PATH, compile=False)

# Run every file through the same loader used for training so that decoding,
# resizing to IMG_SIZE and rescaling match exactly. The label argument is a
# dummy value: only the image half of the returned pair is used.
test_imgs = [load_example(path, 0)[0] for path in test_paths]

# One batched forward pass instead of one predict() call per image.
pred_probs = model.predict(tf.stack(test_imgs), verbose=0)
pred_classes = np.argmax(pred_probs, axis=1)

fig, axes = plt.subplots(1, len(test_paths), figsize=(3 * len(test_paths), 3))
if len(test_paths) == 1:
    axes = [axes]

for ax, img, pred_class in zip(axes, test_imgs, pred_classes):
    # Drop the trailing channel axis so imshow receives a 2-D grayscale image.
    ax.imshow(np.squeeze(img.numpy(), axis=-1), cmap="gray")
    ax.set_title(f"pred: {int(pred_class)}")
    ax.axis("off")

plt.tight_layout()
plt.show()