In [1]:
# fashion_mnist_cnn_dense.py
import os, tensorflow as tf
# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding only
# TensorFlow is not enough on Keras 3, where default initializer / dropout
# seeds are drawn from Python's `random` module.
tf.keras.utils.set_random_seed(42)

In [2]:
# ======================
# 1) Load & preprocess
# ======================
num_classes = 10
img_size = 28

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Cast to float32, append a trailing channel axis, and rescale by 1/255
x_train = x_train.astype("float32")[..., None] / 255.0
x_test = x_test.astype("float32")[..., None] / 255.0

# Settings shared by every tf.data pipeline below
AUTOTUNE = tf.data.AUTOTUNE
batch_size = 128

def build_ds(x, y, training=False, shuffle_buffer=None):
    """Wrap in-memory arrays in a batched, prefetched ``tf.data.Dataset``.

    Args:
        x: Feature array, sliced along its first axis.
        y: Label array aligned with ``x`` along the first axis.
        training: When True, examples are shuffled before batching and
            reshuffled on every iteration over the dataset.
        shuffle_buffer: Size of the shuffle buffer. Defaults to ``len(x)`` so
            the shuffle covers the whole dataset; a buffer smaller than the
            dataset only shuffles within a sliding window.

    Returns:
        A dataset yielding ``(x, y)`` batches of up to ``batch_size`` examples
        (``batch_size`` and ``AUTOTUNE`` are module-level settings).
    """
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    if training:
        if shuffle_buffer is None:
            # Guard against an empty input: the buffer size must be positive.
            shuffle_buffer = max(1, len(x))
        ds = ds.shuffle(shuffle_buffer, reshuffle_each_iteration=True)
    return ds.batch(batch_size).prefetch(AUTOTUNE)

# Hold out the first `val_split` fraction of the training arrays for
# validation; the remainder is used for training. `train_ds` is built once,
# from the non-validation slice only, so no validation sample is trained on.
val_split = 0.1
val_size = int(len(x_train) * val_split)
val_ds   = build_ds(x_train[:val_size], y_train[:val_size], training=False)
train_ds = build_ds(x_train[val_size:], y_train[val_size:], training=True)
test_ds  = build_ds(x_test,  y_test,  training=False)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:

# ======================
# 2) Data augmentation
# ======================
# Augmentation pipeline applied inside the model: a random horizontal flip
# followed by a random rotation with factor 0.05.
data_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.05),
    ]
)


In [None]:


# ======================
# 3) Model: CNN + Deep Dense
# ======================
def conv_block(filters, kernel=3, pool=True, dropout=0.0):
    """Create the layers of one convolutional stage (not yet applied).

    The stage is Conv2D (same padding, no bias) -> BatchNormalization -> ReLU,
    optionally followed by max pooling and then dropout.

    Args:
        filters: Number of convolution filters.
        kernel: Convolution kernel size.
        pool: Append a ``MaxPool2D`` layer when truthy.
        dropout: Dropout rate; a ``Dropout`` layer is appended only if > 0.

    Returns:
        A list of freshly constructed Keras layers, in application order.
    """
    stage = [
        tf.keras.layers.Conv2D(filters, kernel, padding="same", use_bias=False),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Activation("relu"),
    ]
    if pool:
        stage += [tf.keras.layers.MaxPool2D()]
    if dropout > 0:
        stage += [tf.keras.layers.Dropout(dropout)]
    return stage

def build_model():
    """Build the uncompiled CNN + dense classifier.

    Architecture: augmentation -> three conv stages (32, 64, 128 filters),
    each with max pooling and dropout -> one more 128-filter conv stage
    without pooling -> global average pooling -> Dense(256) -> Dense(128)
    -> softmax over ``num_classes``.

    Reads the module-level ``img_size``, ``num_classes`` and
    ``data_augmentation``; every conv stage is produced by ``conv_block``.

    Returns:
        A ``tf.keras.Model`` named ``"fashion_mnist_cnn_dense"``.
    """
    inputs = tf.keras.Input(shape=(img_size, img_size, 1))
    x = data_augmentation(inputs)

    # Feature extractor (CNN): conv -> BN -> ReLU -> max-pool -> dropout
    for filters, drop_rate in [(32, 0.10), (64, 0.15), (128, 0.20)]:
        for layer in conv_block(filters, kernel=3, pool=True, dropout=drop_rate):
            x = layer(x)

    # Last conv stage keeps its spatial size; global average pooling then
    # collapses it to one value per channel.
    for layer in conv_block(128, kernel=3, pool=False):
        x = layer(x)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # "Deep" classifier head (Dense)
    x = tf.keras.layers.Dense(256, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.30)(x)
    x = tf.keras.layers.Dense(128, activation="relu")(x)
    x = tf.keras.layers.Dropout(0.25)(x)

    outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(x)
    return tf.keras.Model(inputs, outputs, name="fashion_mnist_cnn_dense")

# Instantiate the network and print its layer-by-layer summary.
model = build_model()
model.summary()

In [5]:
# ======================
# 4) Compile
# ======================
lr = 1e-3

# Labels are integer class ids and the model ends in a softmax, so the
# sparse categorical cross-entropy is used with its default settings.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])


In [6]:
# Print the model summary again; the same call already ran right after build_model().
model.summary()

In [7]:


# ======================
# 5) Callbacks
# ======================
checkpoint_path = "checkpoints/fashion_cnn_dense"
os.makedirs(checkpoint_path, exist_ok=True)

# Checkpoint files are named after the epoch and its validation accuracy;
# with save_best_only=True only improvements in val_accuracy are written.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(checkpoint_path, "epoch{epoch:02d}_valacc{val_accuracy:.4f}.keras"),
    monitor="val_accuracy",
    save_best_only=True,
)

# Stop after 5 epochs without a val_accuracy improvement and restore the
# best weights.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without a val_loss improvement,
# never going below 1e-5.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    min_lr=1e-5,
)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]


In [8]:



# ======================
# 6) Train
# ======================
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=callbacks,
    verbose=1,
)

# ======================
# 7) Evaluate & sample predictions
# ======================
test_loss, test_acc = model.evaluate(test_ds, verbose=0)
print(f"Test accuracy: {test_acc:.4f}")

# Compare predicted classes with the true labels for the first 10 test images
sample_images, sample_labels = x_test[:10], y_test[:10]
probs = model.predict(sample_images)
preds = probs.argmax(axis=1)
print("Predictions (first 10):", preds.tolist())
print("Labels      (first 10):", sample_labels.tolist())


Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 52ms/step - accuracy: 0.7467 - loss: 0.6877 - val_accuracy: 0.7523 - val_loss: 0.6850 - learning_rate: 0.0010
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 55ms/step - accuracy: 0.8334 - loss: 0.4618 - val_accuracy: 0.8587 - val_loss: 0.3925 - learning_rate: 0.0010
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 56ms/step - accuracy: 0.8526 - loss: 0.4093 - val_accuracy: 0.8280 - val_loss: 0.4432 - learning_rate: 0.0010
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 59ms/step - accuracy: 0.8629 - loss: 0.3748 - val_accuracy: 0.8705 - val_loss: 0.3443 - learning_rate: 0.0010
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 61ms/step - accuracy: 0.8685 - loss: 0.3573 - val_accuracy: 0.8907 - val_loss: 0.2988 - learning_rate: 0.0010
Epoch 6/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37