In [1]:
import tensorflow as tf
from tensorflow import keras
from keras import layers, models, callbacks, optimizers
import numpy as np

# Load Fashion-MNIST dataset
(train_images, train_labels), (test_images, test_labels) = keras.datasets.fashion_mnist.load_data()

# Preprocessing
train_images = train_images.reshape(-1, 28, 28, 1).astype('float32') / 255.0
test_images = test_images.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode labels
train_labels_cat = keras.utils.to_categorical(train_labels, 10)
test_labels_cat = keras.utils.to_categorical(test_labels, 10)

# print versions
print("Tensorflow", tf.__version__)
print("Keras", keras.__version__)
print("Np", np.__version__)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Tensorflow 2.19.0
Keras 3.10.0
Np 2.0.2


In [3]:
# Turn on CUDA
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:


# Data Augmentation
data_augmentation = keras.Sequential([
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.1, 0.1),
])

# Build Improved Model with Batch Normalization and Residual Connections
def create_improved_model():
    inputs = layers.Input(shape=(28, 28, 1))

    # Data augmentation (only applied during training)
    x = data_augmentation(inputs)

    # Block 1
    x = layers.Conv2D(64, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(64, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.25)(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(128, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Dropout(0.25)(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(256, (3, 3), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)

    # Dense layers
    x = layers.Dense(256, use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.5)(x)

    # Output layer
    outputs = layers.Dense(10, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=outputs)
    return model

# Create model
model = create_improved_model()

# Compile with modern optimizer settings
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.AdamW(learning_rate=0.001, weight_decay=1e-5),
    metrics=['accuracy']
)

# Print model summary
model.summary()

# Callbacks for better training
callback_list = [
    # Reduce learning rate when validation loss plateaus
    callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    ),
    # Stop training when validation loss stops improving
    callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    ),
    # Save best model
    callbacks.ModelCheckpoint(
        'best_fashion_mnist_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
]




In [7]:
# Train model with validation split
history = model.fit(
    train_images, train_labels_cat,
    batch_size=128,
    epochs=5,
    validation_split=0.15,
    callbacks=callback_list,
    verbose=1
)

# Evaluate on test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels_cat, verbose=0)
print(f'\nTest Accuracy: {test_accuracy:.4f}')
print(f'Test Loss: {test_loss:.4f}')

Epoch 1/5
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6910 - loss: 0.8266
Epoch 1: val_accuracy improved from -inf to 0.16311, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 51ms/step - accuracy: 0.6911 - loss: 0.8263 - val_accuracy: 0.1631 - val_loss: 3.9932 - learning_rate: 0.0010
Epoch 2/5
[1m398/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 46ms/step - accuracy: 0.7855 - loss: 0.5941
Epoch 2: val_accuracy improved from 0.16311 to 0.81744, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 49ms/step - accuracy: 0.7856 - loss: 0.5940 - val_accuracy: 0.8174 - val_loss: 0.5071 - learning_rate: 0.0010
Epoch 3/5
[1m398/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 46ms/step - accuracy: 0.8124 - loss: 0.5238
Epoch 3: val_accuracy did not improve from 0.81744
[1m399/399[0m [32m━━━━━━━━━━━

In [8]:
# Train model with validation split
history = model.fit(
    train_images, train_labels_cat,
    batch_size=128,
    epochs=15,
    validation_split=0.15,
    callbacks=callback_list,
    verbose=1
)

# Evaluate on test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels_cat, verbose=0)
print(f'\nTest Accuracy: {test_accuracy:.4f}')
print(f'Test Loss: {test_loss:.4f}')

Epoch 1/15
[1m398/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 47ms/step - accuracy: 0.8409 - loss: 0.4437
Epoch 1: val_accuracy did not improve from 0.84733
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 49ms/step - accuracy: 0.8409 - loss: 0.4436 - val_accuracy: 0.8404 - val_loss: 0.4396 - learning_rate: 0.0010
Epoch 2/15
[1m398/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 47ms/step - accuracy: 0.8498 - loss: 0.4138
Epoch 2: val_accuracy improved from 0.84733 to 0.86333, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 49ms/step - accuracy: 0.8498 - loss: 0.4138 - val_accuracy: 0.8633 - val_loss: 0.3909 - learning_rate: 0.0010
Epoch 3/15
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.8565 - loss: 0.3927
Epoch 3: val_accuracy improved from 0.86333 to 0.86900, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━

In [9]:
model.save('final_fashion_mnist_model.keras')
print("Model saved as final_fashion_mnist_model.keras")

Model saved as final_fashion_mnist_model.keras


In [10]:
# we can run standard Linux commands here with !
!ls -l

total 28740
-rw-r--r-- 1 root root 14710487 Jan 16 12:42 best_fashion_mnist_model.keras
-rw-r--r-- 1 root root 14710487 Jan 16 12:44 final_fashion_mnist_model.keras
drwxr-xr-x 1 root root     4096 Dec  9 14:42 sample_data


## MOre efficient model

In [11]:
def create_efficient_model():
    """Lighter model with ~93% accuracy, faster training"""
    model = models.Sequential([
        layers.Input(shape=(28, 28, 1)),

        # Data augmentation
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
        layers.RandomTranslation(0.1, 0.1),

        # Convolutional blocks
        layers.Conv2D(32, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(32, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),

        layers.Conv2D(64, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Conv2D(64, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),

        layers.Conv2D(128, (3, 3), padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),

        # Dense layers
        layers.Dense(128),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax')
    ])

    return model

In [13]:
efficient_model=create_efficient_model()
efficient_model

<Sequential name=sequential_2, built=True>

In [14]:
model = efficient_model # alias

In [15]:
# Compile with modern optimizer settings
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.AdamW(learning_rate=0.001, weight_decay=1e-5),
    metrics=['accuracy']
)

# Print model summary
model.summary()

In [16]:


# Callbacks for better training
callback_list = [
    # Reduce learning rate when validation loss plateaus
    callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    ),
    # Stop training when validation loss stops improving
    callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    ),
    # Save best model
    callbacks.ModelCheckpoint(
        'best_fashion_mnist_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
]

# Train model with validation split
history = model.fit(
    train_images, train_labels_cat,
    batch_size=128,
    epochs=10,
    validation_split=0.15,
    callbacks=callback_list,
    verbose=1
)

# Evaluate on test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels_cat, verbose=0)
print(f'\nTest Accuracy: {test_accuracy:.4f}')
print(f'Test Loss: {test_loss:.4f}')

Epoch 1/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.4318 - loss: 1.5883
Epoch 1: val_accuracy improved from -inf to 0.12056, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 28ms/step - accuracy: 0.4321 - loss: 1.5874 - val_accuracy: 0.1206 - val_loss: 5.0916 - learning_rate: 0.0010
Epoch 2/10
[1m397/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 25ms/step - accuracy: 0.6850 - loss: 0.8417
Epoch 2: val_accuracy improved from 0.12056 to 0.62478, saving model to best_fashion_mnist_model.keras
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 27ms/step - accuracy: 0.6852 - loss: 0.8414 - val_accuracy: 0.6248 - val_loss: 1.1245 - learning_rate: 0.0010
Epoch 3/10
[1m398/399[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 25ms/step - accuracy: 0.7283 - loss: 0.7344
Epoch 3: val_accuracy improved from 0.62478 to 0.68667, saving model to best_fas