In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

2024-08-05 15:53:33.481505: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-05 15:53:33.481617: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-05 15:53:33.615645: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Load the Fashion MNIST dataset
(train_data, train_labels), (test_data, test_labels) = fashion_mnist.load_data()

# Preprocess the data
train_data = train_data / 255.0
test_data = test_data / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
# Calculate the class weights
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels
)

# Convert the class weights to a dictionary
class_weights_dict = {i: class_weights[i] for i in np.unique(train_labels)}


In [4]:
class_weights_dict

{0: 1.0,
 1: 1.0,
 2: 1.0,
 3: 1.0,
 4: 1.0,
 5: 1.0,
 6: 1.0,
 7: 1.0,
 8: 1.0,
 9: 1.0}

In [5]:
from tensorflow.keras.callbacks import ModelCheckpoint
# Create a callback that saves the model with the best validation accuracy
checkpoint_callback = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
import tensorflow_datasets as tfds

# MobileMNIST model
def MobileMNIST(input_shape, num_classes, num_channels, dim, expansion_ratio, num_layers=[2, 4, 3]):
    inputs = layers.Input(input_shape)

    # Stem
    x = layers.Conv2D(filters=num_channels[0], kernel_size=3, strides=2, padding="same", use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)
    x = inverted_residual_block(x, num_channels[1], strides=1, expansion_ratio=expansion_ratio)

    # Stage 1
    x = inverted_residual_block(x, num_channels[2], strides=2, expansion_ratio=expansion_ratio)
    x = inverted_residual_block(x, num_channels[3], strides=1, expansion_ratio=expansion_ratio)
    skip_1 = inverted_residual_block(x, num_channels[4], strides=1, expansion_ratio=expansion_ratio)

    # Stage 2
    x = inverted_residual_block(skip_1, num_channels[5], strides=2, expansion_ratio=expansion_ratio)
    skip_2 = mobile_block(x, num_channels[6], dim[0], num_layers=num_layers[0])

    # Stage 3
    x = inverted_residual_block(skip_2, num_channels[7], strides=2, expansion_ratio=expansion_ratio)
    skip_3 = mobile_block(x, num_channels[8], dim[1], num_layers=num_layers[1])

    # Classification head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(num_classes, activation='softmax', kernel_regularizer=regularizers.l2(0.01))(x)

    return models.Model(inputs, x)

# Define inverted residual block (assuming you already have this function)
def inverted_residual_block(inputs, num_filters, strides=1, expansion_ratio=1):
    x = layers.Conv2D(filters=expansion_ratio * inputs.shape[-1], kernel_size=1, padding="same", use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)

    x = layers.DepthwiseConv2D(kernel_size=3, strides=strides, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)

    x = layers.Conv2D(filters=num_filters, kernel_size=1, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    if strides == 1 and inputs.shape == x.shape:
        return layers.Add()([inputs, x])
    return x

# Define mobile vit block (assuming you already have this function)
def mobile_block(inputs, num_filters, dim, patch_size=2, num_layers=1):
    B, H, W, C = inputs.shape

    x = layers.SeparableConv2D(filters=num_filters, kernel_size=3, padding="same", use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("swish")(x)

    return x

# Define transformer encoder (assuming you already have this function)
def transformer_encoder(x, num_heads, dim, mlp_dim):
    skip_1 = x
    x = layers.LayerNormalization()(x)
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=dim)(x, x)
    x = layers.Add()([x, skip_1])

    skip_2 = x
    x = layers.LayerNormalization()(x)
    x = mlp(x, mlp_dim, dim)
    x = layers.Add()([x, skip_2])

    return x

def mlp(x, mlp_dim, dim, dropout_rate=0.1):
    x = layers.Dense(mlp_dim, activation="swish")(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(dim)(x)
    x = layers.Dropout(dropout_rate)(x)
    return x


In [7]:
input_shape = (28, 28,1)
num_classes = 10
model = MobileMNIST(input_shape, num_classes, num_channels=[16, 16, 24, 24, 24, 48, 64, 64, 80], dim=[64, 80, 96], expansion_ratio=2, num_layers=[2, 4, 3])

In [8]:
model.summary()

In [9]:


def softmax_loss(y_true, y_pred):
    """
    Custom softmax loss function.

    :param y_true: Sparse target labels.
    :param y_pred: Logits.
    :return: Computed loss value.
    """
    y_true = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(ce)

loss = softmax_loss

model.compile(loss = loss,
                    optimizer = tf.keras.optimizers.Adam(),
                    metrics = ['accuracy'])





In [None]:
history = model.fit(train_data , train_labels , epochs = 150 , validation_data = (test_data ,test_labels),class_weight=class_weights_dict,callbacks=[checkpoint_callback])

Epoch 1/150
[1m  28/1875[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 6ms/step - accuracy: 0.2400 - loss: 2.3992  

I0000 00:00:1722873248.282743      82 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7363 - loss: 1.8217
Epoch 1: val_accuracy improved from -inf to 0.85870, saving model to best_model.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 8ms/step - accuracy: 0.7363 - loss: 1.8217 - val_accuracy: 0.8587 - val_loss: 1.6388
Epoch 2/150
[1m1866/1875[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8651 - loss: 1.6297
Epoch 2: val_accuracy did not improve from 0.85870
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8651 - loss: 1.6296 - val_accuracy: 0.8532 - val_loss: 1.6289
Epoch 3/150
[1m1868/1875[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8777 - loss: 1.6046
Epoch 3: val_accuracy improved from 0.85870 to 0.86170, saving model to best_model.keras
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8777 - loss: 1.60

In [None]:
# Convert the history to a DataFrame
history_df = pd.DataFrame(history.history)

# Plot the loss
plt.figure(figsize=(10, 5))
plt.plot(history_df['loss'], label='Training Loss')
plt.plot(history_df['val_loss'], label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

# Plot the accuracy
plt.figure(figsize=(10, 5))
plt.plot(history_df['accuracy'], label='Training Accuracy')
plt.plot(history_df['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()