In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
import numpy as np

In [2]:
# Fetch the MNIST train/test splits (images plus integer digit labels)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Report the shape of every array, one per line
print(
    *(
        f"{caption} {array.shape}"
        for caption, array in (
            ("Training data shape:", x_train),  # (60000, 28, 28)
            ("Training labels shape:", y_train),  # (60000,)
            ("Test data shape:", x_test),  # (10000, 28, 28)
            ("Test labels shape:", y_test),  # (10000,)
        )
    ),
    sep="\n",
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 18s 2us/step
Training data shape: (60000, 28, 28)
Training labels shape: (60000,)
Test data shape: (10000, 28, 28)
Test labels shape: (10000,)


In [3]:
def _scale_and_flatten(images):
    """Return `images` as float32 row vectors with pixel values in [0, 1].

    Args:
        images: Array whose first axis indexes samples (here the MNIST
            image arrays of shape (N, 28, 28)).

    Returns:
        A float32 array of shape (N, features) where every sample has been
        flattened to a single row.

    The division by 255 is applied only while the input still has an integer
    dtype. This keeps the cell idempotent: re-running it on data that was
    already converted to float does not shrink the values a second time.
    """
    # Let NumPy infer the row width instead of hard-coding 784
    flat = images.astype('float32').reshape(len(images), -1)
    if np.issubdtype(images.dtype, np.integer):
        flat = flat / 255.0
    return flat


# Normalize pixel values to [0, 1] and flatten 28x28 images to 784-dimensional vectors
x_train = _scale_and_flatten(x_train)
x_test = _scale_and_flatten(x_test)

In [4]:
# One-hot encode the integer digit labels of both splits over 10 classes
y_train_onehot, y_test_onehot = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

In [5]:
class CustomDense(Layer):
    """Fully connected layer computing ``activation(inputs @ w + b)``.

    Args:
        units: Number of output features (columns of the weight matrix).
        activation: Optional activation applied to the affine output. May be
            a callable (e.g. ``tf.nn.relu``), the name of a built-in Keras
            activation (e.g. ``"relu"``), or ``None`` to return the affine
            output unchanged.
        **kwargs: Standard ``Layer`` keyword arguments such as ``name`` or
            ``dtype``; forwarded to the base class.
    """

    def __init__(self, units, activation=None, **kwargs):
        # Forward base-class options so e.g. CustomDense(10, name="logits") works
        super().__init__(**kwargs)
        self.units = units
        # Only string identifiers are resolved; callables and None are stored as given
        if isinstance(activation, str):
            activation = tf.keras.activations.get(activation)
        self.activation = activation

    def build(self, input_shape):
        """Create the weight matrix and bias once the input width is known."""
        # Weight matrix maps the last input dimension to `units` outputs
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True
        )
        # One bias per output unit, starting at zero
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True
        )

    def call(self, inputs):
        """Apply the affine transform followed by the optional activation."""
        # Perform matrix multiplication and add bias
        z = tf.matmul(inputs, self.w) + self.b
        # Apply activation if specified
        if self.activation is not None:
            return self.activation(z)
        return z

In [6]:
class MNISTModel(Model):
    """Feed-forward digit classifier built from three CustomDense layers.

    Two ReLU hidden layers (128 and 64 units) feed a 10-unit softmax output.
    """

    def __init__(self):
        super().__init__()
        # Hidden stack
        self.dense1 = CustomDense(128, activation=tf.nn.relu)
        self.dense2 = CustomDense(64, activation=tf.nn.relu)
        # Softmax head with one unit per output class
        self.output_layer = CustomDense(10, activation=tf.nn.softmax)

    def call(self, inputs):
        """Run `inputs` through both hidden layers and the softmax head."""
        hidden = self.dense2(self.dense1(inputs))
        return self.output_layer(hidden)


model = MNISTModel()

In [7]:
def custom_sparse_categorical_crossentropy(y_true, y_pred, epsilon=1e-7):
    """Mean negative log-probability assigned to the true class.

    Args:
        y_true: Labels with one row per sample; the class index is taken as
            the argmax along axis 1 (so one-hot rows are expected).
        y_pred: Predicted class probabilities, one row per sample.
        epsilon: Lower bound applied to the probabilities before taking the
            logarithm, so a predicted probability of exactly 0 yields a
            large finite loss instead of ``inf`` (and NaN gradients).

    Returns:
        Scalar tensor: the loss averaged over the batch.
    """
    # Convert one-hot encoded y_true to class indices
    class_ids = tf.cast(tf.argmax(y_true, axis=1), tf.int32)
    # Guard against log(0) when a probability underflows
    safe_pred = tf.clip_by_value(y_pred, epsilon, 1.0)
    # Pair every row number with its true class to pick one probability per sample
    row_ids = tf.range(tf.shape(safe_pred)[0])
    true_class_prob = tf.gather_nd(safe_pred, tf.stack([row_ids, class_ids], axis=1))
    return tf.reduce_mean(-tf.math.log(true_class_prob))

In [8]:
def custom_accuracy(y_true, y_pred):
    """Fraction of samples whose top-scoring class matches the label.

    Both arguments are reduced to class indices with an argmax along axis 1,
    so `y_true` is expected to be one-hot encoded.
    """
    true_classes = tf.argmax(y_true, axis=1)
    predicted_classes = tf.argmax(y_pred, axis=1)
    # 1.0 where the prediction is right, 0.0 otherwise; the mean is the accuracy
    hits = tf.cast(tf.equal(true_classes, predicted_classes), tf.float32)
    return tf.reduce_mean(hits)

In [9]:
# Wire up training: the custom loss and accuracy metric, optimized with Adam
model.compile(
    loss=custom_sparse_categorical_crossentropy,
    metrics=[custom_accuracy],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)

In [10]:
# Train for 10 epochs in mini-batches of 32, keeping the per-epoch history.
# NOTE(review): validation metrics are computed on the test split, so the
# final test score is not an untouched hold-out estimate.
history = model.fit(
    x=x_train,
    y=y_train_onehot,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test_onehot),
)

Epoch 1/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 11s 4ms/step - custom_accuracy: 0.8535 - loss: 0.5327 - val_custom_accuracy: 0.9579 - val_loss: 0.1412
Epoch 2/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - custom_accuracy: 0.9632 - loss: 0.1233 - val_custom_accuracy: 0.9696 - val_loss: 0.0978
Epoch 3/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - custom_accuracy: 0.9749 - loss: 0.0832 - val_custom_accuracy: 0.9737 - val_loss: 0.0802
Epoch 4/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - custom_accuracy: 0.9811 - loss: 0.0620 - val_custom_accuracy: 0.9700 - val_loss: 0.0987
Epoch 5/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - custom_accuracy: 0.9862 - loss: 0.0452 - val_custom_accuracy: 0.9774 - val_loss: 0.0736
Epoch 6/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - custom_accuracy: 0.

In [11]:
# Score the trained model on the test split; the result unpacks into the loss
# and the single compiled metric
[test_loss, test_acc] = model.evaluate(x=x_test, y=y_test_onehot)
print(f"Test accuracy: {test_acc:.4f}")

313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - custom_accuracy: 0.9679 - loss: 0.1315
Test accuracy: 0.9739
