In [57]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models

# Loading the dataset

In [58]:
# Load MNIST through Keras, then unpack each split into its inputs (x) and labels (y).
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

# Preprocessing the dataset

In [59]:
# NOTE: this cell overwrites x_train / x_test / y_train / y_test in place, so it is
# not idempotent -- re-run the loading cell before running it a second time.

# Normalise the pixel values to the 0-1 range.
# Cast to float32 first: dividing an integer array by a Python float promotes the
# result to float64, which doubles the memory footprint and forces a cast back to
# float32 on every batch in the training loop.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Flatten every image into a single 784-element feature vector.
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

# One-hot encode the integer class labels (10 classes).
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Initializing the baseline model

We initialize the baseline model and then fine-tune it using a truncated SVD that keeps the top 20 singular values of each weight matrix.

In [60]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and later shuffling) is repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Architecture hyper-parameters, kept in one place so the model definition below
# does not repeat magic numbers.
input_dim = 784          # length of a flattened input vector
num_classes = 10         # number of output classes
hidden_units = 1024      # width of every hidden Dense layer
num_hidden_blocks = 5    # number of (Dense, BatchNormalization, Dropout) blocks
dropout_rate = 0.2

# Build the repeated hidden blocks in a loop instead of copy-pasting them.
# The resulting layer order -- (Dense, BatchNormalization, Dropout) x 5 followed
# by the output Dense -- is relied on by the weight-extraction cell, which
# indexes into `baseline.layers`.
hidden_stack = []
for _ in range(num_hidden_blocks):
    hidden_stack += [
        layers.Dense(hidden_units, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ]

baseline = models.Sequential([
    layers.Input(shape=(input_dim,)),
    *hidden_stack,
    layers.Dense(num_classes, activation='softmax'),
])
baseline.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [61]:
# Extract the kernel and bias of the first hidden Dense layer and of the output
# Dense layer. The output layer is addressed as `layers[-1]` rather than by a
# hard-coded position so the lookup keeps working if the hidden stack changes.
first_dense = baseline.layers[0]
output_dense = baseline.layers[-1]
W1, b1 = first_dense.get_weights()
W2, b2 = output_dense.get_weights()

# NOTE(review): only these two layers are extracted, so the fine-tuned network
# built from them is input -> first hidden layer -> output; the remaining hidden
# Dense / BatchNormalization / Dropout layers of `baseline` are not used by it.
# Confirm this is the intended architecture.

# Wrap the extracted arrays in trainable float32 variables (same order as above).
W1_var, b1_var, W2_var, b2_var = (
    tf.Variable(initial_value, dtype=tf.float32)
    for initial_value in (W1, b1, W2, b2)
)

Accuracy of the untrained baseline model

In [62]:
# Score the baseline on the held-out test split; with the model compiled with a
# single metric, `evaluate` yields the loss followed by the accuracy.
baseline_scores = baseline.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = baseline_scores

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Loss: 2.3053
Test Accuracy: 0.1003


In [63]:
# Training schedule shared by the fine-tuning loop:
#   epochs     -- number of full passes over the training set
#   batch_size -- number of samples per gradient step
epochs, batch_size = 10, 128


# Fine-tuning the base model using SVD in every epoch

1.  The feedforward pass always uses the rank-20 truncated weight matrix
$$ W^{(l)} = U^{(l)}_{:,1:20}\, S^{(l)}_{1:20,1:20}\, \left(V^{(l)}_{:,1:20}\right)^{\top} $$

In [64]:
learning_rate = 0.001
D = 20  # number of leading singular values kept in each weight matrix


def low_rank_weight(weight, rank):
    """Return the truncated-SVD reconstruction of `weight`.

    Computes U[:, :rank] @ diag(s[:rank]) @ V[:, :rank]^T. When `weight` has
    fewer than `rank` singular values the slices keep all of them, so the
    matrix is reconstructed without truncation (this is the case for the
    output layer's kernel, which has only 10 columns).

    Args:
        weight: 2-D float tensor or variable.
        rank: maximum number of singular values to keep.

    Returns:
        A tensor with the same shape as `weight`.
    """
    s, u, v = tf.linalg.svd(weight, full_matrices=False)
    # Scaling the columns of U by s equals U @ diag(s) without materialising
    # the diagonal matrix.
    return tf.matmul(u[:, :rank] * s[:rank], v[:, :rank], transpose_b=True)


def mlp_logits(inputs, hidden_kernel, output_kernel):
    """Forward pass of the two-layer network; returns pre-softmax logits.

    Args:
        inputs: batch of flattened input vectors (cast to float32 here).
        hidden_kernel: weight matrix applied before the ReLU.
        output_kernel: weight matrix applied after the ReLU.

    Returns:
        Logits tensor, one row per input sample.
    """
    hidden = tf.nn.relu(tf.matmul(tf.cast(inputs, tf.float32), hidden_kernel) + b1_var)
    return tf.matmul(hidden, output_kernel) + b2_var


trainable_vars = [W1_var, b1_var, W2_var, b2_var]

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Use a buffer as large as the training set so every epoch is a full uniform
# shuffle rather than a shuffle within a small sliding window.
train_ds = train_ds.shuffle(buffer_size=len(x_train)).batch(batch_size)

optimizer = tf.keras.optimizers.Adam(learning_rate)

for epoch in range(epochs):
    print(f"\nEpoch_____:{epoch+1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(train_ds):
        with tf.GradientTape() as tape:
            # The SVD is taken inside the tape so gradients flow through the
            # truncation back to the full weight matrices.
            W1_svd = low_rank_weight(W1_var, D)
            W2_svd = low_rank_weight(W2_var, D)
            logits = mlp_logits(x_batch, W1_svd, W2_svd)
            # from_logits=True fuses softmax and cross-entropy, which is
            # numerically more stable than applying softmax first.
            loss = tf.reduce_mean(
                tf.keras.losses.categorical_crossentropy(
                    tf.cast(y_batch, tf.float32), logits, from_logits=True
                )
            )

        grads = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))
        if step % 100 == 0:
            print(f"Step {step}: Loss = {loss.numpy():.4f}")

# Evaluate on the test split with the same truncated weights the forward pass uses.
W1_svd = low_rank_weight(W1_var, D)
W2_svd = low_rank_weight(W2_var, D)
logits_test = mlp_logits(x_test, W1_svd, W2_svd)

# argmax over logits equals argmax over softmax(logits), so softmax is skipped.
preds = tf.argmax(logits_test, axis=1)
acc = tf.reduce_mean(tf.cast(tf.equal(preds, tf.argmax(y_test, axis=1)), tf.float32))

print(f"\nFinal Test Accuracy (with D={D}): {acc.numpy() * 100:.2f}%")


Epoch_____:1/10
Step 0: Loss = 2.3119
Step 100: Loss = 0.3634
Step 200: Loss = 0.2867
Step 300: Loss = 0.2310
Step 400: Loss = 0.1545

Epoch_____:2/10
Step 0: Loss = 0.1727
Step 100: Loss = 0.1914
Step 200: Loss = 0.0406
Step 300: Loss = 0.0942
Step 400: Loss = 0.0894

Epoch_____:3/10
Step 0: Loss = 0.0949
Step 100: Loss = 0.1395
Step 200: Loss = 0.0840
Step 300: Loss = 0.0356
Step 400: Loss = 0.0748

Epoch_____:4/10
Step 0: Loss = 0.0734
Step 100: Loss = 0.0322
Step 200: Loss = 0.0933
Step 300: Loss = 0.0929
Step 400: Loss = 0.0222


2025-11-12 19:59:41.876079: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence



Epoch_____:5/10
Step 0: Loss = 0.1119
Step 100: Loss = 0.0646
Step 200: Loss = 0.0945
Step 300: Loss = 0.0555
Step 400: Loss = 0.0334

Epoch_____:6/10
Step 0: Loss = 0.0393
Step 100: Loss = 0.0639
Step 200: Loss = 0.0624
Step 300: Loss = 0.0926
Step 400: Loss = 0.0317

Epoch_____:7/10
Step 0: Loss = 0.0172
Step 100: Loss = 0.0244
Step 200: Loss = 0.0696
Step 300: Loss = 0.0176
Step 400: Loss = 0.0644

Epoch_____:8/10
Step 0: Loss = 0.0338
Step 100: Loss = 0.0385
Step 200: Loss = 0.0427
Step 300: Loss = 0.0689
Step 400: Loss = 0.0158

Epoch_____:9/10
Step 0: Loss = 0.0325
Step 100: Loss = 0.0140
Step 200: Loss = 0.0028
Step 300: Loss = 0.0053
Step 400: Loss = 0.0171

Epoch_____:10/10
Step 0: Loss = 0.0082
Step 100: Loss = 0.0171
Step 200: Loss = 0.0570
Step 300: Loss = 0.0135
Step 400: Loss = 0.0314

Final Test Accuracy (with D=20): 97.68%
