In [8]:
# importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

#prevent oom
x_train = x_train[:10000]
y_train = y_train[:10000]

# Normalize images to the range [0, 1]
x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)

# Flatten the images to be vectors
x_train = x_train.reshape(-1, 28 * 28).astype(np.float32)
x_test = x_test.reshape(-1, 28 * 28).astype(np.float32)

# Convert the digit labels to even/odd labels:
# Even -> 0, Odd -> 1
y_train_even_odd = np.array([label % 2 for label in y_train], dtype=np.int32)
y_test_even_odd = np.array([label % 2 for label in y_test], dtype=np.int32)


In [9]:
import tensorflow as tf
device = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"



In [10]:
class CustomDenseLayerTF:
    def __init__(self, num_inputs, num_neurons):
        with tf.device(device):  # to run the operation on gpu
            self.weights = tf.Variable(tf.random.normal([num_inputs, num_neurons], stddev=0.01, dtype=tf.float32))
            self.Gweights = tf.Variable(tf.random.normal([num_inputs, num_neurons], stddev=0.01, dtype=tf.float32))
            self.bias = tf.Variable(tf.zeros([1, num_neurons], dtype=tf.float32))

    def forward(self, inputs):
        self.inputs = inputs
        self.output = tf.matmul(inputs, self.weights) + tf.matmul(inputs, self.Gweights) + self.bias

    def update_weights(self, gradients, learning_rate=0.01):
        self.weights.assign_sub(learning_rate * gradients[0])  # Update weights
        self.Gweights.assign_sub(learning_rate * gradients[0])  # Update weights
        self.bias.assign_sub(learning_rate * gradients[1])      # Update bias

In [11]:
#defining the activation functions

class ActivationSigmoidTF:
    def forward(self, inputs):
        self.output = tf.nn.sigmoid(inputs)

class LossBinaryCrossentropyTF:
    def calculate(self, output, y_true):
        output = tf.clip_by_value(output, 1e-7, 1 - 1e-7)  # Avoid log(0)
        return tf.reduce_mean(- (y_true * tf.math.log(output) + (1 - y_true) * tf.math.log(1 - output)))


In [12]:
# training function 
@tf.function  # Compiles function for efficiency (Graph Mode)
def train_step(dense_layer, activation, loss_function, X_batch, y_batch, optimizer):
    with tf.GradientTape() as tape:
        # Forward pass of the custom dense network
        dense_layer.forward(X_batch)
        # incorporating the activation function 
        activation.forward(dense_layer.output)
        #calculating the loss values
        loss = loss_function.calculate(activation.output, y_batch)

    # Compute gradients
    gradients = tape.gradient(loss, [dense_layer.weights, dense_layer.bias])

    # Ensure valid gradients
    if gradients is None or any(g is None for g in gradients):
        return None  # If gradient calculation fails, return None

    # Apply gradients
    optimizer.apply_gradients(zip(gradients, [dense_layer.weights, dense_layer.bias]))

    # Compute accuracy
    predictions = tf.cast(activation.output > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y_batch), dtype=tf.float32))

    return loss, accuracy

In [13]:
#defining the main training loop:
def train_custom_nn(X_train, y_train, epochs=5, batch_size=32, learning_rate=0.01):
    num_samples = X_train.shape[0]

    # Convert input data & labels to TensorFlow tensors
    X_train_tf = tf.convert_to_tensor(X_train, dtype=tf.float32)
    y_train_tf = tf.convert_to_tensor(y_train.reshape(-1, 1), dtype=tf.float32)

    # Create TensorFlow dataset for efficient training
    dataset = tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))
    dataset = dataset.shuffle(num_samples).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    # Initialize custom model
    with tf.device(device):
        dense_layer = CustomDenseLayerTF(X_train.shape[1], 1)
        activation = ActivationSigmoidTF()
        loss_function = LossBinaryCrossentropyTF()
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # Adam optimizer

    # Training loop
    for epoch in range(epochs):
        total_loss = tf.Variable(0.0, dtype=tf.float32)
        total_accuracy = tf.Variable(0.0, dtype=tf.float32)
        num_batches = tf.Variable(0, dtype=tf.int32)

        # Process data in batches using TensorFlow dataset
        for X_batch, y_batch in dataset:
            result = train_step(dense_layer, activation, loss_function, X_batch, y_batch, optimizer)
            
            if result is None:
                continue  # Skip this batch if train_step() returned None
            
            loss, accuracy = result
            total_loss.assign_add(loss)
            total_accuracy.assign_add(accuracy)
            num_batches.assign_add(1)

        # Compute average loss and accuracy per epoch
        avg_loss = total_loss / tf.cast(num_batches, tf.float32)
        avg_accuracy = total_accuracy / tf.cast(num_batches, tf.float32)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss.numpy():.4f}, Accuracy: {avg_accuracy.numpy():.4f}")
    
    return dense_layer, activation

In [14]:
# Train the custom neural network with a batch size of 64
dense_layer, activation = train_custom_nn(x_train, y_train_even_odd, epochs=5, batch_size=64)


2025-02-27 23:42:12.663250: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 1/5 - Loss: 0.3102, Accuracy: 0.8658


2025-02-27 23:42:13.129584: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2/5 - Loss: 0.2617, Accuracy: 0.8948
Epoch 3/5 - Loss: 0.2512, Accuracy: 0.8993


2025-02-27 23:42:14.034830: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 4/5 - Loss: 0.2446, Accuracy: 0.9038
Epoch 5/5 - Loss: 0.2456, Accuracy: 0.9027


### Comparing with prebuild keras model


In [14]:
from tensorflow import keras
# ================== TENSORFLOW MODEL (Using GPU) ==================

# Define the equivalent TensorFlow model
with tf.device(device):  # Run on GPU
    model = keras.Sequential([
        keras.layers.Dense(1, activation='sigmoid', input_shape=(28 * 28,))
    ])

    # Compile the model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # Train for 5 epochs
    model.fit(x_train, y_train_even_odd, epochs=15, verbose=1, batch_size=64)

    # Evaluate on test data
    loss_tf, accuracy_tf = model.evaluate(x_train, y_train_even_odd, verbose=0)

print("\n===== Comparison =====")
print(f"Custom Neural Network Accuracy: {acc:.4f}")
print(f"TensorFlow Neural Network Accuracy: {accuracy_tf:.4f}")

Epoch 1/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6971 - loss: 0.5676
Epoch 2/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8570 - loss: 0.3602
Epoch 3/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8635 - loss: 0.3261
Epoch 4/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8725 - loss: 0.3050
Epoch 5/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8814 - loss: 0.2855
Epoch 6/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8869 - loss: 0.2782
Epoch 7/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8861 - loss: 0.2734
Epoch 8/15
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8915 - loss: 0.2648
Epoch 9/15
[1m157/157[0m [32m━━━━━━━━

In [None]:
class CustomDenseLayerTF:
    def __init__(self, num_inputs, num_neurons):
        with tf.device(device):  # to run the operation on gpu
            self.weights = tf.Variable(tf.random.normal([num_inputs, num_neurons], stddev=0.01, dtype=tf.float32))
            self.Gweights = tf.Variable(tf.random.normal([num_inputs, num_neurons], stddev=0.01, dtype=tf.float32))
            self.bias = tf.Variable(tf.zeros([1, num_neurons], dtype=tf.float32))

    def forward(self, inputs):
        self.inputs = inputs
        self.output = tf.matmul(inputs, self.weights) + tf.matmul(inputs, self.Gweights) + self.bias

    def update_weights(self, gradients, learning_rate=0.01):
        self.weights.assign_sub(learning_rate * gradients[0])  # Update weights
        self.Gweights.assign_sub(learning_rate * gradients[0])  # Update weights
        self.bias.assign_sub(learning_rate * gradients[1])      # Update bias

    #defining the activation functions

class ActivationSigmoidTF:
    def forward(self, inputs):
        self.output = tf.nn.sigmoid(inputs)

class LossBinaryCrossentropyTF:
    def calculate(self, output, y_true):
        output = tf.clip_by_value(output, 1e-7, 1 - 1e-7)  # Avoid log(0)
        return tf.reduce_mean(- (y_true * tf.math.log(output) + (1 - y_true) * tf.math.log(1 - output)))
# training function 
@tf.function  # Compiles function for efficiency (Graph Mode)
def train_step(dense_layer, activation, loss_function, X_batch, y_batch, optimizer):
    with tf.GradientTape() as tape:
        # Forward pass of the custom dense network
        dense_layer.forward(X_batch)
        # incorporating the activation function 
        activation.forward(dense_layer.output)
        #calculating the loss values
        loss = loss_function.calculate(activation.output, y_batch)

    # Compute gradients
    gradients = tape.gradient(loss, [dense_layer.weights, dense_layer.bias])

    # Ensure valid gradients
    if gradients is None or any(g is None for g in gradients):
        return None  # If gradient calculation fails, return None

    # Apply gradients
    optimizer.apply_gradients(zip(gradients, [dense_layer.weights, dense_layer.bias]))

    # Compute accuracy
    predictions = tf.cast(activation.output > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y_batch), dtype=tf.float32))

    return loss, accuracy
#defining the main training loop:
def train_custom_nn(X_train, y_train, epochs=5, batch_size=32, learning_rate=0.01):
    num_samples = X_train.shape[0]

    # Convert input data & labels to TensorFlow tensors
    X_train_tf = tf.convert_to_tensor(X_train, dtype=tf.float32)
    y_train_tf = tf.convert_to_tensor(y_train.reshape(-1, 1), dtype=tf.float32)

    # Create TensorFlow dataset for efficient training
    dataset = tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))
    dataset = dataset.shuffle(num_samples).batch(batch_size).prefetch(tf.data.AUTOTUNE)

    # Initialize custom model
    with tf.device(device):
        dense_layer = CustomDenseLayerTF(X_train.shape[1], 1)
        activation = ActivationSigmoidTF()
        loss_function = LossBinaryCrossentropyTF()
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  # Adam optimizer

    # Training loop
    for epoch in range(epochs):
        total_loss = tf.Variable(0.0, dtype=tf.float32)
        total_accuracy = tf.Variable(0.0, dtype=tf.float32)
        num_batches = tf.Variable(0, dtype=tf.int32)

        # Process data in batches using TensorFlow dataset
        for X_batch, y_batch in dataset:
            result = train_step(dense_layer, activation, loss_function, X_batch, y_batch, optimizer)
            
            if result is None:
                continue  # Skip this batch if train_step() returned None
            
            loss, accuracy = result
            total_loss.assign_add(loss)
            total_accuracy.assign_add(accuracy)
            num_batches.assign_add(1)

        # Compute average loss and accuracy per epoch
        avg_loss = total_loss / tf.cast(num_batches, tf.float32)
        avg_accuracy = total_accuracy / tf.cast(num_batches, tf.float32)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss.numpy():.4f}, Accuracy: {avg_accuracy.numpy():.4f}")
    
    return dense_layer, activation