# EXP 3: WAP to evaluate the performance of perceptron with linear and sigmoid activation functions for a regression and binary classification problem respectively

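The code implements a three-layer neural network in TensorFlow to classify handwritten digits from the MNIST dataset. The forward pass, training step, and training loop are written by hand rather than with a Keras `Model`; Keras is used only for utilities (dataset loading, weight initializer, batch-normalization layer, loss, and optimizer). It follows a structured approach involving: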

1. Data Preprocessing:
   - Normalizing pixel values to [0, 1].
   - Flattening the images from 28x28 to 784 dimensions.
   - One-hot encoding the labels for classification.

2. Model Architecture:
   - Input Layer: 784 neurons.
   - Hidden Layer 1: 128 neurons, Activation Function: ReLU.
   - Hidden Layer 2: 64 neurons, Activation Function: ReLU.
   - Output Layer: 10 neurons (for classes 0-9), producing raw logits; softmax is applied inside the loss function.

3. Training Process:
   - Forward Propagation: Matrix multiplication followed by activation functions.
   - Loss Calculation: Categorical Cross-Entropy.
   - Backpropagation: Using TensorFlow's automatic differentiation.
   - Optimization: Adam optimizer with a learning rate scheduler.
   - Regularization: Batch normalization and dropout applied to prevent overfitting.

4. Evaluation:
   - Computes accuracy on both the training and test datasets after every epoch.

The code demonstrates efficient training using TensorFlow's low-level API with improvements for better performance and stability. It prints training progress for each epoch and displays final test accuracy.

In [None]:
import tensorflow as tf
import numpy as np

# Fetch the MNIST train / test splits
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-long float32 vector and divide by 255
x_train = x_train.reshape(-1, 28*28).astype(np.float32) / 255.0
x_test = x_test.reshape(-1, 28*28).astype(np.float32) / 255.0

# Turn the integer class labels into one-hot rows with 10 columns
y_train = tf.one_hot(y_train, depth=10)
y_test = tf.one_hot(y_test, depth=10)

# Layer widths
input_size = 784
hidden_size1 = 128
hidden_size2 = 64
output_size = 10

# Optimization settings
learning_rate = 0.001
batch_size = 128
epochs = 20

# Initialize weights and biases
# `initializer` is retained in case other cells refer to it; the weight
# matrices below each use their own HeNormal instance instead. Keras 3 fixes
# the seed of an unseeded initializer when it is constructed, so calling one
# shared instance three times would derive all three matrices from the same
# seed rather than from independent random draws.
initializer = tf.keras.initializers.HeNormal()

weights = {
    'W1': tf.Variable(tf.keras.initializers.HeNormal()([input_size, hidden_size1])),
    'W2': tf.Variable(tf.keras.initializers.HeNormal()([hidden_size1, hidden_size2])),
    'W3': tf.Variable(tf.keras.initializers.HeNormal()([hidden_size2, output_size])),
}

# Biases start at zero, one vector per layer
biases = {
    'b1': tf.Variable(tf.zeros([hidden_size1])),
    'b2': tf.Variable(tf.zeros([hidden_size2])),
    'b3': tf.Variable(tf.zeros([output_size])),
}

# Define feed-forward function with Batch Normalization and Dropout

# The normalization layers are built once, at module level, so that the moving
# mean/variance they accumulate while training are still there when the network
# is evaluated. Constructing them inside the function would hand every call a
# brand-new layer whose inference statistics are still at their initial values.
# NOTE: train_step only optimizes `weights` and `biases`, so the scale/offset
# parameters of these layers stay at their initial values.
_bn_hidden1 = tf.keras.layers.BatchNormalization()
_bn_hidden2 = tf.keras.layers.BatchNormalization()


def forward_propagation(x, training=True, dropout_rate=0.2):
    """Compute the network's output logits for a batch of inputs.

    Args:
        x: Batch of flattened inputs; multiplied by ``weights['W1']``, so its
            last dimension must match that matrix's first dimension.
        training: When True, batch normalization uses the statistics of the
            current batch and dropout is applied. When False, batch
            normalization uses its accumulated moving statistics and dropout
            is skipped, so evaluation is deterministic.
        dropout_rate: Fraction of hidden activations dropped while training.

    Returns:
        The raw logits of the output layer (no softmax applied).
    """
    z1 = tf.matmul(x, weights['W1']) + biases['b1']
    a1 = tf.nn.relu(_bn_hidden1(z1, training=training))
    if training:
        # Dropout is a training-time regularizer only
        a1 = tf.nn.dropout(a1, rate=dropout_rate)

    z2 = tf.matmul(a1, weights['W2']) + biases['b2']
    a2 = tf.nn.relu(_bn_hidden2(z2, training=training))
    if training:
        a2 = tf.nn.dropout(a2, rate=dropout_rate)

    z3 = tf.matmul(a2, weights['W3']) + biases['b3']
    return z3  # No softmax here; handled by loss function

# Loss: categorical cross-entropy evaluated directly on logits
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Adam optimizer whose learning rate follows an exponential decay schedule
# (starts at `learning_rate`, decay rate 0.9 per 1000 steps)
learning_rate_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    learning_rate, decay_steps=1000, decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate_schedule)

# Training step function

def train_step(x_batch, y_batch):
    """Apply one optimizer update computed from a single mini-batch.

    Runs the forward pass in training mode under a gradient tape, then
    differentiates the loss with respect to every entry of ``weights`` and
    ``biases`` and hands the gradients to the optimizer.

    Returns:
        The loss value of this mini-batch.
    """
    trainable_vars = [*weights.values(), *biases.values()]
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(y_batch, forward_propagation(x_batch, training=True))
    grads = tape.gradient(batch_loss, trainable_vars)
    optimizer.apply_gradients(zip(grads, trainable_vars))
    return batch_loss

# Evaluation function

def evaluate(x_data, y_data):
    """Return the fraction of samples whose predicted class matches the label.

    The forward pass is run with ``training=False``. The class of each row is
    taken as the argmax along axis 1, both for the logits and for ``y_data``
    (one-hot labels in this script).
    """
    predicted_classes = tf.argmax(forward_propagation(x_data, training=False), axis=1)
    expected_classes = tf.argmax(y_data, axis=1)
    hits = tf.cast(tf.equal(predicted_classes, expected_classes), tf.float32)
    return tf.reduce_mean(hits)

# Training loop
# The training set is streamed through tf.data so that each epoch visits the
# samples in a fresh random order and the final, shorter batch is kept instead
# of being silently discarded by integer division.
num_samples = x_train.shape[0]
num_batches = -(-num_samples // batch_size)  # ceiling division: counts the last partial batch
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=num_samples)  # reshuffles on every iteration by default
    .batch(batch_size)
)

for epoch in range(epochs):
    avg_loss = tf.constant(0.0)
    for batch_x, batch_y in train_dataset:
        loss = train_step(batch_x, batch_y)
        # Weight each batch by its share of the data so the short last batch
        # does not distort the epoch mean
        avg_loss += loss * (batch_x.shape[0] / num_samples)

    # Accuracy on the full training and test sets after this epoch
    train_acc = evaluate(x_train, y_train)
    test_acc = evaluate(x_test, y_test)
    print(f"Epoch {epoch+1}, Loss: {avg_loss.numpy():.4f}, Train Accuracy: {train_acc.numpy() * 100:.2f}%, Test Accuracy: {test_acc.numpy() * 100:.2f}%")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2us/step
Epoch 1, Loss: 0.4589, Train Accuracy: 86.27%, Test Accuracy: 86.85%
Epoch 2, Loss: 0.2187, Train Accuracy: 89.17%, Test Accuracy: 89.02%
Epoch 3, Loss: 0.1705, Train Accuracy: 90.25%, Test Accuracy: 90.33%
Epoch 4, Loss: 0.1431, Train Accuracy: 88.49%, Test Accuracy: 88.26%
Epoch 5, Loss: 0.1260, Train Accuracy: 88.41%, Test Accuracy: 88.30%
Epoch 6, Loss: 0.1137, Train Accuracy: 90.28%, Test Accuracy: 89.31%
Epoch 7, Loss: 0.1026, Train Accuracy: 90.20%, Test Accuracy: 89.24%
Epoch 8, Loss: 0.0970, Train Accuracy: 90.18%, Test Accuracy: 89.28%
Epoch 9, Loss: 0.0909, Train Accuracy: 90.40%, Test Accuracy: 90.07%
Epoch 10, Loss: 0.0858, Train Accuracy: 89.97%, Test Accuracy: 89.54%
Epoch 11, Loss: 0.0816, Train Accuracy: 90.90%, Test Accuracy: 89.86%
Epoch 12, Loss: 0.0751, Train Accuracy: 92.26%, Test Accuracy