<a href="https://colab.research.google.com/github/as3091/IITJ/blob/ML_Assign/ML/Assign_2/Assign_2_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Q3: Implementation of Neural Networks from Scratch Using NumPy and Comparison with Sklearn (20 marks):
  
    1. Load and preprocess the MNIST Digits Dataset. (3 marks)
    2. Implement a neural network with one input layer, one hidden layer, and one output layer using NumPy. (5 marks)
    3. Train the neural network with various hyperparameters (e.g., learning rate, number of hidden nodes). (3 marks)
    4. Evaluate the performance of the neural network on the testing set. (2 marks)
    5. Implement the same neural network using sklearn and compare the results with the NumPy implementation. (4 marks)
    6. Plot the training and validation loss/accuracy curves (for both experiments). (3 marks)



In [8]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
import warnings,gc,sys
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay, classification_report
from sklearn.pipeline import Pipeline
from tqdm import tqdm
import tensorflow as tf
device_name = tf.test.gpu_device_name()

In [2]:
%%script echo skipping
from sklearn.datasets import fetch_openml

X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
X["Y"] = y
X.to_csv("mnist_784.csv",index=False,header=True)
del X,y

# %%skip_if True  # Replace True with your condition
# print("This will be skipped if the condition is True")

In [3]:
mnist_784_df = pd.read_csv("mnist_784.csv")
# display(mnist_784_df.head().T)

In [11]:
def sigmoid(x):
    return tf.sigmoid(x)

def sigmoid_derivative(x):
    return x * (1 - x)  # Remains the same

def relu(x):
    return tf.nn.relu(x)

def relu_derivative(x):
    return tf.cast(tf.greater(x, 0), tf.float32)

In [12]:
inputs = mnist_784_df.drop(columns="Y").values
targets = mnist_784_df["Y"].values
result = np.where(targets == 5, targets, 0)

IL_n = mnist_784_df.shape[-1]-1
n_neurons_HL = int(np.sqrt(IL_n-1))

In [13]:
# Normalize the array
max_values = np.max(inputs, axis=0)
min_values = np.min(inputs, axis=0)

normalized_array = (inputs - min_values) / (max_values - min_values)
normalized_array = np.nan_to_num(normalized_array)  # Replace NaN values with 0

# print(normalized_array[0])

scaler = StandardScaler()
inputs = pd.DataFrame(scaler.fit_transform(inputs), columns=inputs)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(
    #inputs
    normalized_array, result,
    test_size=0.20, random_state=42,
    stratify=result,shuffle=True)

In [32]:
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim,inputs, targets):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.inputs = tf.convert_to_tensor(inputs, dtype=tf.float32)
        self.targets = tf.convert_to_tensor(targets.reshape(-1, 1), dtype=tf.float32)

        self.weights1 = tf.random.normal((self.input_dim, self.hidden_dim))
        self.weights2 = tf.random.normal((self.hidden_dim, self.output_dim))
        self.bias1 = tf.zeros((1, self.hidden_dim))
        self.bias2 = tf.zeros((1, self.output_dim))

    def forward_propagation(self):

        hidden_layer_input = tf.matmul(self.inputs, self.weights1) + self.bias1
        self.hidden_layer_output = relu(hidden_layer_input)

        output_layer_input = tf.matmul(self.hidden_layer_output, self.weights2) + self.bias2
        self.output_layer_output = sigmoid(output_layer_input)

    def backward_propagation(self):
        d_output = 2 * (self.output_layer_output - self.targets)
        self.d_weights2 = tf.matmul(tf.transpose(self.hidden_layer_output), d_output * sigmoid_derivative(self.output_layer_output))
        self.d_bias2 = tf.reduce_sum(d_output * sigmoid_derivative(self.output_layer_output), axis=0, keepdims=True)

        d_hidden_layer = tf.matmul(d_output * sigmoid_derivative(self.output_layer_output), tf.transpose(self.weights2)) * relu_derivative(self.hidden_layer_output)

        self.d_weights1 = tf.matmul(tf.transpose(self.inputs), d_hidden_layer)
        self.d_bias1 = tf.reduce_sum(d_hidden_layer, axis=0, keepdims=True)

    def update_weights(self,learning_rate):

        self.weights1 -= learning_rate * self.d_weights1
        self.bias1 -= learning_rate * self.d_bias1
        self.weights2 -= learning_rate * self.d_weights2
        self.bias2 -= learning_rate * self.d_bias2

    def runner(self, epochs, learning_rate):

        for epoch in tqdm(range(epochs)):

            self.forward_propagation()
            self.backward_propagation()
            self.update_weights(learning_rate=learning_rate)

                # Print loss every 1000 epochs
            if epoch % 1000 == 0:
                loss = tf.reduce_mean(tf.square(self.output_layer_output - self.targets))
                print(f"\nEpoch {epoch+1}, Loss: {loss}")
            gc.collect()

        # _, final_output =
        self.forward_propagation(self.inputs)
        print("Final Predictions:")
        print(self.output_layer_output)

In [None]:
with tf.device('/device:GPU:0'):
    nn = NeuralNetwork(input_dim=IL_n, hidden_dim=n_neurons_HL, output_dim=1,inputs=inputs,targets=targets)
    learning_rate = 0.1
    epochs = 10000
    nn.runner(epochs, learning_rate)

  0%|          | 1/10000 [00:00<55:02,  3.03it/s]


Epoch 1, Loss: 23.462247848510742


  1%|▏         | 132/10000 [00:30<35:29,  4.63it/s]

In [None]:
sys.exit()

In [None]:
nn = NeuralNetwork(input_dim=IL_n, hidden_dim=n_neurons_HL, output_dim=1,input=inputs,targets=targets)


learning_rate = 0.1
epochs = 10000

for epoch in range(epochs):
    hidden_layer_output, output_layer_output = nn.forward_propagation(inputs)
    d_weights1, d_bias1, d_weights2, d_bias2 = nn.backward_propagation(inputs, targets, hidden_layer_output, output_layer_output)
    nn.update_weights(d_weights1, d_bias1, d_weights2, d_bias2, learning_rate)

    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        loss = np.mean(np.square(output_layer_output - targets))
        print(f"Epoch {epoch+1}, Loss: {loss}")

# Final predictions
_, final_output = nn.forward_propagation(inputs)
print("Final Predictions:")
print(final_output)


In [None]:
class Neuron:
    def __init__(self,value):
        self.value = value
class hidden_layer


In [None]:
class aHL:
    def __init__(self,mnist_784_df):


    def forward_prop_relu(self,input,weight):
        return np.maximum(0, input*weight)




In [None]:
X.to_csv("mnist_784.csv",index=False,header=True)

In [None]:
display(X.head().T)

In [None]:
df = pd.DataFrame(X, columns=[f'Pixel_{i}'for i in range(X.shape[-1])])

In [None]:
display(df.head())

In [None]:
y.head()

In [None]:
class NeuralNetwork:
    def __init__(self, input_dim, hidden_dim, output_dim,inputs, targets):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.inputs = inputs
        self.targets = targets

        self.weights1 = np.random.rand(input_dim, hidden_dim)
        self.weights2 = np.random.rand(hidden_dim, output_dim)
        self.bias1 = np.zeros((1, hidden_dim))
        self.bias2 = np.zeros((1, output_dim))

    def forward_propagation(self):

        hidden_layer_input = np.dot(self.inputs, self.weights1) + self.bias1
        hidden_layer_output = relu(hidden_layer_input)

        output_layer_input = np.dot(hidden_layer_output, self.weights2) + self.bias2
        output_layer_output = sigmoid(output_layer_input)

        return hidden_layer_output, output_layer_output

    def backward_propagation(self, targets, hidden_layer_output, output_layer_output):

        d_output = 2 * (output_layer_output - targets)
        d_weights2 = np.dot(hidden_layer_output.T, d_output * sigmoid_derivative(output_layer_output))
        d_bias2 = np.sum(d_output * sigmoid_derivative(output_layer_output), axis=0, keepdims=True)

        d_hidden_layer = np.dot(d_output * sigmoid_derivative(output_layer_output), self.weights2.T) * relu_derivative(hidden_layer_output)
        d_weights1 = np.dot(self.inputs.T, d_hidden_layer)
        d_bias1 = np.sum(d_hidden_layer, axis=0, keepdims=True)

        return d_weights1, d_bias1, d_weights2, d_bias2

    def update_weights(self, d_weights1, d_bias1, d_weights2, d_bias2, learning_rate):
        self.weights1 -= learning_rate * d_weights1
        self.bias1 -= learning_rate * d_bias1
        self.weights2 -= learning_rate * d_weights2
        self.bias2 -= learning_rate * d_bias2

    def runner(self, epochs, learning_rate):
        # inputs, targets, epochs, learning_rate
        for i in range(epochs):
            # hidden_layer_output, output_layer_output = self.forward_propagation(inputs)
            # d_weights1, d_bias1, d_weights2, d_bias2 = self.backward_propagation(inputs, targets, hidden_layer_output, output_layer_output)
            # self.update_weights(d_weights1, d_bias1, d_weights2, d_bias2, learning_rate)

            self.forward_propagation()
            self.backward_propagation()
            self.update_weights()

            if i % 100 == 0:
                print(f"Loss: {np.mean(np.square(output_layer_output - targets))}")