<a href="https://colab.research.google.com/github/chaitragopalappa/MIE590-690D/blob/main/2a_NN_Backprop_SymbolicDiff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Neural networks are universal function approximators
That is, given enough hidden nodes and a suitable non-linear activation function, a network with even a single hidden layer can approximate any continuous function (over a bounded input range) to arbitrary accuracy.

In [None]:
# @title Backpropagation algorithm
#AI prompt: write a code for backpropagation algorithm for a x^2. Create a neural network with one hidden layer and multiple nodes. Create some samples to test the model. Add a graph to vizualize output from each hidden node, plot lines on same graph with x on x axis
#My Modification: modified the resulting code to use symbolic differentiation; used AI prompts to add visualizations for further analyses
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
#from tensorboardX import SummaryWriter
from torch.utils.tensorboard import SummaryWriter

In [None]:
# @title Generate Synthetic Data and Fit a model to that data { vertical-output: true }
# Seed NumPy's legacy global generator so the noisy samples (and any later
# np.random draws in the same run) are reproducible under Restart & Run All.
np.random.seed(42)

# Inputs: 50 evenly spaced points in [-4, 4], stored as a column vector.
X = np.linspace(-4, 4, 50).reshape(-1, 1)
# Targets: y = x^2 plus Gaussian noise with standard deviation 2. The noise
# takes its shape from X, so the sample count only has to be changed in one place.
Y = X ** 2 + np.random.randn(*X.shape) * 2
# Alternative target to experiment with: Y = np.sin(X)
plt.scatter(X, Y, label='Data')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.title(' Fit a NN to predict y=f(x)')
plt.show()

In [None]:
# @title Data processing and handling occur at this point (cleaning, visualizing, normalizing, train and test sets, minibatching, etc.) { vertical-output: true }
#Skipping these steps to keep the focus on understanding NN

In [None]:
#@title Initialize NN parameters { vertical-output: true }
# Network dimensions are read off the data: one input per column of X and
# one output per column of Y.
input_size = X.shape[1]
output_size = Y.shape[1]

# Tunable hyperparameters (experiment with different values).
# The architecture is fixed at one hidden layer; only its width is adjustable.
hidden_layer_size = 10  # number of nodes in the hidden layer
learning_rate = 0.001   # step size used in the gradient-descent updates
epochs = 10000          # number of training iterations

# Trainable parameters: weights start as standard-normal draws, biases at zero.
W1 = np.random.randn(input_size, hidden_layer_size)   # input  -> hidden weights
b1 = np.zeros((1, hidden_layer_size))                 # hidden-layer biases
W2 = np.random.randn(hidden_layer_size, output_size)  # hidden -> output weights
b2 = np.zeros((1, output_size))                       # output-layer biases

# Define the activation function (sigmoid)
def sigmoid(x):
  """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

  The value is computed as exp(-log(1 + exp(-x))) using np.logaddexp, which
  is mathematically identical to the textbook formula but does not overflow
  (and therefore emits no RuntimeWarning) for large-magnitude negative inputs.

  Args:
    x: Scalar or array-like of pre-activation values.

  Returns:
    The sigmoid of each element, in [0, 1], with the same shape as x.
  """
  # log(1 + exp(-x)) == logaddexp(0, -x); np.asarray lets plain lists through.
  return np.exp(-np.logaddexp(0.0, -np.asarray(x)))

# Derivative of the sigmoid, written in terms of the sigmoid's own output
def sigmoid_derivative(x):
  """Return x * (1 - x), element-wise.

  Here x is expected to be a sigmoid *output* s = sigmoid(z), not the
  pre-activation z. Because d/dz sigmoid(z) = s * (1 - s), passing the
  already-computed activation gives the derivative without re-evaluating
  the exponential.
  """
  one_minus_x = 1 - x
  return x * one_minus_x


In [None]:
#@title Apply BackProp to training data { vertical-output: true }
loss_history = []              # mean squared error recorded at every epoch
predicted_output_history = []  # network predictions snapshotted every 100 epochs
writer = SummaryWriter()       # logs the loss curve under "Loss/train"

# BackProp training loop. Every iteration uses the full data set (X, Y), so
# this is batch gradient descent. The try/finally guarantees the writer is
# closed even if the loop is interrupted part-way through.
try:
  for epoch in range(epochs):
    # Forward pass
    hidden_layer_input = np.dot(X, W1) + b1
    hidden_layer_output = sigmoid(hidden_layer_input)
    output_layer_input = np.dot(hidden_layer_output, W2) + b2
    output_layer_output = output_layer_input  # No activation for regression

    # Compute loss (mean squared error) and record it
    loss = np.mean((Y - output_layer_output) ** 2)
    loss_history.append(float(loss))
    writer.add_scalar("Loss/train", loss, epoch)

    # Calculate gradients (using symbolic differentiation).
    # NOTE: these are the gradients of 0.5 * sum(e_p ** 2) over all samples,
    # i.e. the gradient of the logged MSE scaled by a constant factor (N / 2).
    # That factor only rescales the effective learning rate.
    d_output = -(Y - output_layer_output)  # e_p: per-sample output error
    # Error propagated back through W2 and the sigmoid; computed once and
    # shared by the W1 and b1 gradients. sigmoid_derivative takes the
    # activation (sigmoid output), which is why hidden_layer_output is passed.
    d_hidden = np.dot(d_output, W2.T) * sigmoid_derivative(hidden_layer_output)
    dL_db2 = np.sum(d_output, axis=0, keepdims=True)
    dL_dW2 = np.dot(hidden_layer_output.T, d_output)
    dL_db1 = np.sum(d_hidden, axis=0, keepdims=True)
    dL_dW1 = np.dot(X.T, d_hidden)

    # Update weights and biases (batch gradient descent update).
    # All gradients above were computed before any parameter is modified.
    W2 -= learning_rate * dL_dW2
    b2 -= learning_rate * dL_db2
    W1 -= learning_rate * dL_dW1
    b1 -= learning_rate * dL_db1

    # Store predicted output for animation
    if epoch % 100 == 0:
      predicted_output_history.append(output_layer_output)
finally:
  writer.close()

In [None]:
#@title Plot predicted and actual data - also visualize the outputs from each hidden node to understand the inner workings
# One row of three panels: the fit, the hidden-node activations, and ax3,
# which is created here but not drawn on in this cell.
fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 2))

# Panel 1: noisy observations against the network's prediction
ax1.scatter(X, Y, label="Actual", s=5)
ax1.plot(X, output_layer_output, label="Predicted")
ax1.set(xlabel="X", ylabel="Y", title="Neural Network Output")
ax1.legend()

# Panel 2: one curve per hidden node, showing its activation as a function of x
for node_idx in range(hidden_layer_size):
  node_activation = hidden_layer_output[:, node_idx]
  ax2.plot(X, node_activation, label=f"Hidden Node {node_idx + 1}")

ax2.set(xlabel="X", ylabel="Y", title="Hidden Node Outputs")
#ax2.legend()

In [None]:
# Disabled animation cell: everything between the triple quotes is a plain
# string literal, so none of it executes (as the cell's only expression, the
# notebook will simply echo the string).
# To re-enable, remove the surrounding quotes. The code relies on names created
# in earlier cells: fig and ax3 (plotting cell), predicted_output_history
# (training loop), X and Y (data cell), and the FuncAnimation / HTML imports.
# It draws the stored prediction snapshots on ax3, one frame per snapshot.
'''
#fig, ax = plt.subplots(figsize=(4, 4))
line_predicted, = ax3.plot(X, predicted_output_history[0], label="Predicted")
line_actual, = ax3.plot(X, Y, label="Actual")
ax3.set_xlabel("X")
ax3.set_ylabel("Y")
ax3.set_title("Animation")
ax3.legend()

def animate(i):
  line_predicted.set_data(X, predicted_output_history[i])
  return line_predicted,

ani = FuncAnimation(fig, animate, frames=len(predicted_output_history), interval=1, blit=True)
plt.tight_layout()  # Adjust spacing between subplots
plt.show()
# Display the animation
HTML(ani.to_jshtml())
'''

In [None]:
# Load the TensorBoard notebook extension, then open the dashboard inline,
# pointed at the "runs" directory.
# NOTE(review): SummaryWriter() was created without a log_dir argument, so
# "runs" presumably matches its default output location - confirm.
%load_ext tensorboard
%tensorboard --logdir runs