<a href="https://colab.research.google.com/github/chahatpatel2003/CSCI-167/blob/main/notebook_7_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the RNG seed so every run draws identical parameters
np.random.seed(0)

# Network dimensions
K = 5    # number of hidden layers
D = 6    # neurons per hidden layer
D_i = 1  # input dimension
D_o = 1  # output dimension

# One weight matrix and one bias vector per layer (K hidden + 1 output)
all_weights = [None for _ in range(K+1)]
all_biases = [None for _ in range(K+1)]

# First layer maps input -> hidden width, last layer maps hidden width -> output.
# NOTE: the order of the random draws below determines the parameter values,
# so it must stay: W_0, W_K, b_0, b_K, then (W_l, b_l) for l = 1..K-1.
all_weights[0] = np.random.normal(size=(D, D_i))
all_weights[K] = np.random.normal(size=(D_o, D))
all_biases[0] = np.random.normal(size=(D, 1))
all_biases[K] = np.random.normal(size=(D_o, 1))

# Hidden-to-hidden layers
for layer in range(1, K):
  all_weights[layer] = np.random.normal(size=(D, D))
  all_biases[layer] = np.random.normal(size=(D, 1))

def ReLU(preactivation):
  """Rectified linear unit: clamp every negative entry to zero.

  Returns a new array; the input is left unmodified.
  """
  rectified = np.clip(preactivation, 0.0, None)
  return rectified

def compute_network_output(net_input, all_weights, all_biases):
  """Run a forward pass through the fully connected ReLU network.

  The number of hidden layers is taken from the parameter list
  (len(all_weights) - 1). Returns a tuple (net_output, all_f, all_h) where
  all_f[k] is the preactivation of layer k, all_h[k] is the input fed to
  layer k (all_h[0] is net_input), and net_output is the final
  preactivation all_f[-1] (no ReLU is applied to the output layer).
  """
  n_hidden = len(all_weights) - 1
  all_f = [None] * (n_hidden+1)
  all_h = [None] * (n_hidden+1)
  all_h[0] = net_input

  # Hidden layers: affine map followed by ReLU
  activation = net_input
  for layer in range(n_hidden):
    preactivation = np.matmul(all_weights[layer], activation) + all_biases[layer]
    activation = ReLU(preactivation)
    all_f[layer] = preactivation
    all_h[layer+1] = activation

  # Output layer: affine map only
  all_f[n_hidden] = np.matmul(all_weights[n_hidden], activation) + all_biases[n_hidden]

  return all_f[n_hidden], all_f, all_h

# Single test input: a (D_i, 1) column filled with 1.2
net_input = 1.2 * np.ones((D_i,1))
# Forward pass; keep the intermediate values for the backward pass
forward_result = compute_network_output(net_input, all_weights, all_biases)
net_output, all_f, all_h = forward_result
# Compare against the reference value
print("True output = %3.3f, Your answer = %3.3f"%(1.907, net_output[0,0]))

def least_squares_loss(net_output, y):
  """Sum of squared differences between the network output and the target y."""
  residual = net_output - y
  return np.sum(residual * residual)

def d_loss_d_output(net_output, y):
  """Derivative of the least squares loss with respect to the network output."""
  residual = net_output - y
  return 2 * residual

# Target output: a (D_o, 1) column filled with 20.0
y = np.ones((D_o,1)) * 20.0
loss = least_squares_loss(net_output, y)
# Index the element explicitly (as done for net_output above): calling
# float() on an array with ndim > 0 is deprecated in NumPy >= 1.25 and
# fails outright when D_o > 1.
print("y = %3.3f Loss = %3.3f"%(y[0,0], loss))

def indicator_function(x):
  """Return a copy of x with entries > 0 set to 1 and entries <= 0 set to 0.

  This is the derivative of the ReLU evaluated elementwise. The input is
  copied, so the caller's array is not modified.
  """
  result = np.array(x)
  positive = result > 0
  non_positive = result <= 0
  result[positive] = 1
  result[non_positive] = 0
  return result

def backward_pass(all_weights, all_biases, all_f, all_h, y):
  """Backpropagate the least squares loss through the network.

  Arguments:
    all_weights: list of per-layer weight matrices.
    all_biases:  list of per-layer bias vectors (not read here; kept so the
                 signature mirrors compute_network_output).
    all_f:       per-layer preactivations from the forward pass.
    all_h:       per-layer inputs from the forward pass (all_h[0] is the
                 network input).
    y:           target output.

  Returns (all_dl_dweights, all_dl_dbiases): lists with one gradient array
  per layer, shaped like the corresponding weight matrix / bias vector.
  """
  # Take the depth from the parameters that were passed in (as
  # compute_network_output does) rather than from the module-level K, so the
  # function works for any network.
  K = len(all_weights) - 1

  all_dl_dweights = [None] * (K+1)
  all_dl_dbiases  = [None] * (K+1)
  all_dl_df       = [None] * (K+1)
  all_dl_dh       = [None] * (K+1)

  # dL/df_K (output layer)
  all_dl_df[K] = np.array(d_loss_d_output(all_f[K], y))

  # Backprop through layers K ... 0
  for layer in range(K, -1, -1):
    # dL/db_layer = dL/df_layer
    all_dl_dbiases[layer] = np.array(all_dl_df[layer])

    # dL/dW_layer = (dL/df_layer) @ h_layer^T
    all_dl_dweights[layer] = np.matmul(all_dl_df[layer], all_h[layer].T)

    # The gradient w.r.t. the network input (layer 0) is never used, so only
    # propagate further back while there is an earlier layer.
    if layer > 0:
      # dL/dh_layer = W_layer^T @ (dL/df_layer)
      all_dl_dh[layer] = np.matmul(all_weights[layer].T, all_dl_df[layer])
      # dL/df_{layer-1} = (dL/dh_layer) ⊙ ReLU'(f_{layer-1})
      all_dl_df[layer-1] = all_dl_dh[layer] * indicator_function(all_f[layer-1])

  return all_dl_dweights, all_dl_dbiases

# Analytic gradients from backpropagation
all_dl_dweights, all_dl_dbiases = backward_pass(all_weights, all_biases, all_f, all_h, y)

np.set_printoptions(precision=3)
all_dl_dweights_fd = [None] * (K+1)
all_dl_dbiases_fd  = [None] * (K+1)
# Step size of the forward-difference approximation
delta_fd = 0.000001

# Loss at the unperturbed parameters. It does not depend on which element is
# being perturbed, so compute it once instead of once per element.
network_output_2, *_ = compute_network_output(net_input, all_weights, all_biases)
baseline_loss = least_squares_loss(network_output_2, y)

# Finite-difference check for biases
for layer in range(K+1):
  dl_dbias  = np.zeros_like(all_dl_dbiases[layer])
  for row in range(all_biases[layer].shape[0]):
    # Perturb a single bias element in a fresh copy of the parameters
    all_biases_copy = [np.array(x) for x in all_biases]
    all_biases_copy[layer][row] += delta_fd
    network_output_1, *_ = compute_network_output(net_input, all_weights, all_biases_copy)
    dl_dbias[row] = (least_squares_loss(network_output_1, y) - baseline_loss)/delta_fd
  all_dl_dbiases_fd[layer] = np.array(dl_dbias)
  print("-----------------------------------------------")
  print("Bias %d, derivatives from backprop:"%(layer))
  print(all_dl_dbiases[layer])
  print("Bias %d, derivatives from finite differences"%(layer))
  print(all_dl_dbiases_fd[layer])
  if np.allclose(all_dl_dbiases_fd[layer], all_dl_dbiases[layer], rtol=1e-05, atol=1e-08, equal_nan=False):
    print("Success!  Derivatives match.")
  else:
    print("Failure!  Derivatives different.")

# Finite-difference check for weights
for layer in range(K+1):
  dl_dweight  = np.zeros_like(all_dl_dweights[layer])
  for row in range(all_weights[layer].shape[0]):
    for col in range(all_weights[layer].shape[1]):
      # Perturb a single weight element in a fresh copy of the parameters
      all_weights_copy = [np.array(x) for x in all_weights]
      all_weights_copy[layer][row, col] += delta_fd
      network_output_1, *_ = compute_network_output(net_input, all_weights_copy, all_biases)
      dl_dweight[row, col] = (least_squares_loss(network_output_1, y) - baseline_loss)/delta_fd
  all_dl_dweights_fd[layer] = np.array(dl_dweight)
  print("-----------------------------------------------")
  print("Weight %d, derivatives from backprop:"%(layer))
  print(all_dl_dweights[layer])
  print("Weight %d, derivatives from finite differences"%(layer))
  print(all_dl_dweights_fd[layer])
  if np.allclose(all_dl_dweights_fd[layer], all_dl_dweights[layer], rtol=1e-05, atol=1e-08, equal_nan=False):
    print("Success!  Derivatives match.")
  else:
    print("Failure!  Derivatives different.")


True output = 1.907, Your answer = 1.907
y = 20.000 Loss = 327.371
-----------------------------------------------
Bias 0, derivatives from backprop:
[[ -4.486]
 [  4.947]
 [  6.812]
 [ -3.883]
 [-24.935]
 [  0.   ]]
Bias 0, derivatives from finite differences
[[ -4.486]
 [  4.947]
 [  6.812]
 [ -3.883]
 [-24.935]
 [  0.   ]]
Success!  Derivatives match.
-----------------------------------------------
Bias 1, derivatives from backprop:
[[ -0.   ]
 [-11.297]
 [  0.   ]
 [  0.   ]
 [-10.722]
 [  0.   ]]
Bias 1, derivatives from finite differences
[[  0.   ]
 [-11.297]
 [  0.   ]
 [  0.   ]
 [-10.722]
 [  0.   ]]
Success!  Derivatives match.
-----------------------------------------------
Bias 2, derivatives from backprop:
[[-0.   ]
 [-0.   ]
 [ 0.938]
 [ 0.   ]
 [-9.993]
 [ 0.508]]
Bias 2, derivatives from finite differences
[[ 0.   ]
 [ 0.   ]
 [ 0.938]
 [ 0.   ]
 [-9.993]
 [ 0.508]]
Success!  Derivatives match.
-----------------------------------------------
Bias 3, derivatives from ba

  print("y = %3.3f Loss = %3.3f"%(float(y), loss))
