**Prompt**: Using pure Numpy to build a 3 layer neural network (with relu nonlinearity) to train on data.

In [1]:
import numpy as np

# Creating Nonlinear Equation, Generating Data

In [2]:
# Generating data of 3 dim input and 2 dim output
n, d = 500, 3
x = np.random.uniform(-1, 1, (n, d))
weights_true = np.array([[5,1,5],[1,2,1]]).T
bias_true = np.array([1,2])
y_true = (x**2) @ weights_true + x @ weights_true + bias_true
print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')

x: (500, 3), weights: (3, 2), bias: (2,), y: (500, 2)


# Building 3 layer NN with pure Numpy


## Loss Function (MSE)

In [3]:
class MSE:
  def __call__(self, y_pred, y_true):
    self.y_pred = y_pred
    self.y_true = y_true
    return ((y_pred - y_true) ** 2).mean()

  def backward(self):
    n = self.y_true.shape[0]
    self.gradient = 2. * (self.y_pred - self.y_true) / n
    # print('MSE backward', self.y_pred.shape, self.y_true.shape, self.gradient.shape)
    return self.gradient

## Linear Layer
* Randomly initialize weight and bias
* Compute forward pass and gradient descent
* Update learnable parameters

In [4]:
class Linear:
  def __init__(self, input_dim: int, num_hidden: int = 1):
    self.weights = np.random.randn(input_dim, num_hidden) * np.sqrt(2. / input_dim)
    self.bias = np.zeros(num_hidden)
  
  def __call__(self, x):
    self.x = x
    output = x @ self.weights + self.bias
    return output

  def backward(self, gradient):
    self.weights_gradient = self.x.T @ gradient
    self.bias_gradient = gradient.sum(axis=0)
    self.x_gradient = gradient @ self.weights.T
    return self.x_gradient

  def update(self, lr):
    self.weights = self.weights - lr * self.weights_gradient
    self.bias = self.bias - lr * self.bias_gradient

## Activation Function (ReLU)

In [5]:
class Relu:
    def __call__(self, input_):
      self.input_ = input_
      self.output = np.clip(self.input_, 0, None)
      return self.output
    
    def backward(self, output_gradient):
      self.input_gradient = (self.input_ > 0) * output_gradient
      return self.input_gradient

## Model Class
* Compute forward pass
* Compute backward pass

In [6]:
from typing import Callable

class Model:

  # Configure model
  def __init__(self, input_dim, num_hidden):
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = Relu()
    self.linear2 = Linear(num_hidden,12) # Pick 12 as number of neurons in hidden layer
    self.relu2 = Relu()
    self.linear3 = Linear(12,2)
  
  # Forward pass
  def __call__(self, x):
    l1 = self.linear1(x)
    r1 = self.relu1(l1)
    l2 = self.linear2(r1)
    r2 = self.relu2(l2)
    l3 = self.linear3(r2)
    return l3
  
  # Backward pass
  def backward(self, output_gradient):
    linear3_gradient = self.linear3.backward(output_gradient)
    relu2_gradient = self.relu2.backward(linear3_gradient)
    linear2_gradient = self.linear2.backward(relu2_gradient)
    relu1_gradient = self.relu1.backward(linear2_gradient)
    linear1_gradient = self.linear1.backward(relu1_gradient)
    return linear1_gradient

  # Update learnable parameters
  def update(self, lr):
    self.linear3.update(lr)
    self.linear2.update(lr)
    self.linear1.update(lr)

# Training/Testing the Model

In [7]:
# Training
def fit(x: np.ndarray, y: np.ndarray, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    y_pred = model(x)
    loss_value = loss(y_pred, y)
    if epoch % 50 == 0:
      print(f'Epoch {epoch}, loss {loss_value}')
    gradient_from_loss = loss.backward()
    model.backward(gradient_from_loss)
    model.update(lr)
    
loss = MSE()
model = Model(d, 20)
y_pred = model(x)
fit(x, y_true, model=model, loss=loss, lr=0.015, num_epochs=1000)

Epoch 0, loss 40.54205627726333
Epoch 50, loss 0.8746463687308716
Epoch 100, loss 0.2749937609103138
Epoch 150, loss 0.17423371593228432
Epoch 200, loss 0.13707868460408365
Epoch 250, loss 0.11784324845639772
Epoch 300, loss 0.10720960662365599
Epoch 350, loss 0.0999432995668605
Epoch 400, loss 0.09514801039930981
Epoch 450, loss 0.09162801285151469
Epoch 500, loss 0.0885433611300563
Epoch 550, loss 0.08583240622913459
Epoch 600, loss 0.08318137682447335
Epoch 650, loss 0.08068373713120855
Epoch 700, loss 0.07825872428966704
Epoch 750, loss 0.07568386141506984
Epoch 800, loss 0.07338533002495438
Epoch 850, loss 0.07155678962505031
Epoch 900, loss 0.0696570044032993
Epoch 950, loss 0.0673109231176251


# Plotting Training Data and Prediction

In [8]:
def plot_intereactive_3d(x, y, y_pred=None):
  import plotly.graph_objects as go # Use plotly to create interactive graph

  fig = go.Figure()
  fig.add_trace(go.Scatter3d(x = x[:,0],
                    y = x[:,1],
                    z = y.reshape([-1]),
                    opacity=0.5, mode='markers', name='Underlying Function'
                    ))
 
  if y_pred is not None:
    fig.add_trace(go.Scatter3d(x = x[:,0],
                   y = x[:,1],
                   z = y_pred.reshape([-1]),
                   opacity=0.5, mode='markers', name='Predicted Function'
                  ))
    
  fig.update_layout(scene = dict(
                    xaxis_title='X1',
                    yaxis_title='X2',
                    zaxis_title='Y'),
                    width=700,
                    margin=dict(r=20, b=10, l=10, t=10))
  fig.show()

To plot 3 dimensional data, reduce x into 2-dimension using TSNE 

In [9]:
from sklearn.manifold import TSNE
X_reduced = TSNE(n_components=2).fit_transform(x)
y_true_reduced = TSNE(n_components=1).fit_transform(y_true)
y_pred_reduced = TSNE(n_components=1).fit_transform(model(x))
print(f'X_reduced: {X_reduced.shape}, y_true_reduced: {y_true_reduced.shape}, y_pred_reduced: {y_pred_reduced.shape}')
plot_intereactive_3d(X_reduced,y_true_reduced,y_pred_reduced)

X_reduced: (500, 2), y_true_reduced: (500, 1), y_pred_reduced: (500, 1)
