**Prompt**: Use pure tensorflow tensor datastructure to build a 3 layer neural network (with relu nonlinearity), not using auto differentiation. 

In [1]:
import tensorflow as tf

# Creating Nonlinear Equation, Generating Data

In [7]:
# Generating data of 3 dim input and 2 dim output
n, d = 500, 3
x = tf.random.uniform(minval=-1, maxval=1, shape = (n, d))
x2 = tf.cast(x**2, tf.float32)
weights_true = tf.cast(tf.constant([[5,1,5],[1,2,1]]), tf.float32)
bias_true = tf.cast(tf.constant([1,2]), tf.float32)
y_true = tf.matmul(x2, weights_true, transpose_b=True) + tf.matmul(x,weights_true,transpose_b=True) + bias_true

print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')

x: (500, 3), weights: (2, 3), bias: (2,), y: (500, 2)


# Building 3 layer NN with Tensorflow

## Loss Function (MSE)

In [2]:
class MSE:
  def __call__(self, y_pred, y_true):
    self.y_pred = y_pred
    self.y_true = y_true
    return tf.reduce_mean((y_pred - y_true) ** 2)

  def backward(self):
    n = self.y_true.shape[0]
    self.gradient = 2. * (self.y_pred - self.y_true) / n
    return self.gradient

## Linear Layer
* Randomly initialize weight and bias
* Compute forward pass and gradient descent
* Update learnable parameters

In [3]:
class Linear:
  def __init__(self, input_dim: int, num_hidden: int = 1):
    self.weights = tf.Variable(tf.random.uniform(shape=(input_dim, num_hidden)))
    self.bias = tf.Variable(tf.zeros(shape=(num_hidden,)))
  
  def __call__(self, x):
    self.x = x
    return tf.matmul(x, self.weights) + self.bias

  def backward(self, gradient):
    self.weights_gradient = tf.matmul(self.x,gradient,transpose_a=True)
    self.bias_gradient = tf.reduce_sum(gradient, 0)
    self.x_gradient = tf.matmul(gradient, self.weights, transpose_b=True)
    return self.x_gradient

  def update(self, lr):
    self.weights = self.weights - lr * self.weights_gradient
    self.bias = self.bias - lr * self.bias_gradient

## Activation Function (ReLU)

In [4]:
class Relu:
    def __call__(self, input_):
      import math
      self.input_ = input_
      self.output = tf.clip_by_value(self.input_, 0, math.inf)
      return self.output
    
    def backward(self, output_gradient):
      self.input_gradient = tf.cast((self.input_ > 0), tf.float32) * output_gradient
      return self.input_gradient

## Model Class
* Compute forward pass
* Compute backward pass

In [5]:
from typing import Callable

class Model:
  def __init__(self, input_dim, num_hidden):
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = Relu()
    self.linear2 = Linear(num_hidden,12) # Pick 12 as number of neurons in hidden layer
    self.relu2 = Relu()
    self.linear3 = Linear(12,2)
  
  # Forward pass
  def __call__(self, x):
    l1 = self.linear1(x)
    r1 = self.relu1(l1)
    l2 = self.linear2(r1)
    r2 = self.relu2(l2)
    l3 = self.linear3(r2)
    return l3
  
  def backward(self, output_gradient):
    linear3_gradient = self.linear3.backward(output_gradient)
    relu2_gradient = self.relu2.backward(linear3_gradient)
    linear2_gradient = self.linear2.backward(relu2_gradient)
    relu1_gradient = self.relu1.backward(linear2_gradient)
    linear1_gradient = self.linear1.backward(relu1_gradient)
    return linear1_gradient

  def update(self, lr):
    self.linear3.update(lr)
    self.linear2.update(lr)
    self.linear1.update(lr)

# Training/Testing the Model

In [8]:
# Training
def fit(x, y, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    y_pred = model(x)
    loss_value = loss(y_pred, y)
    if epoch % 50 == 0:
      print(f'Epoch {epoch}, loss {loss_value}')
    gradient_from_loss = loss.backward()
    model.backward(gradient_from_loss)
    model.update(lr)

loss = MSE()
model = Model(d, 20)
fit(x, y_true, model=model, loss=loss, lr=0.0025, num_epochs=1000)

Epoch 0, loss 388.9023132324219
Epoch 50, loss 3.9436230659484863
Epoch 100, loss 2.2242724895477295
Epoch 150, loss 1.6083283424377441
Epoch 200, loss 1.2304809093475342
Epoch 250, loss 0.9568304419517517
Epoch 300, loss 0.7506739497184753
Epoch 350, loss 0.5917983055114746
Epoch 400, loss 0.4666556715965271
Epoch 450, loss 0.36877092719078064
Epoch 500, loss 0.29554325342178345
Epoch 550, loss 0.24264389276504517
Epoch 600, loss 0.20648051798343658
Epoch 650, loss 0.18164370954036713
Epoch 700, loss 0.16400347650051117
Epoch 750, loss 0.1509675234556198
Epoch 800, loss 0.14120785892009735
Epoch 850, loss 0.1339869201183319
Epoch 900, loss 0.12800781428813934
Epoch 950, loss 0.12293635308742523


# Plotting Training Data and Prediction

In [9]:
def plot_intereactive_3d(x, y, y_pred=None):
  import plotly.graph_objects as go

  fig = go.Figure()
  fig.add_trace(go.Scatter3d(x = x[:,0],
                    y = x[:,1],
                    z = y.reshape([-1]),
                    opacity=0.5, mode='markers', name='Underlying Function'
                    ))
 
  if y_pred is not None:
    fig.add_trace(go.Scatter3d(x = x[:,0],
                   y = x[:,1],
                   z = y_pred.reshape([-1]),
                   opacity=0.5, mode='markers', name='Predicted Function'
                  ))
    
  fig.update_layout(scene = dict(
                    xaxis_title='X1',
                    yaxis_title='X2',
                    zaxis_title='Y'),
                    width=700,
                    margin=dict(r=20, b=10, l=10, t=10))
  fig.show()

To plot 3 dimensional data, reduce x into 2-dimension using TSNE 

In [10]:
from sklearn.manifold import TSNE
X_reduced = TSNE(n_components=2).fit_transform(x)
y_true_reduced = TSNE(n_components=1).fit_transform(y_true)
y_pred_reduced = TSNE(n_components=1).fit_transform(model(x))
print(f'X_reduced: {X_reduced.shape}, y_true_reduced: {y_true_reduced.shape}, y_pred_reduced: {y_pred_reduced.shape}')
plot_intereactive_3d(X_reduced,y_true_reduced,y_pred_reduced)

X_reduced: (500, 2), y_true_reduced: (500, 1), y_pred_reduced: (500, 1)
