**Prompt**: Use pure PyTorch tensors to build a 3-layer neural network (with ReLU nonlinearity) without using automatic differentiation.

In [None]:
import torch
import torch.nn as nn

# Creating Nonlinear Equation, Generating Data

In [None]:
# Generate a synthetic regression dataset: 3-dim inputs, 2-dim outputs.
# Seed the global RNG first so the data (and any later random weight
# initialisation) are identical on every Restart & Run All.
torch.manual_seed(0)
n, d = 500, 3
# Inputs sampled uniformly from [-1, 1); torch.empty replaces the legacy
# torch.FloatTensor(n, d) constructor.
x = torch.empty(n, d, dtype=torch.float32).uniform_(-1, 1)
# Ground-truth coefficients, transposed to shape (d, 2) so they right-multiply x.
weights_true = torch.tensor([[5, 1, 5], [1, 2, 1]]).float()
weights_true = torch.transpose(weights_true, 0, 1)
# Keep the bias in float like every other tensor instead of relying on
# int64 -> float32 type promotion in the sum below.
bias_true = torch.tensor([1, 2]).float()
# Nonlinear target: the quadratic and the linear term share the same coefficients.
y_true = torch.mm(x**2, weights_true) + torch.mm(x, weights_true) + bias_true
print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')

x: torch.Size([500, 3]), weights: torch.Size([3, 2]), bias: torch.Size([2]), y: torch.Size([500, 2])


# Building a 3-layer NN with PyTorch using automatic differentiation

In [None]:
class Linear(nn.Module):
  """Fully connected layer computing ``x @ weights + bias``.

  Args:
    input_dim: Number of input features (rows of the weight matrix).
    num_hidden: Number of output features (columns of the weight matrix).
  """

  def __init__(self, input_dim, num_hidden):
    super().__init__()
    # Uniform [0, 1) initial weights. The attribute is kept for backward
    # compatibility; it shares storage with `weights`, so in-place optimizer
    # updates are visible through it as well.
    self.init = torch.rand(input_dim, num_hidden).float()
    self.weights = nn.Parameter(self.init, requires_grad=True)
    # The bias has to be a Parameter: a plain tensor attribute is not returned
    # by parameters(), so an optimizer built from them would never update it.
    self.bias = nn.Parameter(torch.zeros(num_hidden))

  def forward(self, x):
    """Apply the affine map to a 2-D input of shape (batch, input_dim).

    Implemented as `forward` rather than by overriding `__call__`, so that
    `layer(x)` goes through nn.Module's call machinery (hooks included).
    """
    self.x = x  # keep a reference to the most recent input (not used in this class)
    return torch.mm(x, self.weights) + self.bias

In [None]:
# Custom model built from the notebook's own Linear layers (not torch's nn.Linear)
class TorchModel(nn.Module):
  """Three-layer MLP: Linear -> ReLU -> Linear -> ReLU -> Linear.

  Args:
    input_dim: Number of input features.
    num_hidden: Width of the first hidden layer.
    num_hidden2: Width of the second hidden layer (default 12, the value that
      used to be hard-coded).
    output_dim: Number of output features (default 2, the value that used to
      be hard-coded).
  """

  def __init__(self, input_dim, num_hidden, num_hidden2=12, output_dim=2):
    super().__init__()
    # Layers are created in the same order as before so that random weight
    # initialisation consumes the RNG identically.
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = nn.ReLU()
    self.linear2 = Linear(num_hidden, num_hidden2)
    self.relu2 = nn.ReLU()
    self.linear3 = Linear(num_hidden2, output_dim)

  def forward(self, x):
    """Run the network on `x` and return the output of the last linear layer."""
    hidden1 = self.relu1(self.linear1(x))
    hidden2 = self.relu2(self.linear2(hidden1))
    # No activation on the output layer: the final linear output is returned as-is.
    return self.linear3(hidden2)

# Training/Testing the Model

In [None]:
# Now we run the training loop
from typing import Callable
import pdb

def torch_fit(x, y, model: nn.Module, loss: Callable, lr: float, num_epochs: int, log_every: int = 50):
  """Train `model` in place with full-batch SGD using autograd.

  Args:
    x: Input tensor passed to `model` on every epoch.
    y: Target tensor passed to `loss` together with the model output.
    model: Module to optimise; must expose `parameters()`.
    loss: Callable `loss(prediction, target)` returning a scalar tensor.
    lr: SGD learning rate.
    num_epochs: Number of full-batch gradient steps.
    log_every: Print the loss every `log_every` epochs (starting at epoch 0).
      A value of 0 or None disables logging.

  Returns:
    None. The model's parameters are updated in place.
  """
  optimizer = torch.optim.SGD(model.parameters(), lr=lr)
  # Evaluate the guard once; it also prevents a modulo-by-zero below.
  should_log = bool(log_every) and log_every > 0
  for epoch in range(num_epochs):
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    y_pred_tensor = model(x)  # forward pass
    loss_value = loss(y_pred_tensor, y)
    if should_log and epoch % log_every == 0:
      print(f'Epoch {epoch}, loss {loss_value}')
    loss_value.backward()  # autograd computes the gradients
    optimizer.step()  # gradient-descent update of the parameters

# Hyperparameters of the autograd baseline, named so they are easy to find and tune.
NUM_HIDDEN = 22
LEARNING_RATE = 0.0035
NUM_EPOCHS = 1000

# Build the network and the mean-squared-error criterion, then train on the full dataset.
model = TorchModel(d, NUM_HIDDEN)
loss = nn.MSELoss()
torch_fit(x, y_true, model=model, loss=loss, lr=LEARNING_RATE, num_epochs=NUM_EPOCHS)

Epoch 0, loss 553.2417602539062
Epoch 50, loss 4.332200527191162
Epoch 100, loss 2.7386903762817383
Epoch 150, loss 1.7835078239440918
Epoch 200, loss 1.26443612575531
Epoch 250, loss 0.9646130204200745
Epoch 300, loss 0.7753123044967651
Epoch 350, loss 0.6522733569145203
Epoch 400, loss 0.5738704800605774
Epoch 450, loss 0.525150716304779
Epoch 500, loss 0.49403929710388184
Epoch 550, loss 0.47207173705101013
Epoch 600, loss 0.45465654134750366
Epoch 650, loss 0.43964290618896484
Epoch 700, loss 0.42621439695358276
Epoch 750, loss 0.413982093334198
Epoch 800, loss 0.4025614559650421
Epoch 850, loss 0.3916738033294678
Epoch 900, loss 0.3812325596809387
Epoch 950, loss 0.37155061960220337


# Plotting Training Data and Prediction

In [None]:
def plot_intereactive_3d(x, y, y_pred=None):
  """Show an interactive plotly 3-D scatter of targets and, optionally, predictions.

  Args:
    x: 2-D array-like; columns 0 and 1 become the two horizontal axes.
    y: Array-like supporting `reshape`; flattened and used as the vertical axis
      of the 'Underlying Function' trace.
    y_pred: Optional array-like supporting `reshape`; when given, it is drawn
      as a second trace named 'Predicted Function'.
  """
  import plotly.graph_objects as go

  def _marker_trace(z_values, label):
    # Both traces share the same horizontal coordinates and marker styling.
    return go.Scatter3d(x=x[:, 0],
                        y=x[:, 1],
                        z=z_values.reshape([-1]),
                        opacity=0.5, mode='markers', name=label)

  fig = go.Figure()
  fig.add_trace(_marker_trace(y, 'Underlying Function'))
  if y_pred is not None:
    fig.add_trace(_marker_trace(y_pred, 'Predicted Function'))

  axis_titles = dict(xaxis_title='X1', yaxis_title='X2', zaxis_title='Y')
  fig.update_layout(scene=axis_titles,
                    width=700,
                    margin=dict(r=20, b=10, l=10, t=10))
  fig.show()

In [None]:
from sklearn.manifold import TSNE
# Project the inputs to 2-D and the outputs to 1-D so they fit in a 3-D scatter.
# A fixed random_state keeps the embeddings reproducible across runs.
TSNE_SEED = 0
X_reduced = TSNE(n_components=2, random_state=TSNE_SEED).fit_transform(x.detach().cpu().numpy())

# t-SNE has no transform(): every fit produces its own arbitrary coordinate
# system. Embedding targets and predictions in two separate fits would make
# their 1-D values incomparable, so both are embedded in ONE joint fit and the
# result is split back afterwards.
y_pred = model(x).detach()
y_joint = torch.cat([y_true, y_pred], dim=0).cpu().numpy()
y_joint_reduced = TSNE(n_components=1, random_state=TSNE_SEED).fit_transform(y_joint)
num_true = y_true.shape[0]
y_true_reduced = y_joint_reduced[:num_true]
y_pred_reduced = y_joint_reduced[num_true:]
print(f'X_reduced: {X_reduced.shape}, y_true_reduced: {y_true_reduced.shape}, y_pred_reduced: {y_pred_reduced.shape}')

plot_intereactive_3d(X_reduced,y_true_reduced,y_pred_reduced)

X_reduced: (500, 2), y_true_reduced: (500, 1), y_pred_reduced: (500, 1)
