# Bharath Gunasekaran 
# CMPE-258 Part 3a

## Colab environment

In [12]:
import tensorflow as tf
import matplotlib.pyplot as plt
import math 
import numpy as np
from sklearn.manifold import TSNE

In [13]:
# defining input,hidden, and outputs
n_input, n_hidden, n_output = 3,2,2


### Basic prediction function: Linear

In [14]:
class MSE:
  def __call__(self, y_pred, y_true):
    self.y_pred = y_pred
    self.y_true = y_true
    return tf.reduce_mean(((y_pred - y_true) ** 2))

  def backward(self):
    n = self.y_true.shape[0]
    self.gradient = 2. * (self.y_pred - self.y_true) / n
    return self.gradient


class Linear:
  def __init__(self, input_dim: int, num_hidden: int = 1):

    self.weights = tf.random.normal(shape=(input_dim, num_hidden)) * math.sqrt(2. / input_dim)
    self.bias = tf.zeros(shape=(num_hidden))
    print(self.weights.shape, self.bias.shape)
  
  def __call__(self, x):
    self.x = x
    output = x @ self.weights + self.bias
    return output

  def backward(self, gradient):
    self.weights_gradient = tf.transpose(self.x) @ gradient
    self.bias_gradient = tf.reduce_sum(gradient, axis=0)
    self.x_gradient = gradient @ tf.transpose(self.weights)
    return self.x_gradient

  def update(self, lr):
    self.weights = self.weights - lr * self.weights_gradient
    self.bias = self.bias - lr * self.bias_gradient

In [15]:
from typing import Callable

def fit(x: tf.Tensor, y: tf.Tensor, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    y_pred = model(x)
    loss_value = loss(y_pred, y)
    print(f'Epoch {epoch}, loss {loss_value}')
    gradient_from_loss = loss.backward()
    model.backward(gradient_from_loss)
    model.update(lr)


# Creating model using Tensorflow datastructures
## Generating Dating

In [16]:
# Make non-linear data

n = 200
d = 3
x = tf.random.normal(shape=(n,d))

weights_true = tf.constant([[5, 1,1],[1,2,1]], dtype=tf.float32)
weights_true = tf.transpose(weights_true)
bias_true = tf.constant([1], dtype=tf.float32)
y_true = (x ** 2) @ weights_true + x @ weights_true + bias_true
print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')

x: (200, 3), weights: (3, 2), bias: (1,), y: (200, 2)


### Add non-linearity: ReLU

In [17]:
class Relu:
    def __call__(self, input_):
        self.input_ = input_
        self.output = np.clip(self.input_, 0, None)
        return self.output
    
    def backward(self, output_gradient):
      # import pdb; pdb.set_trace()  # By the way, this is how you can debug
      self.input_gradient = tf.cast((self.input_ > 0), tf.float32) * output_gradient
      return self.input_gradient

### Train our new non-linear model

In [18]:
class Model:
  def __init__(self, input_dim, num_hidden):
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = Relu()
    self.linear2 = Linear(num_hidden, 2)
    self.relu2 = Relu()
    self.linear3 = Linear(2,2)

  
  def __call__(self, x):
    l1 = self.linear1(x)
    r1 = self.relu1(l1)
    l2 = self.linear2(r1)
    r2 = self.relu2(l2)
    l3 = self.linear3(r2)
    return l3
  
  def backward(self, output_gradient):
    print(output_gradient.dtype)
    linear3_gradient = self.linear3.backward(output_gradient)
    relu2_gradient = self.relu2.backward(linear3_gradient)
    linear2_gradient = self.linear2.backward(relu2_gradient)
    relu1_gradient = self.relu1.backward(linear2_gradient)
    linear1_gradient = self.linear1.backward(relu1_gradient)
    print('Model backward', linear3_gradient.shape, relu2_gradient.shape, linear2_gradient.shape, relu1_gradient.shape, linear1_gradient.shape)
    return linear1_gradient

  def update(self, lr):
    self.linear3.update(lr)
    self.linear2.update(lr)
    self.linear1.update(lr)

loss = MSE()
model = Model(3, 10)
y_pred = model(x)
loss_value = loss(y_pred, y_true)
loss_gradient = loss.backward()
print(loss_value)
# print(loss_gradient)
model.backward(loss_gradient)
# plot_3d(x, y_true, y_pred)

(3, 10) (10,)
(10, 2) (2,)
(2, 2) (2,)
tf.Tensor(139.57587, shape=(), dtype=float32)
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)


<tf.Tensor: shape=(200, 3), dtype=float32, numpy=
array([[-5.45625808e-04, -3.63874109e-03, -1.58299762e-03],
       [ 4.64488775e-01,  3.38470340e-01,  1.08138156e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 4.34542477e-01,  4.80844915e-01,  6.89707518e-01],
       [ 1.01954907e-01,  1.03655040e-01,  1.92464009e-01],
       [ 2.46597946e-01,  2.64500976e-01,  4.19398516e-01],
       [ 3.83882046e-01,  2.86456943e-01,  9.25018907e-01],
       [ 7.94557706e-02,  1.32180572e-01,  4.06238958e-02],
       [-3.15983295e-02, -2.32756883e-01, -8.23057592e-02],
       [ 2.97286175e-03,  1.84114464e-02,  9.70239565e-03],
       [ 7.22578615e-02,  1.42217427e-02, -2.65649334e-02],
       [-2.05491018e-02,  3.16599607e-02,  5.29619493e-02],
       [ 8.56042057e-02,  1.56365722e-01,  3.81805658e-01],
       [-5.86102344e-03, -9.80936084e-03, -2.40312261e-03],
       [-2.44283024e-02,  2.83828154e-02,  5.56177199e-02],
       [ 1.59655511e-03,  4.71261181e-02,  1.42808

In [19]:
# Test just one forward and backward step
loss = MSE()
model = Model(d, 10)

(3, 10) (10,)
(10, 2) (2,)
(2, 2) (2,)


In [20]:
fit(x, y_true, model=model, loss=loss, lr=0.01, num_epochs=40)


Epoch 0, loss 141.87710571289062
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 1, loss 120.52020263671875
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 2, loss 116.14340209960938
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 3, loss 112.02888488769531
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 4, loss 106.84774780273438
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 5, loss 99.0179672241211
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 6, loss 86.70497131347656
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 7, loss 67.7755355834961
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10) (200, 10) (200, 3)
Epoch 8, loss 44.165733337402344
<dtype: 'float32'>
Model backward (200, 2) (200, 2) (200, 10

In [21]:
def plot3d(x, y, y_pred=None):
  import plotly.graph_objects as go

  fig = go.Figure()
  fig.add_trace(go.Scatter3d(x = x[:,0],
                    y = x[:,1],
                    z = y.reshape([-1]),
                    opacity=0.5, mode='markers', name='Underlying Function'
                    ))
 
  if y_pred is not None:
    fig.add_trace(go.Scatter3d(x = x[:,0],
                   y = x[:,1],
                   z = y_pred.reshape([-1]),
                   opacity=0.5, mode='markers', name='Predicted Function'
                  ))
    
  fig.update_layout(scene = dict(
                    xaxis_title='X1',
                    yaxis_title='X2',
                    zaxis_title='Y'),
                    width=700,
                    margin=dict(r=20, b=10, l=10, t=10))
  fig.show()

In [22]:
y_pred = model(x)
X_red = TSNE(n_components=2).fit_transform(x)
y_true_red = TSNE(n_components=1).fit_transform(y_true)
y_pred_red = TSNE(n_components=1).fit_transform(y_pred)
print(f'X_red: {X_red.shape}, y_true_red: {y_true_red.shape}, y_pred_red: {y_pred_red.shape}')

plot3d(X_red,y_true_red,y_pred_red)

X_red: (200, 2), y_true_red: (200, 1), y_pred_red: (200, 1)
