In [182]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap

In [183]:
# Let's create our training data 12 pairs {x_i, y_i}
# We'll try to fit a curved line to these data
data = np.array([[0.21,0.22,0.34,0.46,0.68,0.91,1.34,1.18,1.39,1.60,1.65,1.90],
                 [0.33,0.82,1.05,1.00,1.20,1.50,1.30,1.54,1.25,1.68,1.73,1.60]])

In [184]:
# Let's define our model
def model(phi,x):
  y_pred = phi[0]+phi[1] * x + phi[2]*x*x + phi[3]*x*x*x*x
  return y_pred

In [185]:
# Initialize the parameters to some arbitrary values
phi = np.zeros((4,1))
phi[0] = 0.7
phi[1] = 0.4
phi[2] = 0.3
phi[3] = 0.05



In [186]:
#define MSE loss:
def compute_loss(data_x, data_y, phi):

  error = 0
  for i in range(data_x.shape[0]):
    y_p = model(phi,data_x[i])
    #print('y_p is {} y_true is {}'.format(y_p,data_y[i]))
    error += (y_p - data_y[i])**2

  loss = (error)/data_x.shape[0]
  return loss

In [187]:
loss = compute_loss(data[0,:],data[1,:],phi)
print('Your loss = %3.3f'%(loss).item())
#Add .item() to stop the warnign!!!

Your loss = 0.424


In [188]:
#you will update the parameters with the help of the computed gradients and check if the new loss is lower than your old loss.
def change_in_loss(data_x, data_y, gradients, phi, lr=0.01):

    old_loss = compute_loss(data_x, data_y, phi)
    new_phi = phi - lr * gradients
    new_loss = compute_loss(data_x, data_y, new_phi)
    print('Difference between old loss {} and new loss {}'.format(old_loss, new_loss))
    
    return new_phi

def compute_gradient(data_x, data_y, phi,d=0.0001):

    gradients = np.zeros_like(phi)
    base_loss = compute_loss(data_x, data_y, phi)
    
    # Iterate over each parameter to compute its gradient
    phi_perturbed = phi.copy()
    for i in range(len(phi)):
        phi_perturbed[i] += d
        perturbed_loss = compute_loss(data_x, data_y, phi_perturbed)

        gradients[i] = (perturbed_loss - base_loss) / d
    
    return gradients

We can check the gradient using a trick known as **finite differences**.  If we evaluate the function and then change one of the parameters by a very small amount and normalize by that amount, we get an approximation to the gradient, so:

\begin{eqnarray}
\frac{\partial L}{\partial \phi_{0}}&\approx & \frac{L[\phi_0+\delta, \phi_1, \phi_2, \phi_3]-L[\phi_0, \phi_1, \phi_2, \phi_3]}{\delta}\\
\frac{\partial L}{\partial \phi_{1}}&\approx & \frac{L[\phi_0, \phi_1+\delta, \phi_2, \phi_3]-L[\phi_0, \phi_1, \phi_2, \phi_3]}{\delta}\\
\frac{\partial L}{\partial \phi_{2}}&\approx & \frac{L[\phi_0, \phi_1, \phi_2 + \delta, \phi_3]-L[\phi_0, \phi_1, \phi_2, \phi_3]}{\delta}\\
\frac{\partial L}{\partial \phi_{3}}&\approx & \frac{L[\phi_0, \phi_1, \phi_2, \phi_3 + \delta]-L[\phi_0, \phi_1, \phi_2]}{\delta}
\end{eqnarray}



In [189]:
# Compute the gradient using your function

gradients = compute_gradient(data[0,:],data[1,:], phi)
change_in_loss = change_in_loss(data[0,:],data[1,:],gradients,phi)

Difference between old loss [0.42391437] and new loss [0.0419479]


In [190]:
print(gradients)

[[ 0.78537052]
 [ 2.03630024]
 [ 4.13386594]
 [10.28687127]]


In [191]:
print(change_in_loss)

[[ 0.69214629]
 [ 0.379637  ]
 [ 0.25866134]
 [-0.05286871]]
