In [6]:
import math, copy
import numpy as np
import pandas as pd

In [20]:
# Load our data set
x_train = np.array([1.0, 2.0])   #features
y_train = np.array([300.0, 500.0])   #target value

In [21]:
#Function to calculate the cost
def compute_cost(x, y, w, b):
   
    m = x.shape[0] 
    cost = 0
    
    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m) * cost

    return total_cost

In [22]:
def compute_gradient(x, y, w, b): 
    """
    Computes the gradient for linear regression 
    Args:
      x (ndarray (m,)): Data, m examples 
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters  
    Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameters w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b     
     """
    
    # Number of training examples
    m = x.shape[0]    
    dj_dw = 0
    dj_db = 0
    
    for i in range(m):  
        f_wb = w * x[i] + b 
        dj_dw_i = (f_wb - y[i]) * x[i] 
        dj_db_i = f_wb - y[i] 
        dj_db += dj_db_i
        dj_dw += dj_dw_i 
    dj_dw = dj_dw / m 
    dj_db = dj_db / m 
        
    return dj_dw, dj_db

In [23]:
compute_gradient(x_train,y_train,0,0)

(-650.0, -400.0)

In [24]:
plt_gradients(x_train,y_train, compute_cost, compute_gradient)
plt.show()

NameError: name 'plt_gradients' is not defined

In [25]:
def compute_gradientSimple(x,y,w,b):
  """
    Computes the gradient for linear regression 
    Args:
      x (ndarray (m,)): Data, m examples 
      y (ndarray (m,)): target values
      w,b (scalar)    : model parameters  
  Returns
      dj_dw (scalar): The gradient of the cost w.r.t. the parameters w
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b     
  """
    
  dj_dw = 0
  dj_db = 0

  m = len(x)



  for i in range(m):
    f_wb = w * x[i] + b
    dj_dw += (f_wb - y[i]) * x[i]
    dj_db += f_wb - y[i]
    
  dj_dw = dj_dw / m
  dj_db = dj_db / m

  return dj_dw,dj_db



In [26]:
compute_gradientSimple(x,y,0,0)

NameError: name 'x' is not defined

In [27]:
compute_gradient(x,y,0,0) == compute_gradientSimple(x,y,0,0)

NameError: name 'x' is not defined

In [28]:
plt_gradients(x,y, compute_cost, compute_gradient)
plt.show()

NameError: name 'plt_gradients' is not defined

In [46]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function): 
    """
    Performs gradient descent to fit w,b. Updates w,b by taking 
    num_iters gradient steps with learning rate alpha
    
    Args:
      x (ndarray (m,))  : Data, m examples 
      y (ndarray (m,))  : target values
      w_in,b_in (scalar): initial values of model parameters  
      alpha (float):     Learning rate
      num_iters (int):   number of iterations to run gradient descent
      cost_function:     function to call to produce cost
      gradient_function: function to call to produce gradient
      
    Returns:
      w (scalar): Updated value of parameter after running gradient descent
      b (scalar): Updated value of parameter after running gradient descent
      J_history (List): History of cost values
      p_history (list): History of parameters [w,b] 
      """
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    p_history = []
    b = b_in
    w = w_in
    
    for i in range(num_iters):
        # Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w , b) 
        print(i)
        print("dj_dw=", dj_dw)
        print("dk_dw=",dj_db)

        # Update Parameters using equation (3) above
        b = b - alpha * dj_db                            
        w = w - alpha * dj_dw      
        print("b=", b)
        print("w=", w)
        print("alpha=", alpha)                      

        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( cost_function(x, y, w , b))
            p_history.append([w,b])
        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
 
    return w, b, J_history, p_history #return w and J,w history for graphing

In [47]:
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 3
tmp_alpha = 1.0e-2
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha, 
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")


0
dj_dw= -650.0
dk_dw= -400.0
b= 4.0
w= 6.5
alpha= 0.01
Iteration    0: Cost 7.93e+04  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 4.00000e+00
1
dj_dw= -627.75
dk_dw= -386.25
b= 7.862500000000001
w= 12.7775
alpha= 0.01
Iteration    1: Cost 7.39e+04  dj_dw: -6.278e+02, dj_db: -3.862e+02   w:  1.278e+01, b: 7.86250e+00
2
dj_dw= -606.2625
dk_dw= -372.97125
b= 11.5922125
w= 18.840125
alpha= 0.01
Iteration    2: Cost 6.90e+04  dj_dw: -6.063e+02, dj_db: -3.730e+02   w:  1.884e+01, b: 1.15922e+01
(w,b) found by gradient descent: ( 18.8401, 11.5922)


In [42]:
print(w_final)

18.840125


In [43]:
print(J_hist[len(J_hist)-1])
print(p_hist[len(p_hist)-1])


68955.50943163194
[18.840125, 11.5922125]
