In [None]:
import math, copy
import numpy as np

In [None]:
# Training set of two examples: x_train[i] is the input feature and
# y_train[i] the corresponding target value of example i.
x_train = np.array([1.0, 2.0])   # input feature of each training example
y_train = np.array([300.0, 500.0])   # target value of each training example

In [None]:
def compute_cost(x, y, w, b):
    """
    Squared-error cost of the linear model f(x) = w*x + b on a data set.

    Args:
        x: training inputs; x.shape[0] gives the number of examples
        y: training targets, indexed in step with x
        w, b: model parameters (slope and intercept)

    Returns:
        The sum of squared prediction errors multiplied by 1 / (2 * m),
        where m is the number of examples.
    """
    n_examples = x.shape[0]

    squared_error_total = 0
    for idx in range(n_examples):
        residual = (w * x[idx] + b) - y[idx]
        squared_error_total += residual ** 2

    # The 1/(2m) scaling (rather than 1/m) matches the cost J(w,b) whose
    # derivative carries no stray factor of 2.
    return (1 / (2 * n_examples)) * squared_error_total

Gradient descent is described as: <br> <center> repeat until convergence: {  <br> $$ w = w - \alpha \frac {\partial J(w,b)}{\partial w}$$  $$ b = b - \alpha \frac {\partial J(w,b)}{\partial b}$$ } <br> </center> where $\alpha$ is the learning rate and the parameters $w, b$ are updated simultaneously.

The gradient is defined as: <br> $$ \frac {\partial J(w,b)}{\partial w} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} $$ <br> $$ \frac {\partial J(w,b)}{\partial b} = \frac {1}{m} \displaystyle\sum_{i=0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})$$ <br> where $m$ is the number of training examples and $f_{w,b}(x^{(i)}) = w x^{(i)} + b$ is the model's prediction for example $i$. <br>
Here, *simultaneously* means that you calculate the partial derivatives for all parameters before updating any of them.
  

In [None]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the cost J(w,b) for the linear model f(x) = w*x + b.

    Args:
        x: training inputs; x.shape[0] gives the number of examples
        y: training targets, indexed in step with x
        w, b: current model parameters

    Returns:
        (dj_dw, dj_db): the partial derivatives of the cost with respect to
        w and b, each averaged over the examples.
    """
    n_examples = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for idx in range(n_examples):
        # Prediction error for this example; it feeds both partial derivatives.
        error = (w * x[idx] + b) - y[idx]
        dj_dw += error * x[idx]
        dj_db += error

    # Turn the accumulated sums into per-example averages.
    dj_dw /= n_examples
    dj_db /= n_examples
    return dj_dw, dj_db

In [None]:
def gradient_descent(x,y,w_in,b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Fit the parameters w and b by batch gradient descent.

    Args:
        x, y: training inputs and targets; passed through unchanged to
            cost_function and gradient_function
        w_in, b_in: initial values of the parameters
        alpha: learning rate (step size applied to each gradient)
        num_iters: number of update steps to run
        cost_function: callable (x, y, w, b) -> cost, used only for the
            recorded history and the progress printout
        gradient_function: callable (x, y, w, b) -> (dj_dw, dj_db)

    Returns:
        w, b: the parameters after num_iters updates
        J_history: list of the cost after each recorded update
        p_history: list of [w, b] after each recorded update
    """
    # Recording stops after this many iterations so the history lists stay bounded.
    max_recorded_iters = 100000

    J_history = []
    p_history = []
    w = w_in
    b = b_in

    # Progress is printed at roughly ten evenly spaced iterations; the interval
    # does not change inside the loop, so compute it once.
    print_interval = math.ceil(num_iters / 10)

    for i in range(num_iters):
        # Use the gradient callable supplied by the caller (previously a
        # hard-coded global was called and this argument was ignored).
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Both gradients are computed before either parameter changes, so the
        # update is simultaneous.
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if i < max_recorded_iters:
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w, b])
            if i % print_interval == 0:
                print(f'Iteration {i:4}: Cost {J_history[-1]:.2}', f'dj_dw: {dj_dw:.3e}, dj_db: {dj_db: .3e}', f'w: {w:.3e}, b:{b:.5e}')

    return w, b, J_history, p_history


In [None]:
# Start from w = b = 0 and run gradient descent on the training set.
w_init, b_init = 0, 0

iterations = 10_000   # number of gradient descent steps
tmp_alpha = 0.01      # learning rate

w_final, b_final, J_hist, p_hist = gradient_descent(
    x_train,
    y_train,
    w_init,
    b_init,
    alpha=tmp_alpha,
    num_iters=iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)
print(f'(w,b) found by gradient descent: {w_final:8.4f}, {b_final:8.4f}')