In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np


In [2]:
# Training set: two (feature, target) examples for single-variable linear regression
x_train = np.array([1.0, 2.0], dtype=np.float64)      # input feature values
y_train = np.array([300.0, 500.0], dtype=np.float64)  # target values

In [4]:
# Squared-error cost of the linear model f(x) = w*x + b
def compute_cost(x: np.ndarray, y: np.ndarray, w:float, b:float):
    """
    Computes the squared-error cost of the linear model over the training set
    Args:
        x (ndarray (m, )): Training input
        y (ndarray (m, )): Training output
        w (scalar): Weight
        b (scalar): Bias
    Returns:
        cost (scalar): Sum of squared prediction errors, scaled by 1/(2*m).
    """
    n_examples = x.shape[0]
    # Accumulate in index order, starting from 0, one squared residual per example
    total_squared_error = sum(
        ((w * x[idx] + b) - y[idx]) ** 2 for idx in range(n_examples)
    )
    return (1 / (2 * n_examples)) * total_squared_error


**Linear Model:**
$$ f_{w, b}(x^i) = wx^i + b $$
**Cost Function**
$$ J(w, b) = \frac{1}{2m}\sum_{i=1}^{i=m}(f_{w, b}(x^i)-y^i)^2 $$
**Gradient Descent:**
$$ w = w - \alpha \frac{\partial}{\partial w}  J(w, b)$$
$$ b = b - \alpha \frac{\partial}{\partial b}  J(w, b)$$
*Here, $\alpha$ is the learning rate, and $w$ and $b$ have to be updated simultaneously until convergence.*

**Simple form of derivative:**
$$\frac{\partial}{\partial w}  J(w, b) = \frac{1}{m}\sum_{i=1}^{i=m}\{f_{w, b}(x^i)-y^i\}x^i$$
$$\frac{\partial}{\partial b}  J(w, b) = \frac{1}{m}\sum_{i=1}^{i=m}\{f_{w, b}(x^i)-y^i\}$$

In [13]:
# Gradient of the squared-error cost with respect to w and b
def compute_gradient(x: np.ndarray, y: np.ndarray, w: float, b: float):
    """
    Computes the gradient of the cost for linear regression
    Args:
        x (ndarray (m, )): Training input
        y (ndarray (m, )): Training output
        w (scalar): Weight
        b (scalar): Bias
    Returns:
        dj_dw (scalar): Derivative of the cost with respect to the weight.
        dj_db (scalar): Derivative of the cost with respect to the bias.
    """
    n_examples = x.shape[0]
    weight_grad_total = 0
    bias_grad_total = 0
    for idx in range(n_examples):
        # Prediction error for this example; it feeds both partial derivatives
        residual = (w * x[idx] + b) - y[idx]
        weight_grad_total += residual * x[idx]
        bias_grad_total += residual
    # Average the accumulated terms over the number of examples
    return weight_grad_total / n_examples, bias_grad_total / n_examples
    
    

In [15]:
def gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    w_in: float,
    b_in: float,
    alpha: float,
    num_iters: int,
    cost_function,
    gradient_function,
):
    """
    Performs gradient descent to determine suitable values of w and b by updating them simultaneously
    Args:
        x (ndarray (m,)): Training input
        y (ndarray (m,)): Training output
        w_in, b_in (scalar): Initial model parameter values. Weight and Bias
        alpha (float): Learning rate
        num_iters (int): Number of iterations to perform gradient descent
        cost_function: Callable (x, y, w, b) -> cost, used to record the cost after each update
        gradient_function: Callable (x, y, w, b) -> (dj_dw, dj_db), used to compute the derivatives
    Returns:
        w (scalar): Updated value of weight.
        b (scalar): Updated value of bias.
        j_history (List): Cost after each update, one entry per iteration
        p_history (List): Parameters [w, b] after each update, one entry per iteration
    """
    j_history = []
    p_history = []
    w = w_in
    b = b_in
    # Report progress about ten times over the run. The interval is loop-invariant,
    # and max(1, ...) keeps it a valid modulus for any num_iters.
    log_interval = max(1, math.ceil(num_iters / 10))
    for i in range(num_iters):
        # Use the injected gradient function so callers can swap implementations
        dj_dw, dj_db = gradient_function(x, y, w, b)
        # Both derivatives were evaluated at the old (w, b), so this is a simultaneous update
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
        j_history.append(cost_function(x, y, w, b))
        p_history.append([w, b])
        # Print only on multiples of the interval (iteration 0 included)
        if i % log_interval == 0:
            print(f"Iteration {i:4}: Cost {j_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
    return w, b, j_history, p_history

NameError: name 'function' is not defined

In [None]:
# Exercise gradient_descent on the training set

# Starting parameters and hyper-parameters
w_init, b_init = 0, 0
iterations = 10000
tmp_alpha = 1.0e-2

# Fit w and b, keeping the per-iteration cost and parameter histories
w_final, b_final, j_history, p_history = gradient_descent(
    x_train,
    y_train,
    w_in=w_init,
    b_in=b_init,
    alpha=tmp_alpha,
    num_iters=iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)