# Components required for Gradient Descent

- `Predictions of the model`

- `Cost Computation Function for the predictions made by the model`

- `Derivatives of the cost with respect to the parameters of the model`

Together, these components make up gradient descent, which `finds a local minimum of the cost function with respect to the parameters of the model`

In [9]:
# Importing the necessary libraries
import math
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Training data: two (feature, target) examples for the univariate model.
# The prediction cell further down reads the feature in units of 1000 sqft
# and the target in thousands of dollars.
x_train, y_train = np.array([1.0, 2.0]), np.array([300.0, 500.0])

In [5]:
# Cost function: an essential component of gradient descent
def compute_cost(x, y, w, b):

    """
    Computes the squared error cost of the linear model f(x) = w * x + b

    The cost is J(w, b) = (1 / (2 * m)) * sum((w * x[i] + b - y[i]) ** 2)
    taken over all m training examples.

    Args:
        x - Features, a 1-D array-like (NumPy array, list or tuple) of m values
        y - Targets, a 1-D array-like with the same length as x
        w, b - Model Parameters (scalar weight and bias)

    Returns:
        total_cost - The cost of the model in predicting the output for the given parameters of the model (w, b)

    Raises:
        ValueError - If x and y are not 1-D with the same length, or if they are empty
    """

    # Accept plain Python sequences as well as NumPy arrays
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Fail loudly instead of silently ignoring surplus targets
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError(
            f"x and y must be 1-D with the same length, got shapes {x.shape} and {y.shape}"
        )

    # No of training samples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x and y must contain at least one training example")

    # Prediction error for every training example at once
    errors = w * x + b - y

    # Half of the mean squared error
    return (1 / (2 * m)) * np.sum(errors ** 2)

In [6]:
# Gradient function: computes the partial derivatives of the cost with respect to w and b, which gradient descent uses to update the parameters
def compute_gradient(x, y, w, b):

    """
    Computes the gradient of the squared error cost with respect to the model parameters

    For the linear model f(x) = w * x + b the gradients are
        dj_dw = (1 / m) * sum((w * x[i] + b - y[i]) * x[i])
        dj_db = (1 / m) * sum(w * x[i] + b - y[i])
    taken over all m training examples.

    Args:
        x - Features, a 1-D array-like (NumPy array, list or tuple) of m values
        y - Target, a 1-D array-like with the same length as x
        w, b - Model Parameters (scalar weight and bias)

    Returns:
        dj_dw, dj_db - The gradients of the cost with respect to w and b, averaged over all training examples

    Raises:
        ValueError - If x and y are not 1-D with the same length, or if they are empty
    """

    # Accept plain Python sequences as well as NumPy arrays
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Fail loudly instead of silently ignoring surplus targets
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError(
            f"x and y must be 1-D with the same length, got shapes {x.shape} and {y.shape}"
        )

    # No of training examples
    m = x.shape[0]
    if m == 0:
        raise ValueError("x and y must contain at least one training example")

    # Prediction error for every training example at once
    errors = w * x + b - y

    # Averaging the per-example gradients over the training set
    dj_dw = np.sum(errors * x) / m
    dj_db = np.sum(errors) / m

    return dj_dw, dj_db
        

In [13]:
# The Gradient Descent Function combines all the knowledge from before to complete the algorithm
def gradient_descent(x, y, w_in, b_in, alpha, num_iterations, cost_function, gradient_function,
                     *, max_history=10000):

    """
    Runs batch gradient descent to tune the parameters w and b of the model

    On every iteration the gradient is evaluated at the current (w, b) and both
    parameters are moved against it by a step of size alpha. Progress is printed
    about ten times over the whole run.

    Args:
        x - Features
        y - Target
        w_in, b_in - Initial values of the model parameters
        alpha - Learning rate, the factor applied to the gradients on each update
        num_iterations - No of times gradient descent is being run
        cost_function - A helper function called as cost_function(x, y, w, b) to find the cost
        gradient_function - A helper function called as gradient_function(x, y, w, b) returning (dj_dw, dj_db)
        max_history - Only the first max_history iterations are stored in the histories (keyword only)

    Returns:
        w, b - Final value after tuning the parameters of the model
        J_history - The list of the values of J after each stored update
        P_history - The list of the [w, b] pairs after each stored update
    """

    J_history, P_history = [], []
    w, b = w_in, b_in

    # Progress is reported roughly ten times per run; the interval never changes,
    # so it is computed once (it is only used when the loop body executes)
    report_every = math.ceil(num_iterations / 10)

    # Using Gradient Descent for the given number of iterations
    for i in range(num_iterations):

        # Computing the gradient
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Updating the values of parameters
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        keep_history = i < max_history
        report = i % report_every == 0

        # The cost of the updated parameters is evaluated whenever it is stored or
        # printed, so the printed value is never a stale entry from the capped history
        if keep_history or report:
            cost = cost_function(x, y, w, b)

        # Storing the values of w, b and J
        if keep_history:
            J_history.append(cost)
            P_history.append([w, b])

        # The printed gradients are the ones used for this update; cost, w and b are post-update
        if report:
            print(f"Iteration {i:4}: Cost {cost:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")

    return w, b, J_history, P_history

In [14]:
# Starting point of the search: both parameters begin at zero
w_init = b_init = 0

# Gradient descent settings: number of update steps and learning rate
iterations = 10_000
tmp_alpha = 0.01

# Fit w and b on the training data, keeping the cost and parameter histories
w_final, b_final, J_hist, p_hist = gradient_descent(
    x_train,
    y_train,
    w_init,
    b_init,
    tmp_alpha,
    iterations,
    compute_cost,
    compute_gradient,
)

# Report the fitted parameters
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 7.93e+04  dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 4.00000e+00
Iteration 1000: Cost 3.41e+00  dj_dw: -3.712e-01, dj_db:  6.007e-01   w:  1.949e+02, b: 1.08228e+02
Iteration 2000: Cost 7.93e-01  dj_dw: -1.789e-01, dj_db:  2.895e-01   w:  1.975e+02, b: 1.03966e+02
Iteration 3000: Cost 1.84e-01  dj_dw: -8.625e-02, dj_db:  1.396e-01   w:  1.988e+02, b: 1.01912e+02
Iteration 4000: Cost 4.28e-02  dj_dw: -4.158e-02, dj_db:  6.727e-02   w:  1.994e+02, b: 1.00922e+02
Iteration 5000: Cost 9.95e-03  dj_dw: -2.004e-02, dj_db:  3.243e-02   w:  1.997e+02, b: 1.00444e+02
Iteration 6000: Cost 2.31e-03  dj_dw: -9.660e-03, dj_db:  1.563e-02   w:  1.999e+02, b: 1.00214e+02
Iteration 7000: Cost 5.37e-04  dj_dw: -4.657e-03, dj_db:  7.535e-03   w:  1.999e+02, b: 1.00103e+02
Iteration 8000: Cost 1.25e-04  dj_dw: -2.245e-03, dj_db:  3.632e-03   w:  2.000e+02, b: 1.00050e+02
Iteration 9000: Cost 2.90e-05  dj_dw: -1.082e-03, dj_db:  1.751e-03   w:  2.000e+02, b: 1.00024e+02


## Predictions after tuning the parameters using gradient descent

In [15]:
# Evaluate the fitted line w_final * x + b_final at x = 1.0, 1.2 and 2.0.
# Per the messages printed here, x is expressed in units of 1000 sqft and the
# prediction in thousands of dollars.
print(f"1000 sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars")
print(f"1200 sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars")
print(f"2000 sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars")

1000 sqft house prediction 300.0 Thousand dollars
1200 sqft house prediction 340.0 Thousand dollars
2000 sqft house prediction 500.0 Thousand dollars
