In [72]:
#This code will show you how to automate the gradient descent process
#This applies for a single variable
import math, copy
import numpy as np
from datascience import *
import matplotlib.pyplot as plt


In [73]:
"""
#This is for example data
#Example data
x_train = np.array([1.0, 2.0])   #features (column data used when referencing a CSV)
y_train = np.array([300.0, 500.0])   #target value

"""

'\n#This is for example data\n#Example data\nx_train = np.array([1.0, 2.0])   #features (column data used when referencing a CSV)\ny_train = np.array([300.0, 500.0])   #target value\n\n'

In [None]:
#NOTICE: This block is only meant to test CSVs on these functions. Turn off if not currently
#being used.

cah = Table.read_table("academic_drop.csv")

def create_label(target_response):
    if target_response == "Dropout":
        return 1
    else:
        return 0
    
target_dummy = cah.apply(create_label, "Target")

target_dummy_label = cah.with_column("target", target_dummy)

#We're only using one variable and the binary target
clean_data = target_dummy_label.select("Curricular units 1st sem (grade)", "target")

target_data = clean_data.select("target")
var_data = clean_data.select("Curricular units 1st sem (grade)")

x_train = clean_data.column("Curricular units 1st sem (grade)")
y_train = clean_data.column("target")

#This code functions correctly. Passed test. Can be used as reference.



In [75]:
#Function to calculate the cost
def compute_cost(x, y, w, b):
    m = x.shape[0]
    cost = 0

    for i in range(m):
        f_wb = w * x[i] + b
        cost = cost + (f_wb - y[i])**2
    total_cost = 1 / (2 * m)

    return total_cost
    

In [None]:
#To implement the gradient descent algorithm, you need three functions: 
#compute_gradient:Implementing the two equations gradient is defined as.
#compute_cost: Measuring the cost over all of the training samples.
#gradient_descent: Ultilizing compute_gradient and compute_cost to use gradient descent.

#NOTICE: Reference Course 1 lab 4 and 6 to see the equations the functions are using

def compute_gradient(x, y, w, b,):
    """This function computes the gradient for linear regression.
    Args:
        x: Data, m examples or variables
        y: target values (target variable)
        w,b: model parameters

    Returns:
        dj_dw: The gradient of the cost w.r.t. the parameters of w
        dj_db: The gradient of the cost w.r.t the parameters of b
    """

    #The number of training examples
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i] 
        dj_db_i = f_wb - y[i] 
        dj_db += dj_db_i
        dj_dw += dj_dw_i 
    dj_dw = dj_dw / m 
    dj_db = dj_db / m 
        
    return dj_dw, dj_db

In [77]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """This function performs gradient descent to fit w,b. Updates w,b by taking
    num_iters gradient steps with learning rate alpha.

    Args:
        x: Data, m examples 
        y: target values
        w_in,b_in: initial values of model parameters  
        alpha: Learning rate
        num_iters: number of iterations to run gradient descent
        cost_function: function to call to produce cost
        gradient_function: function to call to produce gradient
    
    Returns:
        w (scalar): Updated value of parameter after running gradient descent
        b (scalar): Updated value of parameter after running gradient descent
        J_history (List): History of cost values
        p_history (list): History of parameters [w,b] 
    """

    #Arrays to store cst J and w's at each iteration (used for graphing)
    J_history = []
    p_history = []
    b = b_in
    w = w_in

    for i in range(num_iters):
        #Calculate the gradient and update the parameters using gradient_function
        dj_dw, dj_db = gradient_function(x, y, w , b)

        #Update parameters using the gradient equation (3)
        b = b - alpha * dj_db                            
        w = w - alpha * dj_dw

        #Save cost J at each iteration
        if i<100000: #Resource exhaustion prevention
            J_history.append(cost_function(x, y, w, b))
            p_history.append([w,b])

        #Print the cost at intervals 10 times or as many iterations if < 10
        if i%  math.ceil(num_iters/10) == 0:
            print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
            
    return w, b, J_history, p_history #return w and J,w history for graphing

In [78]:
#Finally it's time to run gradient descent
#intialize parameters
w_init = 0
b_init = 0
#Set interation and tmp_alpha
iterations = 10000
tmp_alpha = 1.0e-2
#run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train, y_train, w_init, b_init, tmp_alpha, 
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4})")

Iteration    0: Cost 1.13e-04  dj_dw: -2.331e+00, dj_db: -3.212e-01   w:  2.331e-02, b: 3.21203e-03
Iteration 1000: Cost 1.13e-04  dj_dw:  1.961e-03, dj_db: -2.515e-02   w: -3.487e-02, b: 6.67119e-01
Iteration 2000: Cost 1.13e-04  dj_dw:  3.556e-04, dj_db: -4.562e-03   w: -4.426e-02, b: 7.87626e-01
Iteration 3000: Cost 1.13e-04  dj_dw:  6.450e-05, dj_db: -8.275e-04   w: -4.597e-02, b: 8.09484e-01
Iteration 4000: Cost 1.13e-04  dj_dw:  1.170e-05, dj_db: -1.501e-04   w: -4.627e-02, b: 8.13448e-01
Iteration 5000: Cost 1.13e-04  dj_dw:  2.122e-06, dj_db: -2.722e-05   w: -4.633e-02, b: 8.14167e-01
Iteration 6000: Cost 1.13e-04  dj_dw:  3.849e-07, dj_db: -4.937e-06   w: -4.634e-02, b: 8.14298e-01
Iteration 7000: Cost 1.13e-04  dj_dw:  6.980e-08, dj_db: -8.955e-07   w: -4.634e-02, b: 8.14321e-01
Iteration 8000: Cost 1.13e-04  dj_dw:  1.266e-08, dj_db: -1.624e-07   w: -4.634e-02, b: 8.14325e-01
Iteration 9000: Cost 1.13e-04  dj_dw:  2.296e-09, dj_db: -2.946e-08   w: -4.634e-02, b: 8.14326e-01


In [79]:
#Let's test the gradient descent with data from a csv. Use the dropout csv with only one
#variable and the target value to test.