In [22]:
import numpy as np 
import copy, math
import matplotlib.pyplot as plt 
import pandas as pd 

In [4]:
# Training set: one row per example, one column per feature.
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
# Target value for each row of X_train.
y_train = np.array([460, 232, 178])

In [5]:
# Show the shape and contents of the feature matrix, then of the targets.
for label, values in (("X_train", X_train), ("y_train", y_train)):
    print(f"{label}.shape: {values.shape}")
    print(values)

X_train.shape: (3, 4)
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y_train.shape: (3,)
[460 232 178]


In [6]:
# Reference parameters: one weight per feature column plus a scalar bias.
w_init = np.array([
    0.39133535,
    18.75376741,
    -53.36032453,
    -26.42131618,
])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [7]:
def predict_loop(X, w, b):
    '''
    Linear-regression prediction computed one element at a time (no vectorization).

    Args:
        X (ndarray): Shape (m,n), m examples with n feature values each.
        w (ndarray): Shape (n,), one weight per feature.
        b (float): Bias term added to every prediction.

    Returns:
        y_hat (ndarray): Shape (m,), predicted output for each of the m examples.
    '''
    num_examples, num_features = X.shape
    y_hat = np.zeros((num_examples,))

    for row in range(num_examples):
        # Accumulate the weighted sum feature by feature, then shift by the bias.
        for col in range(num_features):
            y_hat[row] += w[col] * X[row, col]
        y_hat[row] += b

    return y_hat

In [10]:
# Loop-based predictions for the training set using the reference parameters.
predict_loop(X=X_train, w=w_init, b=b_init)

array([459.99999762, 231.99999837, 177.99999899])

In [11]:
def predict(X, w, b):
    '''
    Vectorized linear-regression prediction: dot product of X and w, plus b.

    Args:
        X (ndarray): Shape (m,n), m examples with n feature values each.
        w (ndarray): Shape (n,), one weight per feature.
        b (float): Bias term added to every prediction.

    Returns:
        y_hat (ndarray): Shape (m,), predicted output for each of the m examples.
    '''
    return np.dot(X, w) + b

In [12]:
# Vectorized predictions for the training set using the reference parameters.
predict(X=X_train, w=w_init, b=b_init)

array([459.99999762, 231.99999837, 177.99999899])

In [15]:
def compute_cost(X, y, w, b):
    '''
    Halved mean squared error of the linear model's predictions against y.

    Args:
        X (ndarray): Shape (m,n), m examples with n feature values each.
        y (ndarray): m target values.
        w (ndarray): Shape (n,), one weight per feature.
        b (float): Bias term of the model.

    Returns:
        total_cost (float): Sum of squared prediction errors divided by 2*m
            for the given w and b.
    '''
    m = X.shape[0]
    residuals = predict(X, w, b) - y
    return (1 / (2 * m)) * np.sum(residuals ** 2)

In [16]:
# Cost of the reference parameters on the training set.
cost_at_init = compute_cost(X_train, y_train, w_init, b_init)
print(f"Cost: {cost_at_init}, w: {w_init}, b: {b_init}")

Cost: 1.5578903428073909e-12, w: [  0.39133535  18.75376741 -53.36032453 -26.42131618], b: 785.1811367994083


In [21]:
def compute_gradients(X, y, w, b):
    '''
    Compute the gradients of the squared-error cost with respect to w and b (vectorized).

    The prediction errors are computed once and reused for both gradients,
    instead of calling predict() again for every feature.

    Args:
        X (ndarray): Shape (m,n), m examples with n feature values each.
        y (ndarray): m target values.
        w (ndarray): Shape (n,), one weight per feature.
        b (float): Bias term of the model.

    Returns:
        dj_dw (ndarray): Shape (n,), gradient of the cost with respect to each weight.
        dj_db (float): Gradient of the cost with respect to b.
    '''
    m = X.shape[0]

    # Residuals are shared by both gradients, so evaluate the model only once.
    errors = predict(X, w, b) - y

    # Entry j of X.T @ errors is sum_i(errors[i] * X[i, j]), the per-feature sum
    # the original Python loop built one column at a time.
    dj_dw = (1 / m) * np.dot(X.T, errors)
    dj_db = (1 / m) * np.sum(errors)

    return dj_dw, dj_db

In [20]:
# Gradients at the reference parameters.
compute_gradients(X=X_train, y=y_train, w=w_init, b=b_init)

(array([-2.72623569e-03, -6.27197244e-06, -2.21745567e-06, -6.92403362e-05]),
 -1.6739250744042995e-06)

In [48]:
def gradient_descent(X, y, w, b, lr=0.01, iterations=100, log_every=1):
    '''
    Run batch gradient descent and return the lowest-cost parameters visited.

    Every parameter set is scored with compute_cost, including the starting
    point and the result of the final update, so the last step is never
    discarded and iterations=0 returns a copy of the initial parameters.

    Args:
        X (ndarray): Shape (m,n), m examples with n feature values each.
        y (ndarray): m target values.
        w (ndarray): Shape (n,), initial weights. Not modified; a copy is used.
        b (float): Initial bias term.
        lr (float): Learning rate applied to both gradients.
        iterations (int): Number of parameter updates to perform.
        log_every (int): Print a progress line every `log_every` iterations.
            The default of 1 logs every iteration; 0 or None disables the
            per-iteration lines (the final summary is always printed).

    Returns:
        min_w (ndarray): Shape (n,), weights with the lowest cost seen.
        min_b (float): Bias with the lowest cost seen.
    '''
    _w = copy.deepcopy(w)
    _b = b

    # Gradients used for the most recent update; zeros until the first update,
    # which is what the first progress line reports.
    dj_dw = np.zeros((X.shape[1],))
    dj_db = 0.0

    # Best-so-far starts at the initial parameters so a usable result is
    # returned even if no later cost compares lower.
    min_cost = float('inf')
    min_w, min_b = _w, _b

    current_cost = compute_cost(X, y, _w, _b)

    for i in range(iterations):
        if log_every and i % log_every == 0:
            print(f"Current Cost: {current_cost}, w: {_w}, b: {_b}, dj_dw: {dj_dw}, dj_db: {dj_db}")

        if current_cost < min_cost:
            min_cost, min_w, min_b = current_cost, _w, _b

        dj_dw, dj_db = compute_gradients(X, y, _w, _b)

        # Rebinding (not in-place update) keeps min_w pointing at the earlier array.
        _w = _w - lr*dj_dw
        _b = _b - lr*dj_db

        current_cost = compute_cost(X, y, _w, _b)

    # Score the parameters produced by the final update as well.
    if current_cost < min_cost:
        min_cost, min_w, min_b = current_cost, _w, _b

    print(f"Min Cost: {min_cost}, w: {min_w}, b: {min_b}")

    return min_w, min_b

In [50]:
# Train from all-zero parameters with a small learning rate.
initial_b = 0.0
initial_w = np.zeros_like(w_init)
w, b = gradient_descent(
    X_train,
    y_train,
    initial_w,
    initial_b,
    lr=5.0e-7,
    iterations=1000,
)

Current Cost: 49518.0, w: [0. 0. 0. 0.], b: 0.0, dj_dw: [0. 0. 0. 0.], dj_db: 0.0
Current Cost: 2529.46295223163, w: [2.41334667e-01 5.58666667e-04 1.83666667e-04 6.03500000e-03], b: 0.000145, dj_dw: [-4.82669333e+05 -1.11733333e+03 -3.67333333e+02 -1.20700000e+04], dj_db: -290.0
Current Cost: 765.8336829952987, w: [1.94582073e-01 4.54367630e-04 1.34363401e-04 4.77918168e-03], b: 0.00011402564683333336, dj_dw: [93505.18809822   208.59807378    98.60653178  2511.63663444], dj_db: 61.94870633333326
Current Cost: 699.6290280210451, w: [2.03641847e-01 4.78507728e-04 1.30194588e-04 4.93584845e-03], b: 0.00011714368908454539, dj_dw: [-1.81195497e+04 -4.82801955e+01  8.33762481e+00 -3.13333530e+02], dj_db: -6.236084502424063
Current Cost: 697.1337425623078, w: [2.01888855e-01 4.77764738e-04 1.17281885e-04 4.81887235e-03], b: 0.00011365700305146084, dj_dw: [3.50598521e+03 1.48597858e+00 2.58254064e+01 2.33952194e+02], dj_db: 6.973372066169105
Current Cost: 697.0296591788103, w: [2.02230666e-01

In [52]:
# Compare the trained model's predictions with the training targets.
y_hat = predict(X=X_train, w=w, b=b)
print(f"Predicted: {y_hat}, Target: {y_train}")

Predicted: [426.18472345 286.16777187 171.46856801], Target: [460 232 178]
