In [1]:
import math, copy
import numpy as np
import matplotlib.pyplot as plt
# reduced display precision on numpy arrays (applies to printed output only,
# the stored values keep their full precision)
np.set_printoptions(precision=2)

In [2]:
# Training inputs: one row per example, one column per feature
X_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
# Training targets: one value per row of X_train
y_train = np.array([460, 232, 178])

In [6]:
print(X_train.shape)  # (m, n): m=3 training examples, n=4 features

(3, 4)


In [7]:
# data is stored in numpy arrays: X_train is a 2-D matrix (examples x features),
# y_train is a 1-D vector with one target per example
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)}")
print(X_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)}")
print(y_train)

X Shape: (3, 4), X Type:<class 'numpy.ndarray'>
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y Shape: (3,), y Type:<class 'numpy.ndarray'>
[460 232 178]


In [8]:
# Pre-chosen model parameters (bias b_init and one weight per feature in w_init).
# The notebook later evaluates the cost at these values and labels them "optimal".
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [9]:
def predict_single_loop(x, w, b):
    """
    Predict one target value by accumulating the feature/weight products
    one element at a time.

    :param x: ndarray, Shape (n,), a single example with n features
    :param w: ndarray, Shape (n,), model weights
    :param b: scalar, model bias
    :return: p(scalar): prediction, i.e. the sum of x[j]*w[j] over all j, plus b
    """
    prediction = 0
    # add the contribution of every feature in index order
    for j in range(x.shape[0]):
        prediction += x[j] * w[j]
    # the bias is added once, after all feature contributions
    prediction += b
    return prediction

In [10]:
# get a row from training data (the first example)
x_vec = X_train[0]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# make a prediction with the element-by-element loop implementation
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


In [11]:
# Implementing the above prediction using a dot product
def predict(x, w, b):
    """
    Predict one target value with a single vectorized dot product.

    :param x: ndarray, Shape (n,), a single example with n features
    :param w: ndarray, Shape (n,), model weights
    :param b: scalar, model bias
    :return: p(scalar): prediction, i.e. dot(w, x) + b
    """
    return np.dot(w, x) + b

In [12]:
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")
# make a prediction with the vectorized predict() so that this cell actually
# exercises the dot-product implementation rather than the loop version again
f_wb = predict(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


In [15]:
def compute_cost(X, y, w, b):
    """
    Compute the squared-error cost of a linear model over a data set:
    the sum of squared prediction errors divided by 2*m.

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter

    Returns:
      cost (scalar): cost
    """
    # Take the number of examples from the data that was passed in, not from a
    # notebook-level global, so the function works for any data set.
    m = X.shape[0]
    f_wb = np.dot(X, w) + b          # predictions for all m examples, shape (m,)
    diff = f_wb - y                  # prediction errors, shape (m,)
    # dot(diff, diff) is the sum of the squared errors
    cost = np.dot(diff, diff) / (2 * m)
    return cost

In [16]:
# compute and display cost using our pre-chosen, near-optimal parameters
cost = compute_cost(X_train, y_train, w_init, b_init)
print(f"Cost at optimal w : {cost}")

Cost at optimal w : 1.5578904330213735e-12


In [20]:
def compute_gradient(X, y, w, b):
    """
    Computes the gradient for linear regression
    Args:
        X (ndarray (m,n)): Data, m examples with n features
        y (ndarray (m,)) : target values
        w (ndarray (n,)) : model parameters
        b (scalar)       : model parameter

    Returns:
        dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
        dj_db (scalar):       The gradient of the cost w.r.t. the parameter b.
    """
    # Use the data that was passed in, not a notebook-level global, so the
    # function works for any data set.
    m = X.shape[0]
    # prediction error of every example, shape (m,)
    err = np.dot(X, w) - y + b
    # Row i of X.T is feature column i of X, so this one product computes the
    # dot of every feature column with the errors at once, giving shape (n,).
    dj_dw = np.dot(X.T, err) / m
    dj_db = np.sum(err) / m
    return dj_dw, dj_db


In [21]:
# compute and display the gradient at the pre-chosen parameters w_init, b_init
tmp_dj_dw, tmp_dj_db = compute_gradient(X_train, y_train, w_init, b_init)
print(f"dj_db at initial w,b: {tmp_dj_db}")
print(f"dj_dw at initial w,b: \n {tmp_dj_dw}")

dj_db at initial w,b: -1.6739251122999121e-06
dj_dw at initial w,b: 
 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]


In [24]:
# Now gradient descent with multiple variables
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost, called as
                            cost_function(X, y, w, b) -> scalar
      gradient_function   : function to compute the gradient, called as
                            gradient_function(X, y, w, b) -> (dj_dw, dj_db)
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent

    Returns:
      w (ndarray (n,)) : Updated values of parameters
      b (scalar)       : Updated value of parameter
      """
    # w is rebound to a new array on every step below, so w_in is never
    # modified in place.
    w = w_in
    b = b_in
    for i in range(num_iters):
        # Use the callables supplied by the caller instead of hard-coded
        # notebook globals, so alternative cost/gradient functions take effect.
        cost = cost_function(X, y, w, b)
        dj_dw, dj_db = gradient_function(X, y, w, b)
        # update all parameters from the same gradient evaluation
        w = w - alpha * dj_dw
        b = b - alpha * dj_db
        # Progress report every 99 iterations. The cost shown was evaluated
        # before this iteration's update (i.e. after i updates), while w and b
        # are the values after the update.
        if i % 99 == 0:
            print(f"Cost after {i} iterations: {cost}")
            print(f"Current value of w is {w}")
            print(f"Current value of b is {b}")
    return w, b

In [25]:
# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent
w_final, b_final = gradient_descent(X_train, y_train, initial_w, initial_b, compute_cost, compute_gradient, alpha, iterations)
# the label lists the values in the order they are printed: bias first, then weights
print(f"b,w found by gradient descent: {b_final:0.2f}, {w_final}")
# compare the learned model's prediction with the target for every training example
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final)+b_final:0.2f}, target value: {y_train[i]}")

Cost after 0 iterations: 49518.0
Current value of w is [2.41e-01 5.59e-04 1.84e-04 6.03e-03]
Current value of b is 0.000145
Cost after 99 iterations: 696.0118041331306
Current value of w is [ 0.2  0.  -0.  -0. ]
Current value of b is -0.00011745590317761642
Cost after 198 iterations: 694.9526500150532
Current value of w is [ 0.2   0.   -0.   -0.01]
Current value of b is -0.00035487296468089186
Cost after 297 iterations: 693.9026620364898
Current value of w is [ 0.2   0.   -0.   -0.02]
Current value of b is -0.0005911804640586401
Cost after 396 iterations: 692.8617598510532
Current value of w is [ 0.2   0.   -0.   -0.02]
Current value of b is -0.0008263832759345858
Cost after 495 iterations: 691.8298638166552
Current value of w is [ 0.2   0.   -0.01 -0.03]
Current value of b is -0.0010604862535202677
Cost after 594 iterations: 690.8068949893494
Current value of w is [ 0.2   0.   -0.01 -0.04]
Current value of b is -0.0012934942287090873
Cost after 693 iterations: 689.7927751171948
Curren