In [1]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('../deeplearning.mplstyle')
np.set_printoptions(precision=2)  # reduced display precision on numpy arrays

You will use the motivating example of housing price prediction. The training dataset contains three examples with four features (size, bedrooms, floors and, age) shown in the table below. Note that, unlike the earlier labs, size is in sqft rather than 1000 sqft. This causes an issue, which you will solve in the next lab!

Size (sqft)	Number of Bedrooms	Number of floors	Age of Home	Price (1000s dollars)
2104	5	1	45	460
1416	3	2	40	232
852	2	1	35	178
You will build a linear regression model using these values so you can then predict the price for other houses. For example, a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old.

Please run the following code cell to create your X_train and y_train variables.

In [2]:
X_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])

In [17]:
# data is stored in numpy array/matrix
print(f"X Shape: {X_train.shape}, X Type:{type(X_train)})")
print(X_train)
print(f"y Shape: {y_train.shape}, y Type:{type(y_train)})")
print(y_train)

X Shape: (3, 4), X Type:<class 'numpy.ndarray'>)
[[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y Shape: (3,), y Type:<class 'numpy.ndarray'>)
[460 232 178]


In [18]:
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


### Prediction

In [47]:
def predict_single_loop(x, w, b): 
    """
    single predict using linear regression
    
    Args:
      x (ndarray): Shape (n,) example with multiple features
      w (ndarray): Shape (n,) model parameters    
      b (scalar):  model parameter     
      
    Returns:
      p (scalar):  prediction
    """
    n = x.shape[0]
    p = 0
    for i in range(n):
        p_i = x[i] * w[i]  
        p = p + p_i         
    p = p + b                
    return p

# get a row from our training data
x_vec = X_train[0,:]
print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# make a prediction
f_wb = predict_single_loop(x_vec, w_init, b_init)
print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

x_vec shape (4,), x_vec value: [2104    5    1   45]
f_wb shape (), prediction: 459.9999976194083


In [55]:
# For loop
def predict_single_loop(x, w, b):
    n = x.shape[0]
    f_w = 0
    for i in range(n):
        fw = x[i] * w[i]
        f_w += fw
    return f_w + b

# Vecter
def predict(x, w, b):
    return np.dot(x, w) + b

x_vec = X_train[0]
print(f'X[0] = {predict_single_loop(x_vec, w_init, b_init)}')
# predict(x_vec, w_init, b_init)
print(predict_single_loop(X_train, w_init, b_init))  #只能进行单行预测
print(predict(X_train, w_init, b_init))

X[0] = 459.9999976194083
[-17299.11    736.68    769.72   -314.67]
[460. 232. 178.]


### Cost Function

In [50]:
def compute_cost(X, y, w, b): 
    """
    compute cost
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      cost (scalar): cost
    """
    m = X.shape[0]
    cost = 0.0
    for i in range(m):                                
        f_wb_i = np.dot(X[i], w) + b           #(n,)(n,) = scalar (see np.dot)
        print(f_wb_i)
        cost = cost + (f_wb_i - y[i])**2       #scalar
    cost = cost / (2 * m)                      #scalar    
    return cost
# Compute and display cost using our pre-chosen optimal parameters. 
cost = compute_cost(X_train, y_train, w_init, b_init)
print(f'Cost at optimal w : {cost}')

459.9999976194083
231.9999983694081
177.99999898940825
Cost at optimal w : 1.5578904045996674e-12


In [68]:
def cost_function(x, y, w, b, predict_function):
    n = x.shape[0]
    diff = 0.0

    # y_predict = predict_function(x,w,b)
    # print(y_predict)
    for i in range(n):
        y_predict = np.dot(x[i], w) + b
        y_diff = y_predict - y[i]
        diff += y_diff ** 2
    cost_diff = diff / (2 * n)
    return cost_diff

# Compute and display cost using our pre-chosen optimal parameters. 
cost = cost_function(X_train, y_train, w_init, b_init, predict)
print(f'Cost at optimal w : {cost}')

Cost at optimal w : 1.5578904045996674e-12


### Gradient

In [73]:
def compute_gradient(X, y, w, b):
    m, n = X.shape   #(number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db += err
    
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    return dj_db, dj_dw

#Compute and display gradient 
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -1.6739251122999121e-06
dj_dw at initial w,b: 
 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]


In [75]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn theta. Updates theta by taking 
    num_iters gradient steps with learning rate alpha
    
    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters  
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost
      gradient_function   : function to compute the gradient
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,)) : Updated values of parameters 
      b (scalar)       : Updated value of parameter 
      """
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db,dj_dw = gradient_function(X, y, w, b)   ##None

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw               ##None
        b = b - alpha * dj_db               ##None
      
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( cost_function(X, y, w, b))

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")
        
    return w, b, J_history #return final w,b and J history for graphing

# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

508.0428356666666
341.9734763333332
205.829807
Iteration    0: Cost  2529.46   
409.61826419025846
275.7211279491076
165.95235435336176
428.68720011432623
288.55610299827777
173.67681308590332
424.99361963284133
286.069154883401
172.17915133692253
425.7099181316527
286.5505880563804
172.46813022224904
425.5718720076035
286.45694412727835
172.41097567891867
425.59934166499625
286.4747123447689
172.4208790874839
425.594745214712
286.4708962300766
172.41779112629666
425.59636103558097
286.471261756085
172.41722009053632
425.5967732946436
286.470817171397
172.41616149228665
425.5974186927273
286.4705295492178
172.4151974037543
425.5980188916787
286.4702115343462
172.41421505761537
425.5986278151317
286.46989942390036
172.41323630076027
425.59923501623365
286.46958618588485
172.41225690069578
425.5998425188986
286.4692731826371
172.41127767739843
425.6004499310267
286.46896015022503
172.41029847200846
425.6010573285826
286.4686471397807
172.4093193153001
425.6016646968506
286.4683341413973
