In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy, math

In [19]:
# Training set: 3 examples (rows) x 4 features (columns).
X_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
# One target value per training example, in the same row order as X_train.
y_train = np.array([460, 232, 178])

In [20]:
# Show the feature matrix as the cell's output (rows are examples, columns are features).
X_train

array([[2104,    5,    1,   45],
       [1416,    3,    2,   40],
       [ 852,    2,    1,   35]])

### Parameter vector w, b

In [21]:
# Reference parameters: one weight per feature column of X_train, plus a scalar bias.
w_init = np.array([
    0.39133535,
    18.75376741,
    -53.36032453,
    -26.42131618,
])
b_init = 785.1811367994083
print(f"Initial bias: {b_init}, weights: {w_init}")

Initial bias: 785.1811367994083, weights: [  0.39133535  18.75376741 -53.36032453 -26.42131618]


### Cost function

In [22]:
def cost_function(X,y,w,b):
    m = X.shape[0]
    cost = 0.0
    for i in range (m):
        f = np.dot(X[i], w) + b
        cost = cost + (f-y[i])**2
    cost = cost / (2*m)
    return cost

In [23]:
# Evaluate the cost on the training set at the reference parameters (w_init, b_init).
print(cost_function(X_train, y_train, w_init, b_init))

1.5578904045996674e-12


In [24]:
def predict(vec, w, b):
    """
    Linear-model prediction for one example.

    Args:
        vec: feature vector of the example.
        w: weight vector, dotted with ``vec``.
        b: bias added to the dot product.

    Returns:
        ``np.dot(vec, w) + b``.
    """
    weighted_sum = np.dot(vec, w)
    return weighted_sum + b


# Predict for the first training example using the reference parameters.
x_vec = X_train[0]
print(predict(x_vec, w_init, b_init))


459.9999976194083


### Gradient with Multiple Variables

In [25]:
def computer_gradinet(X, y, w, b):
    """
    Gradient of the squared-error cost for the linear model ``np.dot(X[i], w) + b``.

    Args:
        X: 2-D array of shape (m, n): m examples, n features.
        y: targets, indexed as ``y[i]`` for each row of X.
        w: weight vector dotted with each row of X.
        b: scalar bias.

    Returns:
        Tuple ``(dj_db, dj_dw)``: the prediction error averaged over the m rows,
        and a length-n array holding, per feature, the average of
        ``error * X[i, j]`` over the m rows.
    """
    n_examples, n_features = X.shape
    grad_w = np.zeros((n_features,))
    grad_b = 0.0
    for row in range(n_examples):
        # Signed prediction error for this example.
        residual = (np.dot(X[row], w) + b) - y[row]
        grad_b = grad_b + residual
        for col in range(n_features):
            grad_w[col] += residual * X[row, col]
    # Turn the accumulated sums into averages over the examples.
    return grad_b / n_examples, grad_w / n_examples

### Gradient descent with multiple variables

In [36]:
def gradient_descent(X, y, w_in, b_in, cost_function, computer_gradinet, alpha, num_iters,
                     max_history=100000, print_every=1000):
    """
    Run batch gradient descent for ``num_iters`` steps.

    Args:
        X, y: training data; passed unchanged to both callbacks.
        w_in: initial weights (deep-copied, so the caller's object is not aliased).
        b_in: initial bias.
        cost_function: callable ``(X, y, w, b) -> cost``.
        computer_gradinet: callable ``(X, y, w, b) -> (dj_db, dj_dw)``.
        alpha: learning rate multiplying each gradient step.
        num_iters: number of update steps to perform.
        max_history: only iterations ``i < max_history`` are recorded in the
            returned cost history (bounds memory on long runs).
        print_every: print a progress line whenever ``i % print_every == 0``;
            pass 0 or None to disable progress output.

    Returns:
        Tuple ``(w, b, J_history)``: the final weights, the final bias, and the
        list of recorded costs (one per recorded iteration, after the update).
    """
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    for i in range(num_iters):
        dj_db, dj_dw = computer_gradinet(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        record = i < max_history
        report = bool(print_every) and i % print_every == 0
        if record or report:
            # Evaluate the cost for *this* iteration so the progress line is
            # never a stale value once the history cap has been reached.
            cost = cost_function(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f}   ")
    return w, b, J_history

In [37]:

# Start from all-zero parameters (same shape as w_init) and run 1000 steps
# with a very small learning rate.
initial_w = np.zeros_like(w_init)
initial_b = 0.
iterations = 1000
alpha = 5.0e-7
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                            cost_function, computer_gradinet,
                                            alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Take only the row count: unpacking into `_` would overwrite IPython's
# last-output variable and stop it from being updated.
m = X_train.shape[0]
# Compare the fitted model's prediction with the target for every training example.
for i in range(m):
    print(f"prediction: {predict(X_train[i], w_final, b_final):0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  2529.46   
b,w found by gradient descent: -0.00,[ 0.20396569  0.00374919 -0.0112487  -0.0658614 ] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
