In [3]:
# Gradient Descent for Linear Regression
# yhat = wx + b 
# cost J(w, b) = sum((yhat - y)**2) / (2m)   (half the mean squared error)
import numpy as np

# Print NumPy floats in fixed-point notation (six decimals) rather than
# scientific notation, so printed arrays are easy to compare by eye.
np.set_printoptions(
    suppress=True,
    formatter={'float_kind': '{:f}'.format},
)

In [14]:
# Broadcasting demo: the scalar multiply and add are applied to every element.
a = np.array([1, 2, 3, 4])
print(3 * a + 2)

[ 5  8 11 14]


In lecture, *gradient descent* was described as:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\;  w &= w -  \alpha \frac{\partial J(w,b)}{\partial w} \tag{3}  \; \newline 
 b &= b -  \alpha \frac{\partial J(w,b)}{\partial b}  \newline \rbrace
\end{align*}$$
where the parameters $w$ and $b$ are updated simultaneously.  
The gradient is defined as:
$$
\begin{align}
\frac{\partial J(w,b)}{\partial w}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} ( \hat{y}^{(i)} - y^{(i)})x^{(i)} \tag{4}\\
  \frac{\partial J(w,b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (\hat{y}^{(i)} - y^{(i)}) \tag{5}\\
\end{align}
$$

In [4]:
# Create gradient descent function
def gradient_descend(X, y, w, b, learning_rate):
    """Perform one gradient-descent update of the linear-regression parameters.

    The model is ``y_pred = np.dot(X, w) + b``. The gradients are those of the
    cost ``J(w, b) = sum((y_pred - y)**2) / (2 * m)``::

        dJ/dw = (1/m) * sum_i (y_pred[i] - y[i]) * X[i]
        dJ/db = (1/m) * sum_i (y_pred[i] - y[i])

    Both gradients are computed from the incoming ``w`` and ``b`` before
    either is changed, so the two parameters are updated simultaneously.

    Args:
        X: Training inputs; the first axis indexes the ``m`` examples.
            Supported layouts are shape ``(m,)`` with a scalar ``w``, or shape
            ``(m, n)`` with ``w`` of shape ``(n,)``.
        y: Target values, shape ``(m,)``.
        w: Current weight (scalar) or weight vector.
        b: Current bias (scalar).
        learning_rate: Step size multiplying each gradient.

    Returns:
        Tuple ``(w, b)`` holding the updated parameters.

    Raises:
        ZeroDivisionError: If ``X`` contains no examples (``m == 0``).
        ValueError: If ``np.dot(X, w) + b - y`` does not have shape ``(m,)``,
            i.e. the operands do not line up one prediction per example.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    m = X.shape[0]
    inv_m = 1 / m  # plain Python division: fails loudly on an empty data set

    # Prediction error for every example at once.
    residual = np.dot(X, w) + b - y
    if residual.shape != (m,):
        # Guard against silent broadcasting (e.g. an (m, 1) array against an
        # (m,) array would otherwise yield an (m, m) residual).
        raise ValueError(
            f"expected residual of shape ({m},), got {residual.shape}; "
            "check the shapes of X, y and w"
        )

    # Vectorized form of the per-example sums in the gradient formulas.
    dw = inv_m * np.dot(X.T, residual)
    db = inv_m * np.sum(residual)

    # Step both parameters against their gradients.
    w = w - (learning_rate * dw)
    b = b - (learning_rate * db)
    return w, b

In [17]:
def fit(X, y, learning_rate=0.001, epochs=10000, log_every=100):
    """Fit ``y ~ np.dot(X, w) + b`` by repeated gradient-descent steps.

    Starts from ``w = 0.0`` and ``b = 0.0`` and calls ``gradient_descend``
    once per epoch.

    Args:
        X: Training inputs, passed through to ``gradient_descend``.
        y: Target values, passed through to ``gradient_descend``.
        learning_rate: Step size used for every update.
        epochs: Number of update steps to run.
        log_every: Print the mean squared error and the current parameters
            whenever ``epoch % log_every == 0``. Pass ``0`` or ``None`` to
            disable printing.

    Returns:
        Tuple ``(w, b)`` with the parameters after the final epoch.
    """
    # Parameters
    w = 0.0
    b = 0.0

    # Iteratively make updates
    for epoch in range(epochs):
        w, b = gradient_descend(X, y, w, b, learning_rate)
        # Debugging - report the error of the freshly updated parameters
        if log_every and epoch % log_every == 0:
            y_pred = np.dot(X, w) + b
            mse = np.mean((y_pred-y)**2)
            print(f'{epoch} mse is {mse}, paramters w:{w}, b:{b}')

    return w, b

In [6]:
def predict(X, w, b):
    """Return the linear model's output ``np.dot(X, w) + b`` for inputs ``X``."""
    return np.dot(X, w) + b

In [18]:
# Testing the model
# Training inputs are the 11 evenly spaced values 0, 5, ..., 50, paired
# one-to-one with the targets in y.
X = np.arange(0, 51, 5)
y = np.array([5, 8, 16, 19, 30, 35, 30, 43, 41, 44, 58])
print(X.shape, y.shape)

# Train the model and report the learned parameters.
w, b = fit(X, y, learning_rate=0.001)
print("w", w)
print("b", b)

# Predict for two new inputs: 33 lies inside the training range of X,
# while 56 lies beyond its largest value (50).
X_test = np.array([33, 56])
y_pred = predict(X_test, w, b)
print("X_test:", X_test)
print("y_pred:", y_pred)

(11,) (11,)
0 mse is 40.0004658347107, paramters w:0.9895454545454546, b:0.02990909090909091
100 mse is 22.1984113057095, paramters w:1.1254126280632555, b:0.19418203812112242
200 mse is 21.712903688074714, paramters w:1.1209612381068403, b:0.34992985507624
300 mse is 21.254343940743905, paramters w:1.1166351477898266, b:0.5012936154797735
400 mse is 20.821236334942622, paramters w:1.1124308301240635, b:0.6483967236414796
500 mse is 20.41216816161338, paramters w:1.1083448574005785, b:0.7913591102335173
600 mse is 20.025805123445725, paramters w:1.1043738983950269, b:0.9302973300678952
700 mse is 19.66088698266922, paramters w:1.1005147156517991, b:1.0653246571216322
800 mse is 19.316223450413812, paramters w:1.0967641628445792, b:1.196551176887114
900 mse is 18.990690304229354, paramters w:1.0931191822111979, b:1.3240838761229292
1000 mse is 18.68322572110012, paramters w:1.0895768020606906, b:1.4480267300783645
1100 mse is 18.392826813993853, paramters w:1.0861341343505284, b:1.56848