In [6]:
# Gradient Descent for Linear Regression
# yhat = wx + b 
# cost J(w, b) = sum((yhat - y)**2) / (2m)   (half the mean squared error)
import numpy as np

# Print floats inside arrays in fixed-point notation with six decimals
# (never scientific notation), which keeps the training log easy to scan.
np.set_printoptions(
    formatter=dict(float_kind='{:f}'.format),
    suppress=True,
)

## Gradient Descent With Multiple Variables
$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{1}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, the parameters $w_j$ and $b$ are updated simultaneously, and  

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (\hat{y}^{(i)} - y^{(i)})x_{j}^{(i)} \tag{2}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (\hat{y}^{(i)} - y^{(i)}) \tag{3}
\end{align}
$$
* m is the number of training examples in the data set

    
* $\hat{y}^{(i)} = f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$ is the target value

In [14]:
# Small 2x2 matrix used to try out a few basic NumPy operations.
z = np.array([[2, 3],
              [4, 5]])
print(z)

# Transpose: rows and columns swapped.
print(z.transpose())

# Matrix product of z with its own transpose.
print(z.dot(z.transpose()))

# Plain Python list; none of the statements in this cell read it.
a = [1, 2, 3]
# Multiplying an array by a scalar scales every element.
print(3 * z)

[[2 3]
 [4 5]]
[[2 4]
 [3 5]]
[[13 23]
 [23 41]]
[[ 6  9]
 [12 15]]


In [8]:
# Create gradient descent function
def gradient_descend(X, y, w, b, learning_rate):
    """Perform one batch gradient-descent update for linear regression.

    The model is ``y_hat = np.dot(X, w) + b``. The gradients computed here
    are those of the cost ``J = sum((y_hat - y)**2) / (2 * m)``.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix: rows are samples, columns are features.
    y : array-like
        Target values, one per sample, shaped like the predictions.
    w : np.ndarray
        Current weights, one per feature.
    b : float
        Current bias term.
    learning_rate : float
        Step size applied to both gradients.

    Returns
    -------
    tuple
        The updated ``(w, b)``. New objects are returned; the arguments are
        not modified in place.

    Raises
    ------
    ValueError
        If subtracting ``y`` from the predictions changes their shape, e.g.
        a column-vector ``y`` of shape (m, 1) against predictions of shape
        (m,). NumPy would otherwise broadcast the difference to (m, m) and
        silently produce wrong gradients.
    """
    m = X.shape[0]
    y_pred = np.dot(X, w) + b

    # Residuals must keep the shape of the predictions; anything else means
    # y was broadcast against y_pred instead of being matched element-wise.
    error = y_pred - np.asarray(y)
    if error.shape != np.shape(y_pred):
        raise ValueError(
            f"y with shape {np.shape(y)} does not match predictions "
            f"with shape {np.shape(y_pred)}"
        )

    # Vectorised form of (1/m) * sum_i error[i] * X[i] and (1/m) * sum_i error[i]
    dw = (1/m) * np.dot(X.T, error)
    db = (1/m) * np.sum(error)

    # Update both parameters simultaneously from the same gradients
    w = w - (learning_rate * dw)
    b = b - (learning_rate * db)
    return w, b

In [9]:
def fit(X, y, learning_rate = 0.001, epochs = 1000, log_every = 100):
    """Train a linear-regression model with batch gradient descent.

    Weights start at zero (one per column of ``X``) and the bias at 0.0;
    ``gradient_descend`` is then applied ``epochs`` times.

    Parameters
    ----------
    X : np.ndarray
        2-D feature matrix: rows are samples, columns are features.
    y : np.ndarray
        Target values, one per sample.
    learning_rate : float, optional
        Step size passed to every gradient-descent update.
    epochs : int, optional
        Number of gradient-descent updates to run (default 1000).
    log_every : int or None, optional
        Print the mean squared error and current parameters whenever the
        epoch index is a multiple of this value (default 100). Pass ``0``
        or ``None`` to disable the progress output.

    Returns
    -------
    tuple
        The learned ``(w, b)``.
    """
    # Parameters
    n_features = X.shape[1]
    w = np.zeros(n_features)
    b = 0.0

    # Iteratively make updates
    for epoch in range(epochs):
        w, b = gradient_descend(X, y, w, b, learning_rate)
        # Progress report; the guard also avoids a modulo-by-zero when
        # logging is disabled with log_every=0.
        if log_every and epoch % log_every == 0:
            y_pred = np.dot(X, w) + b
            mse = np.mean((y_pred-y)**2)
            print(f'{epoch} mse is {mse}, paramters w:{w}, b:{b}')

    return w, b

In [10]:
def predict(X, w, b):
    """Evaluate the linear model ``np.dot(X, w) + b`` on the given inputs.

    X holds the samples to score, w the weights and b the bias; the result
    is the prediction for each sample.
    """
    weighted_sum = np.dot(X, w)
    return weighted_sum + b

In [11]:
# Testing the model
# Toy training set: four samples with two features each.
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5],
])
y = np.array([6, 10, 12, 23])
# Alternative single-feature data set, kept for reference. X is 1-D here, so
# it would need reshaping to (-1, 1) before fit() can read X.shape[1].
#X = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50])
#y = np.array([5, 8, 16, 19, 30, 35, 30, 43, 41, 44, 58])

print(X.shape, y.shape)

# Train, then show the learned parameters.
w, b = fit(X, y, 0.001)
print("w", w)
print("b", b)

# Score two samples that are not part of the training data.
X_test = np.array([
    [5, 6],
    [6, 7],
])
y_pred = predict(X_test, w, b)
print("X_test:", X_test)
print("y_pred:", y_pred)

(4, 2) (4,)
0 mse is 193.80047896875004, paramters w:[0.038500 0.051250], b:0.012750000000000001
100 mse is 8.782391827792338, paramters w:[1.593807 2.087506], b:0.4936992330167193
200 mse is 6.477689422407822, paramters w:[1.793954 2.300350], b:0.5063963785479015
300 mse is 6.387237602795848, paramters w:[1.844496 2.312753], b:0.4682566006192434
400 mse is 6.325705968743046, paramters w:[1.878092 2.303289], b:0.4251966871458539
500 mse is 6.266565919823646, paramters w:[1.909346 2.291604], b:0.3822574194988948
600 mse is 6.209427323797611, paramters w:[1.939873 2.279855], b:0.3399821611726684
700 mse is 6.154218829402291, paramters w:[1.969858 2.268278], b:0.2984196139735879
800 mse is 6.10087519536888, paramters w:[1.999330 2.256894], b:0.2575642405869131
900 mse is 6.049333428973551, paramters w:[2.028300 2.245705], b:0.21740472133867594
w [2.056494 2.234815]
b 0.1783206893367041
X_test: [[5 6]
 [6 7]]
y_pred: [23.869678 28.160986]
