In [1]:
!pip install -r requerements.txt



In [2]:
import math
import numpy as np

In [3]:
# Training set: x_train is the house size in units of 1000 sqft and
# y_train the matching price in thousands of dollars (same units as the
# prediction cells further down).
x_train = np.array([1.0, 2.0, 3.0])
y_train = np.array([300.0, 500.0, 700.0])

# Echo both arrays, one per line, to confirm what the model is fit on.
print(f'x_train = { x_train }', f'y_train = { y_train }', sep='\n')

x_train = [1. 2. 3.]
y_train = [300. 500. 700.]


In [4]:
def compute_cost(x, y, w, b):
  '''
  Computes the squared-error cost J(w, b) of the linear model f_wb = w * x + b

  Args:
    x (ndarray (m, )): Data, m examples
    y (ndarray (m, )): target values
    w, b (scalar)    : model parameters

  Returns:
    total_cost (scalar): sum of the squared prediction errors over the
                         m examples, scaled by 1 / (2 * m)
  '''
  n_examples = x.shape[0]

  # Accumulate the squared residual of every example, in index order.
  squared_error_sum = 0
  for idx in range(n_examples):
    residual = (w * x[idx] + b) - y[idx]
    squared_error_sum += residual ** 2

  return 1 / (2 * n_examples) * squared_error_sum


## Gradient descent summary

$$J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\tag{2}$$ 

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline
\;  w &= w -  \alpha \frac{\partial J(w,b)}{\partial w} \tag{3}  \; \newline 
 b &= b -  \alpha \frac{\partial J(w,b)}{\partial b}  \newline \rbrace
\end{align*}$$
where the parameters $w$ and $b$ are updated simultaneously.  
The gradient is defined as:
$$
\begin{align}
\frac{\partial J(w,b)}{\partial w}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \tag{4}\\
  \frac{\partial J(w,b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \tag{5}\\
\end{align}
$$

In [5]:
def compute_gradient(x, y, w, b):
  '''
  Evaluates the partial derivatives of the linear regression cost at (w, b)

  Args:
    x (ndarray (m, )): Data, m examples
    y (ndarray (m, )): target values
    w, b (scalar)    : model parameters

  Returns:
    dj_dw (scalar): The gradient of the cost w.r.t. the parameter w
    dj_db (scalar): The gradient of the cost w.r.t. the parameter b
  '''
  n_examples = x.shape[0]
  grad_w_sum = 0
  grad_b_sum = 0

  for idx in range(n_examples):
    # Prediction error of the current model on example idx.
    residual = (w * x[idx] + b) - y[idx]
    grad_b_sum += residual
    grad_w_sum += residual * x[idx]

  # Average the per-example contributions.
  return grad_w_sum / n_examples, grad_b_sum / n_examples


In [6]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function, history_limit=100000):
  '''
  Performs gradient descent to fit w, b. Updates w, b by taking num_iters gradient steps with learning rate alpha

  Args:
    x (ndarray (m, ))   : Data, m examples
    y (ndarray (m, ))   : target values
    w_in, b_in (scalar) : initial values of model parameters
    alpha (float)       : Learning rate
    num_iters (int)     : number of iterations to run gradient descent
    cost_function       : function to call to produce cost,
                          called as cost_function(x, y, w, b)
    gradient_function   : function to call to produce gradient, called as
                          gradient_function(x, y, w, b), returns (dj_dw, dj_db)
    history_limit (int) : only the first history_limit iterations are stored
                          in J_history / p_history, which bounds their size
                          on long runs

  Returns:
    w (scalar)      : Updated value of parameter after running gradient descent
    b (scalar)      : Updated value of parameter after running gradient descent
    J_history (List): History of cost values
    p_history (list): History of parameters [w, b]
  '''

  J_history = []
  p_history = []
  w = w_in
  b = b_in

  # Progress is printed on every report_every-th iteration (about ten lines
  # per run). The value does not change inside the loop, so compute it once.
  report_every = math.ceil(num_iters / 10)

  for i in range(num_iters):
    dj_dw, dj_db = gradient_function(x, y, w, b)

    # Both parameters are updated from gradients taken at the same (w, b).
    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    keep_history = i < history_limit
    report = i % report_every == 0

    # Evaluate the cost at most once per iteration, and only when it is used.
    if keep_history or report:
      cost = cost_function(x, y, w, b)

    if keep_history:
      J_history.append(cost)
      p_history.append([w, b])

    if report:
      # Report the cost of the current parameters. Reading J_history[-1]
      # here would show a stale value once i reaches history_limit.
      print(f"Iteration {i:4}: Cost {cost:0.2e}  ",
            f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
            f"w: {w: 0.3e}, b: {b: 0.5e}")

  return w, b, J_history, p_history


In [7]:
# Start both parameters at zero.
w_init = b_init = 0

# Gradient-descent settings for this run: number of steps and learning rate.
iterations = 10000
tmp_alpha  = 1.0e-2

# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(
  x_train,
  y_train,
  w_in=w_init,
  b_in=b_init,
  alpha=tmp_alpha,
  num_iters=iterations,
  cost_function=compute_cost,
  gradient_function=compute_gradient,
)
print(f"\n (w, b) found by gradient descent: ({w_final: 8.4f}, {b_final: 8.4f})")

Iteration    0: Cost 1.23e+05   dj_dw: -1.133e+03, dj_db: -5.000e+02   w:  1.133e+01, b:  5.00000e+00
Iteration 1000: Cost 6.55e-01   dj_dw:  1.600e-01, dj_db: -3.636e-01   w:  2.013e+02, b:  9.69785e+01
Iteration 2000: Cost 5.91e-02   dj_dw:  4.805e-02, dj_db: -1.092e-01   w:  2.004e+02, b:  9.90924e+01
Iteration 3000: Cost 5.33e-03   dj_dw:  1.443e-02, dj_db: -3.281e-02   w:  2.001e+02, b:  9.97274e+01
Iteration 4000: Cost 4.81e-04   dj_dw:  4.335e-03, dj_db: -9.855e-03   w:  2.000e+02, b:  9.99181e+01
Iteration 5000: Cost 4.34e-05   dj_dw:  1.302e-03, dj_db: -2.960e-03   w:  2.000e+02, b:  9.99754e+01
Iteration 6000: Cost 3.92e-06   dj_dw:  3.912e-04, dj_db: -8.893e-04   w:  2.000e+02, b:  9.99926e+01
Iteration 7000: Cost 3.53e-07   dj_dw:  1.175e-04, dj_db: -2.671e-04   w:  2.000e+02, b:  9.99978e+01
Iteration 8000: Cost 3.19e-08   dj_dw:  3.530e-05, dj_db: -8.024e-05   w:  2.000e+02, b:  9.99993e+01
Iteration 9000: Cost 2.88e-09   dj_dw:  1.060e-05, dj_db: -2.410e-05   w:  2.000e+

### Predictions

In [8]:
# Evaluate the fitted line w_final * x + b_final at x = 1.0, 1.2 and 2.0
# (sizes in units of 1000 sqft) and print one prediction per line.
print(
  f"1000 sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars",
  f"1200 sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars",
  f"2000 sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars",
  sep="\n",
)

1000 sqft house prediction 300.0 Thousand dollars
1200 sqft house prediction 340.0 Thousand dollars
2000 sqft house prediction 500.0 Thousand dollars


In [9]:

# Restart from zero for a second, much shorter run.
w_init = b_init = 0

iterations = 10
# increase the learning rate: with a step this large the cost grows on every
# iteration in the log below, i.e. gradient descent diverges
tmp_alpha  = 8.0e-1

# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(
  x_train,
  y_train,
  w_in=w_init,
  b_in=b_init,
  alpha=tmp_alpha,
  num_iters=iterations,
  cost_function=compute_cost,
  gradient_function=compute_gradient,
)
print(f"\n (w, b) found by gradient descent: ({w_final: 8.4f}, {b_final: 8.4f})")

Iteration    0: Cost 1.63e+06   dj_dw: -1.133e+03, dj_db: -5.000e+02   w:  9.067e+02, b:  4.00000e+02
Iteration    1: Cost 1.93e+07   dj_dw:  3.898e+03, dj_db:  1.713e+03   w: -2.212e+03, b: -9.70667e+02
Iteration    2: Cost 2.28e+08   dj_dw: -1.340e+04, dj_db: -5.894e+03   w:  8.505e+03, b:  3.74436e+03
Iteration    3: Cost 2.69e+09   dj_dw:  4.604e+04, dj_db:  2.025e+04   w: -2.833e+04, b: -1.24586e+04
Iteration    4: Cost 3.18e+10   dj_dw: -1.583e+05, dj_db: -6.962e+04   w:  9.828e+04, b:  4.32368e+04
Iteration    5: Cost 3.76e+11   dj_dw:  5.440e+05, dj_db:  2.393e+05   w: -3.369e+05, b: -1.48195e+05
Iteration    6: Cost 4.44e+12   dj_dw: -1.870e+06, dj_db: -8.225e+05   w:  1.159e+06, b:  5.09793e+05
Iteration    7: Cost 5.25e+13   dj_dw:  6.426e+06, dj_db:  2.827e+06   w: -3.982e+06, b: -1.75183e+06
Iteration    8: Cost 6.20e+14   dj_dw: -2.209e+07, dj_db: -9.717e+06   w:  1.369e+07, b:  6.02176e+06
Iteration    9: Cost 7.33e+15   dj_dw:  7.592e+07, dj_db:  3.340e+07   w: -4.705e+

In [10]:
# Same three predictions as before, now using the parameters left behind by
# the large-learning-rate run above.
print(
  f"1000 sqft house prediction {w_final*1.0 + b_final:0.1f} Thousand dollars",
  f"1200 sqft house prediction {w_final*1.2 + b_final:0.1f} Thousand dollars",
  f"2000 sqft house prediction {w_final*2.0 + b_final:0.1f} Thousand dollars",
  sep="\n",
)

1000 sqft house prediction -67747572.1 Thousand dollars
1200 sqft house prediction -77157600.5 Thousand dollars
2000 sqft house prediction -114797713.8 Thousand dollars
