In [None]:
import math, copy
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('./deeplearning.mplstyle')
from lab_utils_uni import plt_house_x, plt_contour_wgrad, plt_divergence, plt_gradients

In [None]:
# Training set: two (size, price) samples used throughout the notebook.
# Units follow the labels printed by the prediction cell: x = 1.0 is reported
# as a 1000 sqft house and predictions are reported in thousands of dollars.
x_train = np.array([1.0, 2.0])   # feature: house size
y_train = np.array([300.0, 500.0])   # target: house price

计算代价函数
$$
J(w, b) = \frac{1}{2m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)})^2
$$

In [None]:
def compute_cost(x: np.ndarray, y: np.ndarray, w, b):
    """
    Squared-error cost J(w, b) of the linear model f(x) = w * x + b.

    Args:
        x: training inputs; only the first axis length and x[i] are used.
        y: training targets, indexed in step with x.
        w: slope of the model.
        b: intercept of the model.

    Returns:
        The sum of squared residuals over all samples divided by 2 * m,
        where m = x.shape[0].
    """
    n_samples = x.shape[0]
    # sum() starts from 0 and adds left to right, one squared residual per sample.
    squared_error_total = sum(
        (w * x[k] + b - y[k]) ** 2 for k in range(n_samples)
    )
    return squared_error_total / (2 * n_samples)

使用梯度下降法求解参数 $w$ 和 $b$，其中拟合函数（模型）为
$$
f_{w,b}(x^{(i)})= wx^{(i)} + b
$$
代价函数
$$
J(w, b) = \frac{1}{2m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)})^2
$$
梯度下降的更新规则为（$w$ 与 $b$ 同时更新）
$$
\begin{aligned}
b &:= b - \alpha\frac{\partial}{\partial b}J(w, b) \\
w &:= w - \alpha\frac{\partial}{\partial w}J(w, b)
\end{aligned}
$$
其中偏导数整理后的结果为
$$
\begin{aligned}
\frac{\partial}{\partial b}J(w, b) &= \frac{1}{m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)}) \\
\frac{\partial}{\partial w}J(w, b) &= \frac{1}{m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)})x^{(i)}
\end{aligned}
$$
代入偏导数后，梯度下降的完整更新规则为
$$
\begin{aligned}
b &:= b - \alpha\frac{1}{m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)}) \\
w &:= w - \alpha\frac{1}{m}\sum_{i=1}^m(wx^{(i)} + b - y^{(i)})x^{(i)}
\end{aligned}
$$

In [None]:
def compute_gradient(x: np.ndarray, y: np.ndarray, w, b):
    """
    Gradient of the squared-error cost J(w, b) for the model f(x) = w * x + b.

    Args:
        x: training inputs; only the first axis length and x[i] are used.
        y: training targets, indexed in step with x.
        w: slope of the model.
        b: intercept of the model.

    Returns:
        (dj_dw, dj_db): the partial derivatives of J with respect to w and b,
        each averaged over the m = x.shape[0] samples.
    """
    n_samples = x.shape[0]
    # Residual (prediction minus target) for every sample, computed once and
    # shared by both partial derivatives.
    residuals = [w * x[k] + b - y[k] for k in range(n_samples)]

    grad_b = sum(residuals)
    grad_w = sum(res * x[k] for k, res in enumerate(residuals))

    return grad_w / n_samples, grad_b / n_samples

图表展示

In [None]:
# plt_gradients is imported from lab_utils_uni (not shown in this notebook);
# it receives the training data plus the cost and gradient callables defined above.
# NOTE(review): presumably it plots the cost and its gradients — confirm in lab_utils_uni.
plt_gradients(x_train, y_train, compute_cost, compute_gradient)
plt.show()

In [None]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function,
                     history_limit=100000):
    """
    Fit w and b by batch gradient descent.

    Args:
        x, y: training data, passed unchanged to cost_function and gradient_function.
        w_in, b_in: initial values of the parameters.
        alpha: learning rate (step size).
        num_iters: number of update steps to perform.
        cost_function: callable (x, y, w, b) -> cost.
        gradient_function: callable (x, y, w, b) -> (dj_dw, dj_db).
        history_limit: only the first `history_limit` iterations are recorded in
            the returned histories, which bounds memory on very long runs.

    Returns:
        (w, b, J_history, p_history): the final parameters, the recorded cost after
        each update, and the recorded [w, b] pairs after each update.

    Side effects:
        Prints a progress line roughly ten times over the run.
    """
    J_history = []
    p_history = []
    w = w_in
    b = b_in

    # Loop-invariant logging interval; never below 1 so the modulo is always valid.
    log_every = max(1, math.ceil(num_iters / 10))

    for i in range(num_iters):
        # Both gradients come from the current (w, b), so the update is simultaneous.
        dj_dw, dj_db = gradient_function(x, y, w, b)
        b = b - alpha * dj_db
        w = w - alpha * dj_dw

        current_cost = None
        if i < history_limit:
            current_cost = cost_function(x, y, w, b)
            J_history.append(current_cost)
            p_history.append([w, b])

        if i % log_every == 0:
            # Past the history cap nothing was recorded for this step, so evaluate
            # the cost here instead of reporting a stale J_history[-1].
            if current_cost is None:
                current_cost = cost_function(x, y, w, b)
            print(f"Iteration {i:4}: Cost {current_cost:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
    return w, b, J_history, p_history

In [None]:
# Starting point and hyper-parameters for the main training run.
w_init = 0.0
b_init = 0.0
iterations = 10_000
tmp_alpha = 10**-2

# Run gradient descent with the cost and gradient functions defined above.
w_final, b_final, j_history, p_history = gradient_descent(
    x=x_train,
    y=y_train,
    w_in=w_init,
    b_in=b_init,
    alpha=tmp_alpha,
    num_iters=iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

预测

In [None]:
# Evaluate the fitted line at three house sizes; each pair is
# (label printed in sqft, model input value).
for sqft_label, size_input in ((1000, 1.0), (1200, 1.2), (2000, 2.0)):
    print(f"{sqft_label} sqft house prediction {w_final*size_input + b_final:0.1f} Thousand dollars")

In [None]:
# Pass the recorded [w, b] history from the run above to plt_contour_wgrad,
# drawing on a single 12x6 axes. The helper comes from lab_utils_uni (not shown).
# NOTE(review): presumably draws cost contours with the descent path — confirm.
fig, ax = plt.subplots(1,1, figsize=(12, 6))
plt_contour_wgrad(x_train, y_train, p_history, ax)

In [None]:
# Same helper as the previous plot, now with explicit w/b ranges, contour levels
# and resolution. The ranges bracket w = 200, b = 100.
# NOTE(review): w_range/b_range look like [start, stop, step] — confirm against
# plt_contour_wgrad in lab_utils_uni.
fig, ax = plt.subplots(1,1, figsize=(12, 4))
plt_contour_wgrad(x_train, y_train, p_history, ax, w_range=[180, 220, 0.5], b_range=[80, 120, 0.5],
            contours=[1,5,10,20],resolution=0.5)

In [None]:
# Start again from the origin.
w_init = 0
b_init = 0

# Only a few steps, with the learning rate deliberately set to a large value.
iterations = 10
tmp_alpha = 8.0e-1

# Re-run gradient descent; this overwrites the results of the earlier run.
w_final, b_final, j_history, p_history = gradient_descent(
    x=x_train,
    y=y_train,
    w_in=w_init,
    b_in=b_init,
    alpha=tmp_alpha,
    num_iters=iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)

In [None]:
# Plot the parameter and cost histories of the large-alpha run.
# plt_divergence is imported from lab_utils_uni (not shown in this notebook).
# NOTE(review): presumably visualises how the cost grows over the steps — confirm.
plt_divergence(p_history, j_history,x_train, y_train)
plt.show()