In [None]:
import copy
import math

import numpy as np
import matplotlib.pyplot as plt
from lab_utils_multi import  load_house_data, run_gradient_descent
from lab_utils_multi import  norm_plot, plt_equal_scale, plot_cost_i_w
from lab_utils_common import dlc
# Show floating-point array values with 2 digits of precision when printed.
np.set_printoptions(precision=2)
# Apply the Matplotlib style sheet; the path is relative to the working directory.
plt.style.use('./deeplearning.mplstyle')

# 准备数据集

In [None]:
# Names used to label the feature columns of X_train in later plots
# (entry k labels column k).
X_features = [
    'size(sqft)',
    'bedrooms',
    'floors',
    'age',
]

# Load the training inputs and targets through the lab helper.
X_train, y_train = load_house_data()

# 观察数据集特征

In [None]:
# One scatter panel per feature column, all sharing the y axis.
fig, ax = plt.subplots(1, 4, figsize=(12, 3), sharey=True)
for col, axis in enumerate(ax):
    axis.scatter(X_train[:, col], y_train)
    axis.set_xlabel(X_features[col])
# The y axis is shared, so only the left-most panel carries the label.
ax[0].set_ylabel("Price (1000's)")
plt.show()

# 预测函数
$$f_{\mathbf{w},b}(\mathbf{x})=\mathbf{w}\cdot\mathbf{x}+b$$

In [None]:
def predict_value(x: np.ndarray, w: np.ndarray, b):
    """
    Evaluate the linear model f(x) = w . x + b.

    Args:
      x : feature values; whatever `np.dot(x, w)` accepts (for example a
          single example of shape (n,) or a matrix of shape (m, n))
      w : model weights
      b : model bias, added to the dot product

    Returns:
      np.dot(x, w) + b
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

# 计算损失函数
$$J(\mathbf{w},b)=\frac{1}{2m}\sum_{i=0}^{m-1}(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2$$

In [None]:
def compute_cost(x: np.ndarray, y: np.ndarray, w: np.ndarray, b):
    """
    Compute the squared-error cost J(w, b) = 1/(2m) * sum((f(x_i) - y_i)^2).

    All m predictions are computed in one vectorized call instead of a
    Python loop over rows.

    Args:
      x (ndarray (m,n)) : training examples, one per row
      y (ndarray (m,))  : target values; must be 1-D so the residual
                          subtraction does not broadcast to (m, m)
      w (ndarray (n,))  : model weights
      b (scalar)        : model bias

    Returns:
      The scalar cost.

    Raises:
      ValueError: if x contains no training examples.
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")
    # Residuals for every example at once: shape (m,).
    err = predict_value(x, w, b) - y
    return np.dot(err, err) / (2 * m)

# 计算梯度
$$\frac{\partial}{\partial w_j}J(\mathbf{w}, b) = \frac{1}{m} \sum_{i=0}^{m-1}(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_j^{(i)} $$
$$\frac{\partial}{\partial b}J(\mathbf{w}, b) = \frac{1}{m} \sum_{i=0}^{m-1}(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) $$

In [None]:
def compute_gradient(x: np.ndarray, y: np.ndarray, w: np.ndarray, b):
    """
    Compute the gradient of the squared-error cost with respect to w and b.

    The per-example, per-feature Python loops are replaced by a single
    matrix-vector product: dj_dw = X^T . err / m.

    Args:
      x (ndarray (m,n)) : training examples, one per row
      y (ndarray (m,))  : target values; must be 1-D so the residual
                          subtraction does not broadcast to (m, m)
      w (ndarray (n,))  : model weights
      b (scalar)        : model bias

    Returns:
      dj_dw (ndarray (n,)) : partial derivatives of the cost w.r.t. each weight
      dj_db (scalar)       : partial derivative of the cost w.r.t. the bias

    Raises:
      ValueError: if x is not 2-D or contains no training examples.
    """
    # Unpacking two values keeps the original requirement that x is 2-D.
    m, _ = x.shape
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")
    # Residuals for every example at once: shape (m,).
    err = predict_value(x, w, b) - y
    dj_dw = np.dot(x.T, err) / m
    dj_db = np.sum(err) / m
    return dj_dw, dj_db

# 梯度下降
$$
w_j := w_j - \alpha \frac{\partial}{\partial w_j}J(\mathbf{w}, b) \\
b := b - \alpha \frac{\partial}{\partial b}J(\mathbf{w}, b)
$$

In [None]:
def gradient_descend(x, y, w_in, b_in, alpha, num_iterations, cost_function, gradient_function):
    """
    Run batch gradient descent and return the fitted parameters.

    Args:
      x, y              : training data; passed through unchanged to
                          cost_function and gradient_function
      w_in              : initial weights; copied, so the caller's object
                          is never modified
      b_in              : initial bias
      alpha             : learning rate (step size)
      num_iterations    : number of update steps to perform
      cost_function     : callable (x, y, w, b) -> cost
      gradient_function : callable (x, y, w, b) -> (dj_dw, dj_db)

    Returns:
      w         : weights after the last update
      b         : bias after the last update
      J_history : list with the cost after each update (capped in length)
    """
    # Upper bound on recorded costs so very long runs do not exhaust memory.
    max_history = 100000

    w = copy.deepcopy(w_in)
    # The in-place update below cannot store float steps in an integer (or
    # boolean) array; promote such arrays to float once, up front.
    if isinstance(w, np.ndarray) and not np.issubdtype(w.dtype, np.inexact):
        w = w.astype(float)
    b = b_in
    J_history = []

    # Progress is printed roughly ten times over the whole run.
    report_every = math.ceil(num_iterations / 10)

    for i in range(num_iterations):
        dj_dw, dj_db = gradient_function(x, y, w, b)
        w -= alpha * dj_dw
        b -= alpha * dj_db
        cost = cost_function(x, y, w, b)
        if i < max_history:
            J_history.append(cost)
        if i % report_every == 0:
            print(f'循环执行了{i}次， 当前预测函数误差为:{cost:0.2f}')
    return w, b, J_history

# 训练模型

In [None]:
# m = number of training examples, n = number of features.
m, n = X_train.shape

# Hyper-parameters for this (un-normalized) training run.
num_iterations = 10000
alpha = 9.1e-7

# Start from all-zero weights and a zero bias.
w, b, J_history = gradient_descend(
    x=X_train,
    y=y_train,
    w_in=np.zeros((n,)),
    b_in=0,
    alpha=alpha,
    num_iterations=num_iterations,
    cost_function=compute_cost,
    gradient_function=compute_gradient,
)

# 绘制图形

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))

# Left panel: the complete cost history.
ax1.plot(J_history)
ax1.set_title("Cost vs. iteration")

# Right panel: the history from step 100 onward, with the x axis shifted so
# it still shows the true iteration number.
J_tail = J_history[100:]
ax2.plot(100 + np.arange(len(J_tail)), J_tail)
ax2.set_title("Cost vs. iteration (tail)")

for axis in (ax1, ax2):
    axis.set_ylabel('Cost')
    axis.set_xlabel('iteration step')
plt.show()

# 特征缩放 - z-score 标准化(zero-mean normalization)


In [None]:
def zscore_normalize_features(X):
    """
    Compute the column-wise z-score normalization of X.

    Each column (feature) is shifted by its mean and divided by its
    standard deviation. A column whose standard deviation is exactly zero
    (a constant feature) is divided by 1 instead, so it normalizes to all
    zeros rather than NaN.

    Args:
      X (ndarray (m,n))     : input data, m examples, n features

    Returns:
      X_norm (ndarray (m,n)): input normalized by column
      mu (ndarray (n,))     : mean of each feature
      sigma (ndarray (n,))  : standard deviation of each feature; returned
                              unmodified, so it is 0 for constant features
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # Avoid 0/0 for constant columns: their centered values are already 0,
    # so any non-zero divisor leaves them at 0.
    scale = np.where(sigma == 0, 1.0, sigma)
    X_norm = (X - mu) / scale
    return X_norm, mu, sigma

对标准化进行展示

In [None]:
X_norm, mu, sigma = zscore_normalize_features(X_train)
# Mean-centered only (not yet scaled), shown as the intermediate stage.
X_mean = X_train - mu

# Bind the new figure to `fig` so the super-title below lands on THIS
# figure rather than on a figure created by an earlier cell.
fig, ax = plt.subplots(1, 3, figsize=(12, 3))

# (data, panel title) for each stage: raw, mean-centered, z-score normalized.
stages = [
    (X_train, "unnormalized"),
    (X_mean, r"X - $\mu$"),
    (X_norm, r"Z-score normalized"),
]
for axis, (data, title) in zip(ax, stages):
    # Feature column 0 on the x axis against feature column 3 on the y axis.
    axis.scatter(data[:, 0], data[:, 3])
    axis.set_xlabel(X_features[0])
    # Label each panel's own y axis (not only the first panel's).
    axis.set_ylabel(X_features[3])
    axis.set_title(title)
    axis.axis('equal')

fig.suptitle("distribution of features before, during, after normalization")
# Reserve a band at the top of the figure so the super-title does not
# overlap the panel titles.
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()