In [16]:
import numpy as np
import random

In [17]:
def value(theta, X_b, y):
    """总的损失函数(以均方误差损失函数为例)的值"""
    return ((y - (X_b @ theta)).T @ (y - (X_b @ theta))) / len(y)


def d_value(theta, X_b, y):
    """计算损失函数的梯度矩阵"""
    return X_b.T.dot(X_b.dot(theta) - y) / len(y) # 批量梯度下降(BGD)


def gradient_descent(X_b, y, 
                     initial_theta, # 初始值
                     lr,  # 学习率(没有对学习率进行一维搜索)
                     n_iters=1e4, # 最高迭代次数 
                     epsilon=1e-8): # 迭代结束时必须满足的精度
    theta = initial_theta
    cur_iter = 0

    while cur_iter < n_iters:
        gradient = d_value(theta, X_b, y)
        last_theta = theta
        theta = theta - lr * gradient # 迭代值更新过程
        if abs(value(theta, X_b, y) - value(last_theta, X_b, y)) < epsilon: # 比较2次迭代值的差异,达到精度要求时跳出循环
            break

        cur_iter += 1

    return theta

In [18]:
m = 1000
n = 50 # 特征数量

x_trian = np.random.normal(size=(m, n))
x_trian = np.hstack((x_trian, np.ones((m, 1))))
true_theta = np.array(random.sample(range(1, 52), 51)) # 包含截距项
y_trian = x_trian.dot(true_theta) + np.random.normal(0., 1., size=m)

In [19]:
%%time
initial_theta = np.ones(true_theta.shape)
predict_theta = gradient_descent(x_trian, y_trian, 
                                 initial_theta=initial_theta, lr=0.01)
predict_theta

Wall time: 163 ms


array([ 8.01119843, 41.99473351, 22.98704658, 29.00709237, 21.97585321,
       42.97486954,  1.0703511 , 20.02077025, 32.04732205, 36.99312378,
       30.01816003, 11.02390814,  1.97462439,  7.01014539, 16.02840779,
       34.97350184, 50.00929887, 12.97728938,  6.05471524,  2.9419934 ,
       47.9862502 , 14.98581965, 37.97418374,  9.98070889, 45.94401906,
       38.92124811, 24.96217429, 27.00014765, 45.02041181, 28.01184102,
       33.05885949, 25.97818555, 24.00338759, 16.98034987, 18.97514449,
       47.0031188 , 21.02927638,  4.98319893,  8.97245778, 34.03808235,
        4.00878041, 14.00452847, 18.01045899, 31.00049369, 39.98818097,
       36.02288317, 43.96151709, 48.97751688, 50.97778758, 41.00824437,
       11.99500973])

In [20]:
value(predict_theta, x_trian, y_trian) # 最终损失函数的值

0.88433009928767