# Ridge回归

## 基于Numpy的实现

In [1]:
import numpy as np

### 模型主体

In [2]:
### 定义Ridge回归损失函数
def l2_loss(X, y, w, b, alpha):
    """
    输入:
    X: 输入变量矩阵
    y: 输出标签向量
    w: 变量参数权重矩阵
    b: 偏置
    alpha: 正则化系数
    输出:
    y_hat: 线性模型预测输出
    loss: 均方损失值
    dw: 权重系数一阶偏导
    db: 偏置一阶偏导
    """
    # 训练样本量
    num_train = X.shape[0]
    # 回归模型预测输出
    y_hat = np.dot(X, w) + b
    # L1损失函数
    loss = np.sum((y_hat - y) ** 2) / num_train + alpha * np.sum(np.square(w))
    # 基于向量化符号函数的参数梯度计算
    dw = 2 * np.dot(X.T, y_hat - y) / num_train + 2 * alpha * w
    db = 2 * np.sum(y_hat - y) / num_train
    return y_hat, loss, dw, db

### 训练过程

In [3]:
### 初始化模型参数
def initialize_params(dims):
    """
    输入:
    dims: 训练数据变量维度
    输出:
    w: 初始化权重系数值
    b: 初始化偏置系数值
    """
    # 初始化权重系数为零向量
    w = np.zeros((dims, 1))
    # 初始化偏置系数为零
    b = 0
    return w, b

In [4]:
### 定义Ridge回归模型的训练过程
def ridge_train(X, y, learning_rate=0.01, epochs=1000):
    """
    输入:
    X: 输入变量矩阵
    y: 输出标签向量
    learning_rate: 学习率
    epochs: 训练迭代次数
    输出:
    loss_his: 每次迭代的L2损失列表
    params: 优化后的参数字典
    grads: 优化后的参数梯度字典
    """
    # 训练特征数
    num_feature = X.shape[1]
    # 初始化模型参数
    w, b = initialize_params(num_feature)
    # 记录训练损失的空列表
    loss_his = []
    # 迭代训练
    for i in range(1, epochs):
        # 计算当前迭代的预测值、损失和梯度
        y_hat, loss, dw, db = l2_loss(X, y, w, b, 0.1)
        # 基于梯度下降法的参数更新
        w -= learning_rate * dw
        b -= learning_rate * db
        # 记录当前迭代的损失
        loss_his.append(loss)
        # 每300次迭代打印当前损失信息
        if i % 300 == 0:
            print('epoch %d loss %f' % (i, loss))
        # 将当前迭代步优化后的参数保存到字典中
        params = {
            'w': w,
            'b': b
        }
        grads = {
            'dw': dw,
            'db': db
        }
    return loss_his, params, grads

### 数据测试


In [5]:
# 读取示例数据
data = np.genfromtxt('example.dat', delimiter=',')
data.shape

(101, 101)

In [6]:
data[0]

array([-1.14558, -1.29249,  0.84911,  0.36008,  0.26068,  2.51167,
        2.31855,  0.60805,  0.3428 , -0.28903,  0.70398,  1.18534,
       -1.44321, -0.72979, -0.06026, -0.50449, -0.3148 ,  0.94552,
       -0.32453, -0.09248,  0.84448,  0.32551,  0.57684, -0.12461,
       -0.5531 , -1.37074, -0.85719,  0.05802,  0.27486, -0.09269,
        0.28742, -1.29854,  0.66856,  0.02223,  0.28599, -0.28722,
        0.54304,  0.67301, -0.67343,  1.19857,  1.35595,  1.53356,
       -0.61245,  1.91698,  1.89642,  1.28004,  0.60073, -0.37792,
        0.34903,  2.17043, -1.52004, -0.75143, -0.54607, -0.46285,
        0.28424,  0.02458, -2.31083, -0.43165,  0.87581, -0.07552,
        0.66682,  1.65492, -0.71924,  1.39364,  0.5405 ,  0.55335,
        1.00319, -0.76292, -0.26241, -1.32323, -0.15256, -0.30767,
        0.37729,  0.02091,  1.05892, -0.30399,  1.76204, -1.38097,
        0.96642, -0.34674,  0.92688, -1.05426, -0.50192,  0.11954,
        0.97006,  2.01984,  0.00975,  0.65729, -1.01224,  0.93

In [7]:
# 选择特征与标签
x = data[:, 0:100]
y = data[:, 100].reshape(-1, 1)
# 加一列
X = np.column_stack((np.ones((x.shape[0], 1)), x))
# 划分训练集与测试集
X_train, y_train = X[:70], y[:70]
X_test, y_test = X[70:], y[70:]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(70, 101) (70, 1) (31, 101) (31, 1)


In [8]:
# 执行训练示例
loss_list, params, grads = ridge_train(X_train, y_train, 0.01, 3000)

epoch 300 loss 0.703214
epoch 600 loss 0.688477
epoch 900 loss 0.686890
epoch 1200 loss 0.686525
epoch 1500 loss 0.686367
epoch 1800 loss 0.686275
epoch 2100 loss 0.686217
epoch 2400 loss 0.686180
epoch 2700 loss 0.686156


In [9]:
# 设置NumPy数组显示格式为小数点后3位，且不采用科学计数法
# np.set_printoptions(precision=3, suppress=True)
# 获取训练参数
params

{'w': array([[-0.01274401],
        [-0.13577965],
        [ 0.44995799],
        [ 0.29598226],
        [ 0.13251107],
        [ 0.73391187],
        [-0.11724036],
        [ 0.73770615],
        [-0.31029702],
        [ 0.04398261],
        [-0.022068  ],
        [ 0.63371473],
        [ 0.04351178],
        [ 0.70299436],
        [ 0.09163995],
        [-0.30449914],
        [ 0.09403611],
        [ 0.36474124],
        [-0.04799417],
        [ 0.70366001],
        [ 0.06494192],
        [-0.00480895],
        [-0.10705298],
        [ 0.41602075],
        [-0.03373413],
        [-0.06878905],
        [-0.01149144],
        [-0.03381789],
        [-0.06205757],
        [ 0.38041767],
        [ 0.09568254],
        [ 0.14340883],
        [ 0.1455656 ],
        [ 0.16151416],
        [-0.14227541],
        [-0.04532967],
        [ 0.12515192],
        [-0.05237463],
        [-0.04896764],
        [-0.19441383],
        [-0.00279151],
        [ 0.01958285],
        [ 0.06333959],
      

## 基于sklearn的实现

In [10]:
from sklearn import linear_model

In [11]:
# 创建Ridge模型实例
sk_ridge = linear_model.Ridge(alpha=1.0)
# 对训练集进行拟合
sk_ridge.fit(X_train, y_train)
# 打印模型相关系数
print("sklearn Ridge intercept :", sk_ridge.intercept_)
print("\nsklearn Ridge coefficients :\n", sk_ridge.coef_)

sklearn Ridge intercept : [-0.40576153]

sklearn Ridge coefficients :
 [[ 0.00000000e+00 -2.01786172e-01  5.45135248e-01  3.28370796e-01
   7.88208577e-02  8.63329630e-01 -1.28629181e-01  8.98548367e-01
  -4.15384520e-01  1.58905870e-01 -2.93807956e-02  6.32380717e-01
   4.21771945e-02  9.24308741e-01  1.20277300e-01 -3.85333806e-01
   1.63068579e-01  3.98963430e-01 -2.55902692e-02  8.88008417e-01
   3.69510302e-02  5.63702626e-04 -1.74758205e-01  4.51826721e-01
  -7.30107159e-02 -1.35017481e-01  5.39686001e-02 -4.02425081e-03
  -6.07507156e-02  3.75631827e-01  8.57162815e-02  1.45771573e-01
   1.44022204e-01  1.98972072e-01 -1.74729670e-01 -4.55411141e-02
   2.10931708e-01 -4.20589474e-02 -1.16955409e-01 -3.48704701e-01
   9.24987738e-02 -3.59919666e-02  3.12791851e-02  9.89341477e-02
  -3.20373964e-02  5.01884867e-04  2.52601261e-02 -1.43870413e-01
  -2.01630343e-01 -2.04659068e-02  1.39960583e-01 -2.40332862e-01
   1.64551174e-01  1.05411007e-02 -1.27446721e-01 -8.05713152e-02
   3.