In [1]:
import numpy as np  #导入NumPy数学工具箱
import pandas as pd  #导入Pandas数据处理工具箱

# 读入数据并显示前面几行的内容，这是为了确保我们的文件读入正确性
# 示例代码是在Kaggle中数据集中读入文件，如果在本机中需要指定具体本地路径
df_ads = pd.read_csv('./dataset/advertising.csv')
df_ads.head()

Unnamed: 0,wechat,weibo,others,sales
0,304.4,93.6,294.4,9.7
1,1011.9,34.4,398.4,16.7
2,1091.1,32.8,295.2,17.3
3,85.5,173.6,403.2,7.0
4,1047.0,302.4,553.6,22.1


In [2]:
X = np.array(df_ads)  # 构建特征集，含全部特征
X = np.delete(X, [3], axis=1)  # 删除掉标签
y = np.array(df_ads.sales)  #构建标签集，销售金额
print("张量X的阶:", X.ndim)
print("张量X的形状:", X.shape)
print(X)

张量X的阶: 2
张量X的形状: (200, 3)
[[ 304.4   93.6  294.4]
 [1011.9   34.4  398.4]
 [1091.1   32.8  295.2]
 [  85.5  173.6  403.2]
 [1047.   302.4  553.6]
 [ 940.9   41.6  155.2]
 [1277.2  111.2  296. ]
 [  38.2  217.6   16.8]
 [ 342.6  162.4  260. ]
 [ 347.6    6.4  118.4]
 [ 980.1  188.8  460.8]
 [  39.1   16.8    8. ]
 [  39.6  391.2  600. ]
 [ 889.1  381.6  423.2]
 [ 633.8  116.    81.6]
 [ 527.8   61.6  184.8]
 [ 203.4  206.4  164.8]
 [ 499.6  382.4  411.2]
 [ 633.4  114.4  204.8]
 [ 437.7  118.4  311.2]
 [ 334.   136.   103.2]
 [1132.   216.8  183.2]
 [ 841.3  351.2   13.6]
 [ 435.4   11.2   59.2]
 [ 627.4  371.2  472. ]
 [ 599.2  147.2  276.8]
 [ 321.2  128.   326.4]
 [ 571.9  295.2  633.6]
 [ 758.9  336.    28.8]
 [ 799.4  123.2   19.2]
 [ 314.    74.4    7.2]
 [ 108.3  280.8  527.2]
 [ 339.9  395.2  365.6]
 [ 619.7  153.6  132.8]
 [ 227.5   92.8  147.2]
 [ 347.2  220.   128. ]
 [ 774.4   62.4  281.6]
 [1003.3  265.6  303.2]
 [  60.1  127.2  396.8]
 [  88.3  128.   178.4]
 [1280.4  316.

In [3]:
y = y.reshape(-1, 1)  #通过reshape函数把向量转换为矩阵，-1就是len(y),返回样本个数
print("张量y的形状:", y.shape)

张量y的形状: (200, 1)


In [4]:
# 将数据集进行80%（训练集）和20%（验证集）的分割
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=0)

In [5]:
def scaler(train, test):  # 定义归一化函数 ，进行数据压缩
    # 数据的压缩
    min = train.min(axis=0)  # 训练集最小值
    max = train.max(axis=0)  # 训练集最大值
    gap = max - min  # 最大值和最小值的差
    train -= min  # 所有数据减最小值
    train /= gap  # 所有数据除以大小值差
    test -= min  #把训练集最小值应用于测试集
    test /= gap  #把训练集大小值差应用于测试集
    return train, test  # 返回压缩后的数据

In [6]:
def min_max_gap(train):  # 计算训练集最大，最小值以及他们的差，用于后面反归一化过程
    min = train.min(axis=0)  # 训练集最小值
    max = train.max(axis=0)  # 训练集最大值
    gap = max - min  # 最大值和最小值的差
    return min, max, gap


y_min, y_max, y_gap = min_max_gap(y_train)

In [7]:
X_train_original = X_train.copy()  # 保留一份训练集数据副本，用于对要预测数据归一化

In [8]:
X_train, X_test = scaler(X_train, X_test)  # 对特征归一化
y_train, y_test = scaler(y_train, y_test)  # 对标签也归一化

In [9]:
x0_train = np.ones((len(X_train), 1))  # 构造X_train长度的全1数组配合对Bias的点积
X_train = np.append(x0_train, X_train, axis=1)  #把X增加一系列的1
x0_test = np.ones((len(X_test), 1))  # 构造X_test长度的全1数组配合对Bias的点积
X_test = np.append(x0_test, X_test, axis=1)  #把X增加一系列的1
print("张量X的形状:", X_train.shape)
print(X_train)

张量X的形状: (160, 4)
[[1.         0.39995488 0.1643002  0.42568162]
 [1.         0.72629521 0.83975659 0.34564644]
 [1.         0.22746071 0.31845842 0.35620053]
 [1.         0.66952402 0.05679513 0.30167106]
 [1.         0.81803143 0.98782961 0.38698329]
 [1.         0.35341003 0.27789047 0.09322779]
 [1.         0.24355215 0.40567951 0.28320141]
 [1.         0.44852996 0.83975659 0.40105541]
 [1.         0.44544703 0.09330629 0.07915567]
 [1.         0.71636965 0.86612576 0.294635  ]
 [1.         0.46597489 0.03245436 0.07651715]
 [1.         0.46319272 0.03651116 0.23131047]
 [1.         0.11594857 0.81135903 0.10202287]
 [1.         0.07353936 0.78498986 0.07915567]
 [1.         0.97706594 0.85192698 0.44766931]
 [1.         0.45770359 0.93509128 0.51627089]
 [1.         0.22204677 0.18255578 0.00527704]
 [1.         0.1898639  0.23732252 0.3764292 ]
 [1.         0.94871795 0.79716024 0.48812665]
 [1.         0.49808256 0.71602434 0.05013193]
 [1.         0.70682006 0.59229209 0.079155

In [10]:
def loss_function(X, y, W):  # 手工定义一个MSE均方误差函数,W此时是一个向量
    y_hat = X.dot(W.T)  # 点积运算 h(x)=w_0*x_0 + w_1*x_1 + w_2*x_2 + w_3*x_3
    loss = y_hat.reshape((len(y_hat), 1)) - y  # 中间过程,求出当前W和真值的差异
    cost = np.sum(loss ** 2) / (2 * len(X))  # 这是平方求和过程, 均方误差函数的代码实现
    return cost  # 返回当前模型的均方误差值

In [11]:
def gradient_descent(X, y, W, lr, iterations):  # 定义梯度下降函数
    l_history = np.zeros(iterations)  # 初始化记录梯度下降过程中损失的数组
    W_history = np.zeros((iterations, len(W)))  # 初始化权重数组
    for iter in range(iterations):  # 进行梯度下降的迭代，就是下多少级台阶
        y_hat = X.dot(W.T)  # 这个是向量化运行实现的假设函数
        loss = y_hat.reshape((len(y_hat), 1)) - y  # 中间过程, y_hat和y真值的差
        derivative_W = X.T.dot(loss) / len(X)  #求出多项式的梯度向量
        derivative_W = derivative_W.reshape(len(W))
        W = W - lr * derivative_W  # 结合下降速率更新权重
        l_history[iter] = loss_function(X, y, W)  # 损失的历史记录
        W_history[iter] = W  # 梯度下降过程中权重的历史记录
    return l_history, W_history  # 返回梯度下降过程数据

In [12]:
#首先确定参数的初始值
iterations = 300;  # 迭代300次
alpha = 0.15;  #学习速率设为0.15
weight = np.array([0.5, 1, 1, 1])  # 权重向量，w[0] = bias
#计算一下初始值的损失
print('当前损失：', loss_function(X_train, y_train, weight))

当前损失： 0.8039183733604857


In [13]:
# 定义线性回归模型
def linear_regression(X, y, weight, alpha, iterations):
    loss_history, weight_history = gradient_descent(X, y,
                                                    weight,
                                                    alpha, iterations)
    print("训练最终损失:", loss_history[-1])  # 打印最终损失
    y_pred = X.dot(weight_history[-1])  # 进行预测
    traning_acc = 100 - np.mean(np.abs(y_pred - y)) * 100  # 计算准确率
    print("线性回归训练准确率: {:.2f}%".format(traning_acc))  # 打印准确率
    return loss_history, weight_history  # 返回训练历史记录

In [14]:
# 调用刚才定义的线性回归模型
loss_history, weight_history = linear_regression(X_train, y_train,
                                                 weight, alpha, iterations)  #训练机器

训练最终损失: 0.0025067234661860244
线性回归训练准确率: 75.67%


In [15]:
print("权重历史记录：", weight_history)
print("损失历史记录：", loss_history)

权重历史记录： [[0.31788555 0.90705968 0.90344561 0.94365907]
 [0.17864769 0.83591028 0.82804373 0.89945723]
 [0.07235191 0.78150141 0.76891205 0.86458972]
 ...
 [0.02844653 0.64665926 0.21226359 0.19962852]
 [0.02869611 0.64661834 0.21227308 0.19880159]
 [0.02894386 0.64657762 0.21228392 0.19797842]]
损失历史记录： [0.48378085 0.29458062 0.18270562 0.11649638 0.07725705 0.05394715
 0.04004689 0.03170611 0.02665112 0.02353916 0.02157726 0.02029707
 0.01942197 0.01878854 0.01830024 0.01789999 0.01755403 0.01724238
 0.01695319 0.01667942 0.01641684 0.01616293 0.01591615 0.01567554
 0.01544049 0.01521061 0.0149856  0.01476528 0.01454947 0.01433805
 0.01413089 0.0139279  0.01372896 0.013534   0.01334291 0.01315561
 0.01297202 0.01279205 0.01261563 0.01244268 0.01227312 0.01210688
 0.01194388 0.01178405 0.01162733 0.01147365 0.01132294 0.01117514
 0.01103018 0.010888   0.01074854 0.01061175 0.01047757 0.01034593
 0.01021679 0.01009009 0.00996579 0.00984382 0.00972413 0.00960669
 0.00949144 0.00937833 0.0

In [16]:
X_plan = [250, 50, 50]  # 要预测的X特征数据
X_train, X_plan = scaler(X_train_original, X_plan)  # 对预测数据也要归一化缩放
X_plan = np.append([1], X_plan)  # 加一个哑特征X0 = 1
y_plan = np.dot(weight_history[-1], X_plan)  # [-1] 即模型收敛时的权重
# 对预测结果要做反向缩放，才能得到与原始广告费用对应的预测值
y_value = y_plan * y_gap + y_min  # y_gap是当前y_train中最大值和最小值的差，y_min是最小值
print("预计商品销售额： ", y_value, "千元")

预计商品销售额：  [7.42162744] 千元
