In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

data = load_boston()
X = pd.DataFrame(data.data, columns=[data.feature_names])
y = data.target

In [2]:
X.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97


In [3]:
# 数据预处理
scale = StandardScaler()
X = scale.fit_transform(X)
X = pd.DataFrame(X, columns=[data.feature_names])
X['bias'] = 1
X.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,bias
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,-5.616939000000001e-17,6.319056000000001e-17,-3.145486e-15,-2.106352e-17,2.7523e-15,-1.15077e-14,-1.13743e-15,7.582867e-16,5.616939000000001e-17,5.616939000000001e-17,-1.022283e-14,8.593916e-15,-5.897786e-16,1.0
std,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,1.00099,0.0
min,-0.4197819,-0.4877224,-1.557842,-0.2725986,-1.465882,-3.880249,-2.335437,-1.267069,-0.9828429,-1.31399,-2.707379,-3.907193,-1.531127,1.0
25%,-0.4109696,-0.4877224,-0.8676906,-0.2725986,-0.9130288,-0.5686303,-0.837448,-0.8056878,-0.6379618,-0.767576,-0.4880391,0.2050715,-0.79942,1.0
50%,-0.3906665,-0.4877224,-0.2110985,-0.2725986,-0.1442174,-0.1084655,0.3173816,-0.2793234,-0.5230014,-0.4646726,0.274859,0.3811865,-0.1812536,1.0
75%,0.00739656,0.04877224,1.015999,-0.2725986,0.598679,0.4827678,0.9067981,0.6623709,1.661245,1.530926,0.8065758,0.433651,0.6030188,1.0
max,9.933931,3.804234,2.422565,3.668398,2.732346,3.555044,1.117494,3.960518,1.661245,1.798194,1.638828,0.4410519,3.548771,1.0


In [4]:
# 划分数据集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2021)
X_train.shape[0]

404

In [5]:
# 模型训练与梯度更新
def l1_loss(X, y, theta):
    """
    :param X: 特征
    :param y: 目标值
    :param theta: 模型参数
    :return:
    """
    k = X.shape[0]
    total_loss = 0
    for i in range(k):
        total_loss += 1 / k * np.abs(y[i] - theta.dot(X[i, :]))
    loss = np.mean(np.abs([y[i] - theta.dot(X[i, :]) for i in range(k)]))
    print(loss)
    return total_loss

def l2_loss(X, y , theta):
    k = X.shape[0]
    total_loss = 0
    for i in range(k):
        total_loss += 1 / k * (y[i] - theta.dot(X[i, :])) ** 2
    return total_loss

In [6]:
def step_l1_gradient(X, y, learning_rate, theta):
    """
    计算MAE损失函数的梯度的函数，对于0处的不可微分点返回梯度值0
    :param X:
    :param y:
    :param learning_rate:
    :param theta:
    :return:
    """
    n = X.shape[0]
    e = y - X @ theta
    gradients = - (X.T @ np.sign(e)) / n
    theta = theta - learning_rate * gradients
    return theta

In [7]:
def step_l2_gradient(X, y, learning_rate, theta):
    k = X.shape[0]
    n = X.shape[1]
    gradients = np.zeros(n)
    for i in range(k):
        for j in range(n):
            gradients[j] += (-2 / k) * (y[i] - theta.dot(X[i, :])) * X[i, j]
    theta = theta - learning_rate * gradients
    return theta

In [8]:
def gradient_descent(X, y, learning_rate, iter):
    k = X.shape[0]
    n = X.shape[1]
    theta = np.zeros(n)
    loss_values = []
    for i in range(iter):
        theta = step_l1_gradient(X, y, learning_rate, theta)
        loss = l1_loss(X, y, theta)
        loss_values.append(loss)
        print(f'{i},  cost: {loss}')
    return theta, loss_values


In [12]:
X_train = np.array(X_train)
learning_rate = 0.04
iteration = 1000
theta, loss = gradient_descent(X_train, y_train, learning_rate, iteration)


22.507734370983364
0,  cost: 22.50773437098337
22.46744893998653
1,  cost: 22.467448939986518
22.427163508989693
2,  cost: 22.4271635089897
22.386878077992858
3,  cost: 22.38687807799285
22.346592646996026
4,  cost: 22.346592646996044
22.306307215999187
5,  cost: 22.30630721599919
22.26602178500235
6,  cost: 22.266021785002337
22.22573635400552
7,  cost: 22.225736354005534
22.18545092300868
8,  cost: 22.18545092300868
22.14516549201185
9,  cost: 22.14516549201185
22.104880061015017
10,  cost: 22.104880061015002
22.064594630018174
11,  cost: 22.064594630018192
22.024309199021346
12,  cost: 22.02430919902135
21.98402376802451
13,  cost: 21.9840237680245
21.94373833702767
14,  cost: 21.94373833702767
21.903452906030836
15,  cost: 21.90345290603085
21.863167475034007
16,  cost: 21.86316747503399
21.82288204403717
17,  cost: 21.822882044037186
21.782596613040337
18,  cost: 21.78259661304034
21.7423111820435
19,  cost: 21.7423111820435
21.702025751046666
20,  cost: 21.70202575104668
21.66174

14.96752964866876
192,  cost: 14.967529648668757
14.930239466115156
193,  cost: 14.930239466115156
14.892949283561547
194,  cost: 14.892949283561547
14.855659101007944
195,  cost: 14.855659101007944
14.818368918454343
196,  cost: 14.818368918454345
14.781078735900739
197,  cost: 14.781078735900735
14.743788553347132
198,  cost: 14.74378855334713
14.706498370793529
199,  cost: 14.706498370793524
14.669208188239923
200,  cost: 14.66920818823992
14.63191800568632
201,  cost: 14.631918005686309
14.594627823132713
202,  cost: 14.594627823132708
14.55733764057911
203,  cost: 14.557337640579098
14.520047458025502
204,  cost: 14.520047458025507
14.482757275471899
205,  cost: 14.4827572754719
14.445467092918296
206,  cost: 14.445467092918298
14.40817691036469
207,  cost: 14.40817691036468
14.370886727811087
208,  cost: 14.370886727811083
14.33359654525748
209,  cost: 14.33359654525749
14.296306362703877
210,  cost: 14.296306362703866
14.25901618015027
211,  cost: 14.259016180150278
14.221725997

7.754940328499994
390,  cost: 7.754940328499988
7.7222452011822496
391,  cost: 7.722245201182252
7.689550073864506
392,  cost: 7.689550073864504
7.656854946546762
393,  cost: 7.656854946546757
7.624163099771979
394,  cost: 7.624163099771976
7.591469600855925
395,  cost: 7.591469600855923
7.558774473538181
396,  cost: 7.558774473538176
7.526079346220437
397,  cost: 7.526079346220439
7.493384218902693
398,  cost: 7.493384218902692
7.460699202082842
399,  cost: 7.460699202082842
7.428411799346003
400,  cost: 7.428411799346003
7.396126684156232
401,  cost: 7.396126684156227
7.36384156896646
402,  cost: 7.363841568966467
7.331556453776687
403,  cost: 7.331556453776691
7.299271338586915
404,  cost: 7.299271338586917
7.267129315619599
405,  cost: 7.267129315619604
7.23521052211458
406,  cost: 7.235210522114582
7.203291728609563
407,  cost: 7.203291728609564
7.171372935104546
408,  cost: 7.17137293510454
7.1397135925862525
409,  cost: 7.139713592586254
7.1092579556858775
410,  cost: 7.10925795

3.939359035415593
589,  cost: 3.939359035415593
3.932165449189169
590,  cost: 3.9321654491891675
3.925092687898136
591,  cost: 3.9250926878981343
3.9180199266071023
592,  cost: 3.918019926607103
3.9109662549202016
593,  cost: 3.910966254920202
3.9039748224226316
594,  cost: 3.9039748224226303
3.8969833899250625
595,  cost: 3.8969833899250625
3.8899919574274926
596,  cost: 3.8899919574274904
3.883000524929923
597,  cost: 3.883000524929921
3.8760090924323536
598,  cost: 3.876009092432355
3.8690176599347845
599,  cost: 3.869017659934783
3.8620396463297824
600,  cost: 3.862039646329782
3.855127865007944
601,  cost: 3.855127865007945
3.8482160836861055
602,  cost: 3.848216083686105
3.8413144300059665
603,  cost: 3.841314430005965
3.834521263478754
604,  cost: 3.8345212634787535
3.8277280969515424
605,  cost: 3.827728096951543
3.82093493042433
606,  cost: 3.820934930424328
3.8141539333403847
607,  cost: 3.8141539333403816
3.8074645307110733
608,  cost: 3.8074645307110715
3.80088278636021
609

3.2934350547091107
786,  cost: 3.2934350547091116
3.2921991228858762
787,  cost: 3.292199122885875
3.2909635669047446
788,  cost: 3.290963566904746
3.2897377681331377
789,  cost: 3.2897377681331363
3.2885206248865804
790,  cost: 3.2885206248865813
3.287303481640023
791,  cost: 3.2873034816400213
3.2860863383934658
792,  cost: 3.286086338393467
3.284873680997517
793,  cost: 3.284873680997517
3.28367492617826
794,  cost: 3.2836749261782603
3.282476171359002
795,  cost: 3.2824761713590047
3.2812774165397447
796,  cost: 3.2812774165397487
3.280078661720487
797,  cost: 3.280078661720492
3.2788799069012295
798,  cost: 3.2788799069012304
3.2776811520819717
799,  cost: 3.277681152081971
3.276496774233495
800,  cost: 3.276496774233499
3.2753162667776876
801,  cost: 3.2753162667776885
3.2741352395704264
802,  cost: 3.2741352395704237
3.2729640189186098
803,  cost: 3.272964018918608
3.2718109348233497
804,  cost: 3.2718109348233493
3.2706566358019633
805,  cost: 3.2706566358019633
3.2695015592270

3.1667322835610916
987,  cost: 3.1667322835610925
3.1663934117279084
988,  cost: 3.1663934117279053
3.166054209674634
989,  cost: 3.1660542096746345
3.16572155041481
990,  cost: 3.1657215504148097
3.1653799777515843
991,  cost: 3.1653799777515865
3.165040512433019
992,  cost: 3.165040512433017
3.164703039081569
993,  cost: 3.164703039081567
3.1643942112412895
994,  cost: 3.1643942112412855
3.1640668751284506
995,  cost: 3.164066875128451
3.1637541469752177
996,  cost: 3.163754146975219
3.163436096364209
997,  cost: 3.1634360963642045
3.1631221360716815
998,  cost: 3.163122136071684
3.1628086304179255
999,  cost: 3.162808630417927
