# 线性回归
模型:
$f(x_i)=w_1x_i^1+w_2x_i^2+...+w_px_i^p+b$

即:
$f(x_i)=W \cdot X_i$, $W=[b, w_1, w_2, ..., w_p]$, $X_i=[1, x_i^1, x_i^2, ..., x_i^p]$

损失函数:
$\arg\min_{W}\sum_{i=1}^n[f(x_i)-y_i]^2$

## 读取数据

In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older release to run.
boston = load_boston()
x = boston.data      # feature matrix
y = boston.target    # regression target

# Sanity check: array shapes plus the first sample and its target.
print(f"x.shape:{x.shape}")
print(f"y.shape:{y.shape}")
print(f'x[0]:{x[0]}')
print(f'y[0]:{y[0]}')

f.shape:(506, 13)
y.shape:(506,)
x[0]:[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00]
y[0]:24.0


## 划分数据

In [8]:
# Hold out part of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=8,
)
# Show how many rows landed in each partition, one shape per line.
print(x_train.shape, x_test.shape, sep='\n')

(379, 13)
(127, 13)


## 训练模型

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate

# Linear regression, evaluated with 10-fold cross-validation on the training
# split. The scorer reports the negated mean squared error per fold.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 — this cell needs an older release to run.
lr = LinearRegression(n_jobs=2, normalize=True)
scores = cross_validate(
    estimator=lr,
    X=x_train,
    y=y_train,
    scoring='neg_mean_squared_error',
    cv=10,
)


In [10]:
# Display the cross-validation results: per-fold fit times, score times and
# test scores.
scores

{'fit_time': array([0.00181413, 0.00105405, 0.00086594, 0.00084567, 0.00080514,
        0.00081396, 0.00091219, 0.00093603, 0.00088692, 0.00086713]),
 'score_time': array([0.00036192, 0.00030589, 0.00029111, 0.00027704, 0.00027609,
        0.00027895, 0.00030208, 0.00032306, 0.00029302, 0.000283  ]),
 'test_score': array([-28.58836436, -18.05963883, -38.65397282, -16.72591549,
        -19.24338451, -41.11994213, -46.86664107, -11.25843391,
         -9.8140435 , -15.88790921])}

In [11]:
# Scratch arithmetic: (1/3) * log2(1/3).
# NOTE(review): looks like an entropy-style term unrelated to the regression
# above — confirm this cell belongs in the notebook.
1.0/3*np.log2(1.0/3) 

-0.5283208335737187

In [12]:
# Scratch arithmetic: (1/2) * log2(1/2).
# NOTE(review): looks like an entropy-style term unrelated to the regression
# above — confirm this cell belongs in the notebook.
1.0/2 * np.log2(1.0/2)

-0.5