# 多元线性回归的一般过程
### 基本原理
使用多维空间中的一个超平面去拟合多维空间中的样本点，寻找最佳的参数（系数和截距），使得损失最小。参数求解的工作已经由数学家帮我们做好，我们只需要直接套用公式即可。 <br>
你唯一需要知道的就是正规方程这个公式，使用正规方程就能求解出最优的参数。 
### 正规方程
$\theta = (X^TX)^{-1}X^TY$

In [1]:
import numpy as np
from sklearn.datasets import load_boston

In [2]:
# Load the dataset and split it into the feature matrix and the target vector.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs scikit-learn < 1.2 — confirm the pinned
# version.
boston = load_boston()
x = boston.data
y = boston.target

In [3]:
# Dimensions of the feature matrix: (number of samples, number of features).
x.shape

(506, 13)

In [4]:
# Dimensions of the target vector: one value per sample.
y.shape

(506,)

### 生成偏置b
参数向量的排列方式如下（偏置 $b$ 排在第一位）：<br>
$\theta = (b, \theta_1, \theta_2, \dots, \theta_n)$

In [5]:
# Preview the first five rows of a ones column with one row per sample in x.
np.ones((x.shape[0], 1))[:5]

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]])

### 将偏置与X拼接到一起

In [6]:
# Prepend a column of ones to x so that the bias is fitted as the first
# parameter, then preview the first five rows of the augmented matrix.
bias_column = np.ones((x.shape[0], 1))
X_b = np.concatenate((bias_column, x), axis=1)
X_b[:5]

array([[1.0000e+00, 6.3200e-03, 1.8000e+01, 2.3100e+00, 0.0000e+00,
        5.3800e-01, 6.5750e+00, 6.5200e+01, 4.0900e+00, 1.0000e+00,
        2.9600e+02, 1.5300e+01, 3.9690e+02, 4.9800e+00],
       [1.0000e+00, 2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00,
        4.6900e-01, 6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00,
        2.4200e+02, 1.7800e+01, 3.9690e+02, 9.1400e+00],
       [1.0000e+00, 2.7290e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00,
        4.6900e-01, 7.1850e+00, 6.1100e+01, 4.9671e+00, 2.0000e+00,
        2.4200e+02, 1.7800e+01, 3.9283e+02, 4.0300e+00],
       [1.0000e+00, 3.2370e-02, 0.0000e+00, 2.1800e+00, 0.0000e+00,
        4.5800e-01, 6.9980e+00, 4.5800e+01, 6.0622e+00, 3.0000e+00,
        2.2200e+02, 1.8700e+01, 3.9463e+02, 2.9400e+00],
       [1.0000e+00, 6.9050e-02, 0.0000e+00, 2.1800e+00, 0.0000e+00,
        4.5800e-01, 7.1470e+00, 5.4200e+01, 6.0622e+00, 3.0000e+00,
        2.2200e+02, 1.8700e+01, 3.9690e+02, 5.3300e+00]])

### 用正规方程求解参数

In [7]:
# Solve the normal equation (X^T X) theta = X^T y for theta.
# np.linalg.solve works on the linear system directly instead of forming the
# explicit inverse of X^T X; this is cheaper and numerically more accurate
# than inv(X^T X) @ X^T @ y while giving the same parameters.
theta = np.linalg.solve(X_b.T @ X_b, X_b.T @ y)
theta

array([ 3.64594884e+01, -1.08011358e-01,  4.64204584e-02,  2.05586264e-02,
        2.68673382e+00, -1.77666112e+01,  3.80986521e+00,  6.92224640e-04,
       -1.47556685e+00,  3.06049479e-01, -1.23345939e-02, -9.52747232e-01,
        9.31168327e-03, -5.24758378e-01])

### 截距和系数

In [8]:
# The first entry of theta is the bias (intercept); the remaining entries are
# the per-feature coefficients.
intercept, coef = theta[0], theta[1:]

### 预测

In [9]:
# Take the first sample as a single-row 2-D array and prepend the constant 1
# so its layout matches the bias-augmented training matrix.
first_sample = x[:1]
ones_column = np.ones((first_sample.shape[0], 1))
x_test = np.concatenate((ones_column, first_sample), axis=1)
x_test.shape

(1, 14)

In [10]:
x_test @ theta  # predicted value for the sample

array([30.00384338])

### 使用sklearn

In [11]:
from sklearn.linear_model import LinearRegression

In [12]:
# Fit scikit-learn's LinearRegression on the same data and predict the first
# sample, for comparison with the normal-equation result.
lr = LinearRegression().fit(x, y)
lr.predict(x[:1])

array([30.00384338])