![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.png)

### 增加 $x_0 = 1$，方便计算

![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A7.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A7.png)

### 扩增到所有样本

![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A73.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A73.png)

### 化简

![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A74.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A74.png)

### 结果

![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A75.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A75.png)

### 总结

![%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A76.png](attachment:%E5%A4%9A%E5%85%83%E7%BA%BF%E6%80%A76.png)

### 实现多元线性回归模型

In [42]:
import numpy as np
from sklearn.metrics import r2_score

class LinearRegression:
    """Multiple linear regression solved in closed form (ordinary least squares).

    Attributes:
        coef_: feature weights (theta_1 .. theta_n); None until fitted.
        interception_: intercept term (theta_0); None until fitted.
    """

    def __init__(self):
        """Create an unfitted model; every parameter starts as None."""
        self.coef_ = None           # coefficients, theta_1 .. theta_n
        self.interception_ = None   # intercept, theta_0
        self._theta = None          # full parameter vector, theta_0 .. theta_n

    def fit_normal(self, X_train, y_train):
        """Fit the model to X_train / y_train by least squares.

        Args:
            X_train: 2-D array, one row per sample and one column per feature.
            y_train: array of targets with the same number of rows as X_train.

        Returns:
            self, so that calls can be chained.

        Raises:
            AssertionError: if X_train and y_train have different row counts.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so that theta_0 acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])

        # The pseudo-inverse equals inv(X_b.T @ X_b) @ X_b.T when X_b has full
        # column rank, but it also copes with collinear features or too few
        # samples, where the explicit inverse would fail or be unstable.
        self._theta = np.linalg.pinv(X_b).dot(y_train)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def predict(self, X_predict):
        """Return the predicted targets for the samples in X_predict.

        Args:
            X_predict: 2-D array with the same number of columns as the
                training data.

        Raises:
            AssertionError: if the model is not fitted yet, or if the feature
                count differs from the one seen during fitting.
        """
        assert self.interception_ is not None and self.coef_ is not None, \
            "must fit before predict !"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        # Same intercept column as in fit_normal.
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the model's predictions on X_test vs y_test."""
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"

### 测试

In [43]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [44]:
# Load the Boston housing data set bundled with scikit-learn.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# installed version still provides it before re-running this cell.
boston = datasets.load_boston()

X, y = boston.data, boston.target

# Keep only the samples whose target is below 50.0
# (presumably 50.0 is a capped value in this data set — confirm).
X, y = X[y < 50.0], y[y < 50.0]

In [45]:
# Show the (rows, columns) shape of the feature matrix after filtering.
X.shape

(490, 13)

In [46]:
from sklearn.model_selection import train_test_split

# Split into train and test parts (test_size=0.2); the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=666,
)

In [47]:
# Fit the hand-written model on the training split; fit_normal returns the
# model itself, so the notebook echoes its repr.
reg = LinearRegression()
reg.fit_normal(X_train, y_train)

LinearRegression()

In [48]:
# Fitted feature weights (theta_1 .. theta_n), one per input column.
reg.coef_

array([-1.20354261e-01,  3.64423279e-02, -3.61493155e-02,  5.12978140e-02,
       -1.15775825e+01,  3.42740062e+00, -2.32311760e-02, -1.19487594e+00,
        2.60101728e-01, -1.40219119e-02, -8.35430488e-01,  7.80472852e-03,
       -3.80923751e-01])

In [49]:
# Fitted intercept (theta_0).
reg.interception_

34.11739972320428

In [50]:
# R^2 score of the fitted model on the held-out test split.
reg.score(X_test, y_test)

0.8129794056212832