In [1]:
# 一元线性回归分析例子
from sklearn import datasets
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np

In [3]:
boston = datasets.load_boston()
print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [2]:
# 生成连续的值
np.linspace(0, 10, 50)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [3]:
# 生成误差
np.random.uniform(2, -2, 50)

array([-0.75129536, -0.02379179,  0.2358307 , -1.48369662,  0.8844868 ,
        0.62017999, -1.5791455 , -1.44620836,  0.53282636, -1.96642391,
       -0.2011667 , -0.4575057 , -1.16012662,  1.12050843, -1.01648854,
        0.21407216, -1.44896191,  0.24225833,  0.11109032,  0.62435171,
        0.16116625, -0.08840531,  1.53582471, -0.49357582,  1.96789342,
        0.18885898, -1.98894767,  1.0896109 ,  0.21131571, -1.68867115,
        0.20527201, -1.42651844,  0.36808994,  1.13034472, -0.76214527,
        0.86522932, -1.83409817,  1.60036209,  0.40433247,  0.26940651,
       -0.47232439, -0.36691534, -0.83110346, -1.82718896, -1.50425275,
       -1.24269141, -1.07083458, -1.37065726, -0.62669106, -0.27343084])

In [4]:
boston = datasets.load_boston()

In [5]:
b_x = boston.data  # 特征变量
b_y = boston.target  # 目标值
x_train, x_test, y_train, y_test = train_test_split(b_x, b_y, test_size=0.25)

In [6]:
y_train

array([22.2, 33.2, 22.2, 23.6, 24.1, 23.9,  7. , 50. , 18.3, 33.1, 30.5,
       19.9, 17.2, 15.7, 33. , 13.8, 19.1, 35.1, 22.1, 20.1, 16.8, 16.5,
       10.8, 36.2, 13.1, 27. , 21.2, 50. , 31.7, 23.5,  8.7, 22.9, 15. ,
       27.5, 29.1, 19.8, 15.6, 10.4, 22.2,  8.1, 21.8, 22.8,  8.3, 48.5,
       20.4, 39.8, 34.9, 13. , 25. , 18.4, 13.4, 13.1, 29. , 21.5, 18.9,
       24.4, 23.1, 17. , 20. , 32.4, 15. , 18.5, 14.5, 24.1, 14.4, 13.3,
       32.5,  5. , 18.6, 23.4, 14.1, 30.1, 48.8, 23.2, 17.8, 21.9, 21.2,
       37.9, 30.1, 20.1, 15.4, 36.4, 13.4, 20.7, 44. , 22.2, 23.8, 22. ,
       12.7, 20.5, 20.4, 23.7, 25. , 16.7, 22. , 25.1, 10.2, 42.3, 23.8,
       50. , 20.7, 18.4, 26.4, 23.9, 11.3, 10.5, 50. , 23.9, 19.7, 13.8,
       23.3, 13.9, 35.4, 16.8, 43.8, 17.1, 18.2,  7.2, 21.4, 23.4, 17.4,
       19.6, 18.6, 21.9, 16.7, 33.1, 28. , 24. , 13.5, 16.4, 24.2,  7.4,
        8.4, 20.5, 25. , 19.4, 22.4, 22.6, 31.2, 24.6, 10.9, 25.3, 13.1,
       21.2, 12.6, 20.1, 50. , 19.2, 19.1, 30.3, 23

In [7]:
b_x

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [8]:
b_y

array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
       18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
       15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
       13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
       21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
       35.4, 24.7, 31.6, 23.3, 19.6, 18.7, 16. , 22.2, 25. , 33. , 23.5,
       19.4, 22. , 17.4, 20.9, 24.2, 21.7, 22.8, 23.4, 24.1, 21.4, 20. ,
       20.8, 21.2, 20.3, 28. , 23.9, 24.8, 22.9, 23.9, 26.6, 22.5, 22.2,
       23.6, 28.7, 22.6, 22. , 22.9, 25. , 20.6, 28.4, 21.4, 38.7, 43.8,
       33.2, 27.5, 26.5, 18.6, 19.3, 20.1, 19.5, 19.5, 20.4, 19.8, 19.4,
       21.7, 22.8, 18.8, 18.7, 18.5, 18.3, 21.2, 19.2, 20.4, 19.3, 22. ,
       20.3, 20.5, 17.3, 18.8, 21.4, 15.7, 16.2, 18. , 14.3, 19.2, 19.6,
       23. , 18.4, 15.6, 18.1, 17.4, 17.1, 13.3, 17.8, 14. , 14.4, 13.4,
       15.6, 11.8, 13.8, 15.6, 14.6, 17.8, 15.4, 21