## scikit-learn中的回归问题
> **注意：sklearn 中的 fit 方式并不是我们自己封装的正规方程解这种方式**

In [1]:
# Imports: standard library first, then third-party packages.
import sys

import numpy as np
from sklearn import datasets

# Extend the module search path with the parent directory before importing
# the project-local `ml_python` package.
sys.path.append('../')
from ml_python.model_selection import train_test_split

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = datasets.load_boston()
x, y = boston.data, boston.target

# Uses the project's own train_test_split (not scikit-learn's); the `seed`
# argument presumably fixes the shuffle so the split is repeatable - confirm
# against ml_python.model_selection.
X_train, X_test, y_train, y_test = train_test_split(x, y, seed=666)

In [2]:
from sklearn.linear_model import LinearRegression

# Create the linear regression estimator with its default settings.
lin_reg = LinearRegression()

In [3]:
# Fit the linear model on the training split; the fitted estimator's repr is
# shown as the cell output.
lin_reg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [4]:
# Fitted coefficients of the linear model (one value per feature column).
lin_reg.coef_

array([-7.95894880e-02,  4.94361879e-02,  6.86615042e-02,  2.55724695e+00,
       -1.61072486e+01,  4.09729380e+00,  6.75085474e-03, -1.41803278e+00,
        2.93986843e-01, -1.41754834e-02, -9.70502529e-01,  1.20053220e-02,
       -5.31221945e-01])

In [5]:
# Fitted intercept (bias) term of the linear model.
lin_reg.intercept_

32.83500778240816

In [6]:
# Evaluate on the held-out test split; for scikit-learn regressors `score`
# returns the coefficient of determination (R^2).
lin_reg.score(X_test, y_test)

0.6285476924385722

## 使用Knn Regressor解决回归问题
> **1） 在neighbors中有两类，一类为KNeighborsClassifier解决分类问题，另一类为KNeighborsRegressor解决回归问题**

> **2） 可以通过调节多个超参数（如 n_neighbors、weights、p）来优化 KNN 模型**

> **3） GridSearchCV 采用交叉验证（CV）来评估每组超参数，因此 best_score_ 是交叉验证得分，与在测试集上直接调用 score 得到的结果不是用同一种方式计算的**


In [7]:
from sklearn.neighbors import KNeighborsRegressor 

In [8]:
# Baseline: k-nearest-neighbours regressor with default hyperparameters,
# fitted on the training split and scored on the test split.
# NOTE(review): the features are used unscaled here; KNN is distance-based,
# so standardising the features first may change this score - worth checking.
knn_reg = KNeighborsRegressor()
knn_reg.fit(X_train, y_train)
knn_reg.score(X_test, y_test)

0.4794852438262164

In [9]:
from sklearn.model_selection import GridSearchCV

# Search space for the KNN grid search, as two sub-grids:
#   1. uniform weighting  - only the neighbour count (1..10) varies;
#   2. distance weighting - neighbour count (1..10) and the Minkowski
#      exponent `p` (1..5) vary.
# That is 10 + 10 * 5 = 60 candidate combinations in total.
param_grid = [
    {'weights': ['uniform'], 'n_neighbors': list(range(1, 11))},
    {
        'weights': ['distance'],
        'n_neighbors': list(range(1, 11)),
        'p': list(range(1, 6)),
    },
]

In [11]:
# Exhaustive hyperparameter search over `param_grid` for a fresh KNN regressor,
# using all available CPU cores (n_jobs=-1) and printing a progress summary
# (verbose=1).
# cv is pinned to 3 folds instead of relying on the library default: the
# default changed from 3 to 5 folds in scikit-learn 0.22, and the run recorded
# in this notebook used 3 folds. Pinning keeps the fold count the same across
# versions.
knn_reg = KNeighborsRegressor()
grid_search = GridSearchCV(knn_reg, param_grid, cv=3, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 60 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:    1.9s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'weights': ['uniform'], 'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, {'weights': ['distance'], 'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 'p': [1, 2, 3, 4, 5]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [12]:
# Hyperparameter combination that achieved the best cross-validation score.
grid_search.best_params_

{'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [13]:
# Cross-validation score of the best combination; it is computed on folds of
# the training data, not on X_test.
grid_search.best_score_

0.6232308299386636

In [14]:
# Score the best estimator on the held-out test split, so the number is
# directly comparable with the test scores from the earlier cells.
grid_search.best_estimator_.score(X_test, y_test)

0.6156471875033573