In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import jupyterthemes
import pandas as pd

from jupyterthemes import jtplot
jtplot.style()

In [2]:
boston = load_boston()

# create X (features) and y (response)
X = boston.data
y = boston.target.astype(int)

In [3]:
boston.target[:10]

array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9])

In [4]:
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [5]:
from sklearn.metrics import mean_absolute_error

In [6]:
for i in range(10):
    rand_state = np.random.randint(100) # Генерация случайного числа для random state
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=rand_state) # Разбиение датасета на обучаемую и тестовую выборку
    # Обучение модели Knn
    model = LinearRegression() 
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mae = mean_absolute_error(y_test, predictions) # Оценка качества модели
    print('№{} MAE: {}'.format(i, round(mae, 5)))

№0 MAE: 3.3336
№1 MAE: 3.74433
№2 MAE: 3.26172
№3 MAE: 3.38435
№4 MAE: 3.38435
№5 MAE: 3.73576
№6 MAE: 3.53024
№7 MAE: 3.32843
№8 MAE: 3.42312
№9 MAE: 3.54722


In [7]:
from sklearn.model_selection import GridSearchCV

In [8]:
# define the parameter values that should be searched
k_range = list(range(0, 5))
print(k_range)

[0, 1, 2, 3, 4]


In [9]:
# create a parameter grid: map the parameter names to the values that should be searched
# simply a python dictionary
# key: parameter name
# value: list of values that should be searched for that parameter
# single key-value pair for param_grid
param_grid = {'fit_intercept': k_range, 'normalize': [True, False]}
print(param_grid)

{'fit_intercept': [0, 1, 2, 3, 4], 'normalize': [True, False]}


In [10]:
# instantiate the grid
grid = GridSearchCV(LinearRegression(), param_grid=param_grid)

In [11]:
# fit the grid with data
grid.fit(X, y)

GridSearchCV(cv=None, error_score=nan,
             estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                        n_jobs=None, normalize=False),
             iid='deprecated', n_jobs=None,
             param_grid={'fit_intercept': [0, 1, 2, 3, 4],
                         'normalize': [True, False]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [12]:
# view the complete results (list of named tuples)
for k in grid.cv_results_:
    print(k, ":", grid.cv_results_[k])

mean_fit_time : [0.00259724 0.00104289 0.00160742 0.00154419 0.00101485 0.00078721
 0.00095019 0.00079985 0.00101776 0.00093164]
std_fit_time : [2.62397307e-03 2.90653992e-04 5.28833756e-04 6.65453878e-04
 1.11075897e-04 4.51127770e-05 5.02787646e-05 7.89630528e-05
 1.94731935e-04 1.64317116e-04]
mean_score_time : [0.00139055 0.00086308 0.00106664 0.00100002 0.0006392  0.0006701
 0.00065002 0.00063157 0.00065298 0.00062699]
std_score_time : [4.90469764e-04 2.13224441e-04 4.26664057e-04 4.68060075e-04
 5.74607572e-05 9.28805591e-05 7.68444348e-05 5.68411094e-05
 9.40672218e-05 7.72104350e-05]
param_fit_intercept : [0 0 1 1 2 2 3 3 4 4]
param_normalize : [True False True False True False True False True False]
params : [{'fit_intercept': 0, 'normalize': True}, {'fit_intercept': 0, 'normalize': False}, {'fit_intercept': 1, 'normalize': True}, {'fit_intercept': 1, 'normalize': False}, {'fit_intercept': 2, 'normalize': True}, {'fit_intercept': 2, 'normalize': False}, {'fit_intercept': 3, 'n

In [13]:
grid.best_params_

{'fit_intercept': 0, 'normalize': True}

In [14]:
from sklearn.model_selection import RandomizedSearchCV

In [15]:
rnd_search = RandomizedSearchCV(LinearRegression(), param_distributions=param_grid)
rnd_search.fit(X, y)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                              n_jobs=None, normalize=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'fit_intercept': [0, 1, 2, 3, 4],
                                        'normalize': [True, False]},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring=None, verbose=0)

In [16]:
rnd_search.best_estimator_

LinearRegression(copy_X=True, fit_intercept=0, n_jobs=None, normalize=True)

In [17]:
rnd_search.best_estimator_

LinearRegression(copy_X=True, fit_intercept=0, n_jobs=None, normalize=True)

In [18]:
rnd_search.best_params_

{'normalize': True, 'fit_intercept': 0}

In [19]:
rnd_search.best_params_

{'normalize': True, 'fit_intercept': 0}

In [20]:
rnd_search.best_score_

0.4068320322856941

In [21]:
rnd_search.best_score_

0.4068320322856941