In [200]:
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris, load_diabetes
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

dataset = load_diabetes()
print(diabetes.keys())
X, y = dataset.data, dataset.target
X.shape, y.shape

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])


((442, 10), (442,))

In [201]:
#Usando apenas Cross Validate

In [202]:
classificador_knn = KNeighborsRegressor()

scores_cv = cross_validate(classificador_knn, X, y, scoring=make_scorer(mean_squared_error, squared=False))
print(scores_cv['test_score'])
sempad = np.mean(scores_cv['test_score'])
print(f"Sem padronização: {sempad}")

[56.2778101  63.42066441 59.63695849 59.59086644 61.70551029]
Sem padronização: 60.126361947859564


In [203]:
classificador_lr = LogisticRegression()

scores_cv = cross_validate(classificador_lr, X, y, scoring=make_scorer(mean_squared_error, squared=False))
print(scores_cv['test_score'])
sempad = np.mean(scores_cv['test_score'])
print(f"Sem padronização: {sempad}")

[ 72.28944629  68.2585962   87.86540585 113.62068074  72.8651463 ]
Sem padronização: 82.9798550755253


In [204]:
#Usando apenas Grid Search

In [205]:
hiper_parametros = {'n_neighbors': [3,5,7,9]}

grid_search_knn = GridSearchCV(classificador_knn , hiper_parametros, scoring=make_scorer(mean_squared_error, squared=False))
grid_search_knn.fit(X, y)

GridSearchCV(estimator=KNeighborsRegressor(),
             param_grid={'n_neighbors': [3, 5, 7, 9]},
             scoring=make_scorer(mean_squared_error, squared=False))

In [206]:
grid_search_knn.cv_results_

{'mean_fit_time': array([0.00299664, 0.00219698, 0.00119891, 0.00159888]),
 'std_fit_time': array([0.00089346, 0.000747  , 0.00040035, 0.00048963]),
 'mean_score_time': array([0.0059999 , 0.00479841, 0.00339856, 0.00319901]),
 'std_score_time': array([0.00209742, 0.00146971, 0.00135661, 0.00074918]),
 'param_n_neighbors': masked_array(data=[3, 5, 7, 9],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 3},
  {'n_neighbors': 5},
  {'n_neighbors': 7},
  {'n_neighbors': 9}],
 'split0_test_score': array([58.07341774, 56.2778101 , 56.99180879, 56.19003238]),
 'split1_test_score': array([67.43002106, 63.42066441, 62.93518794, 59.32108614]),
 'split2_test_score': array([62.28861222, 59.63695849, 59.57483095, 59.00744094]),
 'split3_test_score': array([62.02315061, 59.59086644, 55.63774296, 54.78238886]),
 'split4_test_score': array([67.25015489, 61.70551029, 60.43146504, 57.37903883]),
 'mean_test_score': array([63

In [207]:
grid_search_knn.best_estimator_

KNeighborsRegressor(n_neighbors=3)

In [208]:
#Usando Grid Search + Cross Validate

In [209]:
scores_cv_gs = cross_validate(grid_search_knn, X, y, scoring=make_scorer(mean_squared_error, squared=False), return_estimator=True)
print(scores_cv_gs['test_score'])
sempad = np.mean(scores_cv_gs['test_score'])
print(f"Sem padronização: {sempad}")

[58.07341774 67.43002106 62.28861222 62.02315061 67.25015489]
Sem padronização: 63.41307130519219


In [210]:
scores_cv_gs

{'fit_time': array([0.2058692 , 0.10593581, 0.10093617, 0.10293818, 0.10093713]),
 'score_time': array([0.00399637, 0.00199842, 0.00199819, 0.00299764, 0.0019989 ]),
 'estimator': (GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [3, 5, 7, 9]},
               scoring=make_scorer(mean_squared_error, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [3, 5, 7, 9]},
               scoring=make_scorer(mean_squared_error, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [3, 5, 7, 9]},
               scoring=make_scorer(mean_squared_error, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [3, 5, 7, 9]},
               scoring=make_scorer(mean_squared_error, squared=False)),
  GridSearchCV(estimator=KNeighborsRegressor(),
               param_grid={'n_neighbors': [3, 5, 7, 9]},
        

In [211]:
for estimator in scores_cv_gs['estimator']:
    print(estimator.best_estimator_)

KNeighborsRegressor(n_neighbors=3)
KNeighborsRegressor(n_neighbors=3)
KNeighborsRegressor(n_neighbors=3)
KNeighborsRegressor(n_neighbors=3)
KNeighborsRegressor(n_neighbors=3)


# Pipeline + GridSearch

In [214]:
#Com o Grid Search dentro do Pipeline
parametros = {'n_neighbors': [3,5,7]}

GridSearch_KNN = GridSearchCV(KNeighborsRegressor(), parametros, scoring='neg_root_mean_squared_error')

pipeline = Pipeline([
    ("padronização", StandardScaler()),
    ("gsknn", GridSearch_KNN)
])
scores_cv_pipe = cross_validate(pipeline, X, y, scoring=make_scorer(mean_squared_error, squared=False))
print(scores_cv_pipe['test_score'])
compad = np.mean(scores_cv_pipe['test_score'])
print(f"Com padronização: {compad}")

[56.82092904 63.05450375 60.15515846 54.92728369 59.33477082]
Com padronização: 58.858529150715086


In [216]:
#Com o Pipeline dentro do Grid Search

pipeline = Pipeline([
    ("padronização", StandardScaler()),
    ("knn", KNeighborsRegressor())
])

parametros = {'knn__n_neighbors': [3,5,7]}

grid_search_pipeline = GridSearchCV(pipeline, parametros, scoring='neg_root_mean_squared_error')

scores_cv_gs_pipe = cross_validate(grid_search_pipeline, X, y, scoring=make_scorer(mean_squared_error, squared=False))
print(scores_cv_gs_pipe['test_score'])
compad = np.mean(scores_cv_gs_pipe['test_score'])
print(f"Com padronização: {compad}")

[56.82092904 63.05450375 60.15515846 54.92728369 59.33477082]
Com padronização: 58.858529150715086
