In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
from sklearn.datasets import fetch_california_housing

In [2]:
from sklearn.model_selection import train_test_split

# Fetch the California housing features and target as pandas objects, then
# hold out a test split with a fixed seed so the split is repeatable.
data, target = fetch_california_housing(as_frame=True, return_X_y=True)
data_train, data_test, target_train, target_test = train_test_split(
    data,
    target,
    random_state=42,
)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsRegressor

# Two-step pipeline: scale the features, then fit a k-nearest-neighbours
# regressor with its default settings.
preprocessing = StandardScaler()
model = Pipeline(
    steps=[("preprocessing", preprocessing), ("KNeighbors", KNeighborsRegressor())]
)
model

Pipeline(steps=[('preprocessing', StandardScaler()),
                ('KNeighbors', KNeighborsRegressor())])

In [4]:
from sklearn.model_selection import cross_validate

# Cross-validate the untuned pipeline on the training split and tabulate
# the per-fold timings and scores.
results = pd.DataFrame(cross_validate(model, data_train, target_train))
results

Unnamed: 0,fit_time,score_time,test_score
0,0.034576,0.22602,0.669533
1,0.017273,0.196058,0.694065
2,0.016562,0.194019,0.690931
3,0.018013,0.22216,0.676837
4,0.016163,0.187657,0.683274


In [5]:
# Show the pipeline's tunable parameter names next to ten log-spaced values
# between 10**0 and 10**3.
(
    model.get_params(deep=True),
    np.logspace(start=0, stop=3, num=10),
)

({'memory': None,
  'steps': [('preprocessing', StandardScaler()),
   ('KNeighbors', KNeighborsRegressor())],
  'verbose': False,
  'preprocessing': StandardScaler(),
  'KNeighbors': KNeighborsRegressor(),
  'preprocessing__copy': True,
  'preprocessing__with_mean': True,
  'preprocessing__with_std': True,
  'KNeighbors__algorithm': 'auto',
  'KNeighbors__leaf_size': 30,
  'KNeighbors__metric': 'minkowski',
  'KNeighbors__metric_params': None,
  'KNeighbors__n_jobs': None,
  'KNeighbors__n_neighbors': 5,
  'KNeighbors__p': 2,
  'KNeighbors__weights': 'uniform'},
 array([   1.        ,    2.15443469,    4.64158883,   10.        ,
          21.5443469 ,   46.41588834,  100.        ,  215.443469  ,
         464.15888336, 1000.        ]))

In [6]:
from scipy.stats import loguniform
from sklearn.model_selection import GridSearchCV

# Search space: toggle centring and scaling in the scaler, and try ten
# log-spaced neighbour counts from 1 to 1000 (truncated to int32).
param_grid = {
    "preprocessing__with_mean": (True, False),
    "preprocessing__with_std": (True, False),
    "KNeighbors__n_neighbors": np.logspace(0, 3, num=10).astype(np.int32),
}

# `fit` returns the search object itself, so the chained call binds the
# fitted search; the trailing expression displays it.
model_grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=2).fit(
    data_train, target_train
)
model_grid_search

GridSearchCV(cv=2,
             estimator=Pipeline(steps=[('preprocessing', StandardScaler()),
                                       ('KNeighbors', KNeighborsRegressor())]),
             param_grid={'KNeighbors__n_neighbors': array([   1,    2,    4,   10,   21,   46,  100,  215,  464, 1000],
      dtype=int32),
                         'preprocessing__with_mean': (True, False),
                         'preprocessing__with_std': (True, False)})

In [16]:
# Rank every hyper-parameter combination by mean validation score, best first.
cv_results = pd.DataFrame(model_grid_search.cv_results_)
cv_results = cv_results.sort_values(by="mean_test_score", ascending=False)

cv_results.head(n=3)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_KNeighbors__n_neighbors,param_preprocessing__with_mean,param_preprocessing__with_std,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score
14,0.012469,0.001522,0.485543,0.021771,10,False,True,"{'KNeighbors__n_neighbors': 10, 'preprocessing...",0.664784,0.720859,0.692821,0.028037,1
12,0.014149,0.002303,0.503743,0.014824,10,True,True,"{'KNeighbors__n_neighbors': 10, 'preprocessing...",0.664784,0.720859,0.692821,0.028037,1
18,0.010513,0.000931,0.591554,0.009094,21,False,True,"{'KNeighbors__n_neighbors': 21, 'preprocessing...",0.658006,0.712443,0.685225,0.027219,3


In [24]:
import plotly.express as px

# Columns of interest: one `param_*` column per searched hyper-parameter,
# followed by the score summary columns.
column_results = [f"param_{name}" for name in param_grid]
column_results += ["mean_test_score", "std_test_score", "rank_test_score"]

# Build the frame used for plotting:
#   * the spread and rank columns are dropped explicitly (the previous
#     dict-based `apply` discarded them silently because they had no entry);
#   * the hyper-parameter columns are cast to integers in one vectorized
#     step so that all remaining columns are numeric (booleans become 0/1).
cv_results_02 = (
    cv_results[column_results]
    .drop(columns=["std_test_score", "rank_test_score"])
    .astype(
        {
            "param_preprocessing__with_mean": "int64",
            "param_preprocessing__with_std": "int64",
            "param_KNeighbors__n_neighbors": "int64",
        }
    )
)
cv_results_02

['param_preprocessing__with_mean', 'param_preprocessing__with_std', 'param_KNeighbors__n_neighbors', 'mean_test_score', 'std_test_score', 'rank_test_score']


Unnamed: 0,param_preprocessing__with_mean,param_preprocessing__with_std,param_KNeighbors__n_neighbors,mean_test_score
14,0,1,10,0.692821
12,1,1,10,0.692821
18,0,1,21,0.685225
16,1,1,21,0.685225
8,1,1,4,0.674391
10,0,1,4,0.674391
20,1,1,46,0.668928
22,0,1,46,0.668928
26,0,1,100,0.641858
24,1,1,100,0.641858


In [25]:
# Parallel-coordinates view of the grid search: one line per parameter
# combination, coloured by its mean cross-validated score.
px.parallel_coordinates(
    data_frame=cv_results_02,
    color="mean_test_score",
    color_continuous_scale=px.colors.sequential.Viridis,
)