In [6]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
from sklearn.datasets import fetch_california_housing

In [7]:
from sklearn.model_selection import train_test_split

# Load the California housing dataset as pandas objects (features, target).
data, target = fetch_california_housing(as_frame=True, return_X_y=True)

# Hold out a test set; the fixed seed keeps the split reproducible between runs.
data_train, data_test, target_train, target_test = train_test_split(
    data,
    target,
    random_state=42,
)

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsRegressor

# Scaling step, kept under its own name so it can be inspected separately.
preprocessing = StandardScaler()

# Two-step pipeline: scale the features, then fit a k-nearest-neighbours
# regressor. The step names act as prefixes for nested hyper-parameter names
# (e.g. "KNeighbors__n_neighbors", "preprocessing__with_mean").
model = Pipeline(
    steps=[
        ("preprocessing", preprocessing),
        ("KNeighbors", KNeighborsRegressor()),
    ],
)

# Last expression: show the pipeline's repr as the cell output.
model

Pipeline(steps=[('preprocessing', StandardScaler()),
                ('KNeighbors', KNeighborsRegressor())])

In [11]:
from sklearn.model_selection import cross_validate

# Cross-validate the pipeline on the training split (default CV settings) and
# collect the per-fold fit time, score time and test score in a DataFrame.
results = pd.DataFrame(
    cross_validate(model, data_train, target_train)
)

# Last expression: render the per-fold table.
results

Unnamed: 0,fit_time,score_time,test_score
0,0.02442,0.267077,0.669533
1,0.01848,0.208864,0.694065
2,0.018901,0.200891,0.690931
3,0.017794,0.25092,0.676837
4,0.024267,0.240246,0.683274


In [13]:
# Displayed side by side as a tuple:
#   1. every tunable parameter of the pipeline (nested ones use the
#      "<step>__<param>" naming), and
#   2. ten log-spaced values between 10**0 and 10**3, a candidate range for
#      the number of neighbours.
(
    model.get_params(),
    np.logspace(0, 3, num=10),
)

({'memory': None,
  'steps': [('preprocessing', StandardScaler()),
   ('KNeighbors', KNeighborsRegressor())],
  'verbose': False,
  'preprocessing': StandardScaler(),
  'KNeighbors': KNeighborsRegressor(),
  'preprocessing__copy': True,
  'preprocessing__with_mean': True,
  'preprocessing__with_std': True,
  'KNeighbors__algorithm': 'auto',
  'KNeighbors__leaf_size': 30,
  'KNeighbors__metric': 'minkowski',
  'KNeighbors__metric_params': None,
  'KNeighbors__n_jobs': None,
  'KNeighbors__n_neighbors': 5,
  'KNeighbors__p': 2,
  'KNeighbors__weights': 'uniform'},
 array([   1.        ,    2.15443469,    4.64158883,   10.        ,
          21.5443469 ,   46.41588834,  100.        ,  215.443469  ,
         464.15888336, 1000.        ]))

In [16]:
from scipy.stats import loguniform
from sklearn.model_selection import GridSearchCV

# Candidate neighbour counts: ten log-spaced values from 1 to 1000, truncated
# to integers by the int32 cast.
n_neighbors_candidates = np.logspace(0, 3, num=10).astype(np.int32)

# Exhaustive grid: 2 (with_mean) x 2 (with_std) x 10 (n_neighbors) = 40
# parameter combinations, addressed through the "<step>__<param>" names.
param_grid = {
    "preprocessing__with_mean": (True, False),
    "preprocessing__with_std": (True, False),
    "KNeighbors__n_neighbors": n_neighbors_candidates,
}

# Each combination is evaluated with 2-fold cross-validation on the training split.
model_grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=2,
)

# Last expression: fit the search and show the fitted object's repr.
model_grid_search.fit(data_train, target_train)

GridSearchCV(cv=2,
             estimator=Pipeline(steps=[('preprocessing', StandardScaler()),
                                       ('KNeighbors', KNeighborsRegressor())]),
             param_grid={'KNeighbors__n_neighbors': array([   1,    2,    4,   10,   21,   46,  100,  215,  464, 1000],
      dtype=int32),
                         'preprocessing__with_mean': (True, False),
                         'preprocessing__with_std': (True, False)})