# Hyper-parameter optimization

## Data

In [None]:
import seaborn as sns

In [None]:
# Fetch the 'penguins' example dataset via seaborn.
penguins = sns.load_dataset(name='penguins')

In [None]:
# Discard every row (axis=0) that has at least one missing value (how='any').
penguins = penguins.dropna(axis=0, how='any')

In [None]:
# Columns used as model inputs.
features = [
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
]

In [None]:
# Target: the species column. Inputs: the columns listed in `features`.
y = penguins['species']
X = penguins.loc[:, features]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out one third of the rows for the final evaluation.
# - random_state pins the shuffle, so re-running the notebook reproduces the
#   same split and the reported scores stay comparable between runs.
# - stratify=y keeps the class proportions of `y` the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=0,
    stratify=y,
)

## K-nearest neighbors

We will tune two hyper-parameters, `n_neighbors` and `weights`:
```
 |  n_neighbors : int, default=5
 |      Number of neighbors to use by default for :meth:`kneighbors` queries.
 |  
 |  weights : {'uniform', 'distance'}, callable or None, default='uniform'
 |      Weight function used in prediction.  Possible values:
 |  
 |      - 'uniform' : uniform weights.  All points in each neighborhood
 |        are weighted equally.
 |      - 'distance' : weight points by the inverse of their distance.
 |        in this case, closer neighbors of a query point will have a
 |        greater influence than neighbors which are further away.
 |      - [callable] : a user-defined function which accepts an
 |        array of distances, and returns an array of the same shape
 |        containing the weights.
```

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
from sklearn.pipeline import make_pipeline

In [None]:
# Chain scaling and the classifier so the scaler is re-fit together with the
# model on whatever data the pipeline is fit on. k-NN is distance-based, so
# the features are standardized first.
pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())

In [None]:
# Display the pipeline's steps by name; these names are the prefixes used
# when addressing a step's parameters as '<step name>__<parameter>'.
pipe.named_steps

In [None]:
# Search space, keyed as '<pipeline step>__<parameter>':
# both weighting schemes crossed with the odd neighbor counts 1, 3, 5, 7, 9.
parameters = {
    'kneighborsclassifier__weights': ['uniform', 'distance'],
    'kneighborsclassifier__n_neighbors': list(range(1, 10, 2)),
}

# Exhaustively evaluate every combination on the training data only;
# fit() returns the fitted search object itself.
clf = GridSearchCV(pipe, param_grid=parameters).fit(X_train, y_train)

# Show the combination selected by the search.
clf.best_params_

In [None]:
# Score the fitted search object on the held-out test rows, which were not
# passed to fit(). NOTE(review): no `scoring` was given to GridSearchCV, so
# this presumably reports the classifier's default score (mean accuracy) --
# confirm against the scikit-learn docs.
clf.score(X_test, y_test)