In [12]:

from numpy import mean
from sklearn.datasets import make_blobs
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
import warnings
# Silence every warning raised from here on so the cell outputs stay uncluttered.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# data-quality warnings; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [13]:

# generate a 2d classification dataset: 500 samples drawn around 3 centers
# random_state pins the generated blobs so a Restart & Run All reproduces the
# same data; the outputs saved in this notebook came from an unseeded run, so
# re-execute the cells to refresh them.
X, y = make_blobs(n_samples=500, centers=3, n_features=2, random_state=1)


In [14]:
# inspect the generated feature matrix (one row per sample, two feature columns)
print(X)

[[ 2.76265506e+00 -4.21694696e+00]
 [ 3.07928360e+00 -2.40991175e+00]
 [ 5.08106441e+00  5.14920681e-01]
 [ 3.02409896e+00 -4.93942252e+00]
 [ 3.53618176e+00 -3.15342368e+00]
 [ 4.82605952e+00 -4.96497839e+00]
 [ 5.82962012e+00 -7.80095960e-01]
 [ 3.74265249e+00 -6.17315545e+00]
 [ 2.52522513e+00 -5.07505685e+00]
 [ 2.75768961e+00 -4.32563180e+00]
 [ 5.06244486e+00 -3.28331245e+00]
 [ 4.73205802e+00  7.91880062e-01]
 [ 4.40270065e+00 -7.94023140e+00]
 [ 4.66319021e+00 -3.51303241e+00]
 [ 3.92136949e+00 -3.24629637e+00]
 [ 2.23972249e+00 -5.03990137e+00]
 [ 4.58079971e+00 -5.53130179e+00]
 [ 2.98611855e+00 -2.84841563e+00]
 [ 3.71714030e+00 -8.28042176e+00]
 [ 2.67130704e+00 -7.75138131e+00]
 [ 2.36251998e+00 -7.56138969e+00]
 [ 3.92167900e+00 -4.44776836e+00]
 [ 5.09531968e+00 -1.76212347e+00]
 [ 4.79513533e+00 -2.08210968e+00]
 [ 4.64902096e+00  1.12713229e-01]
 [ 3.82032424e+00 -7.64767646e+00]
 [ 4.16584398e+00 -4.34019132e+00]
 [ 3.02442700e+00 -8.64056248e+00]
 [ 3.35683551e+00 -2

In [15]:
# inspect the integer label generated for each sample
print(y)

[2 2 1 2 2 2 1 0 0 2 2 1 0 2 2 2 0 2 0 0 0 0 1 2 1 0 2 0 2 1 0 0 2 0 2 2 0
 0 2 1 2 0 2 2 2 2 0 0 1 1 1 0 0 0 1 1 2 0 0 2 0 1 1 1 1 0 1 1 2 2 2 1 2 1
 2 2 1 1 0 0 1 2 0 1 2 0 0 0 2 0 0 1 2 2 2 0 1 1 2 1 1 0 2 1 1 2 0 1 0 0 1
 1 2 0 2 0 2 1 0 2 0 1 0 2 1 1 1 0 0 0 0 1 2 0 0 2 1 2 0 1 0 2 0 2 1 0 0 1
 2 1 0 1 2 2 0 1 0 1 0 2 1 0 0 0 2 1 2 1 0 1 1 2 0 2 0 0 2 2 2 0 0 2 1 0 2
 0 0 2 0 1 0 0 0 1 1 2 1 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 0 2 0 2 0 0 2 1 0
 2 1 2 0 1 2 1 2 2 2 2 1 2 0 1 2 1 0 1 0 0 2 1 1 0 1 1 0 0 1 2 0 1 2 1 1 2
 0 0 1 0 0 1 2 2 2 1 0 0 0 2 0 0 2 1 1 0 2 2 1 1 0 1 1 2 2 1 0 1 0 2 1 1 2
 0 2 2 1 2 2 0 0 0 0 0 1 2 1 0 0 1 1 0 1 1 1 2 0 1 2 1 1 2 0 1 0 1 1 1 2 0
 1 1 0 1 1 2 2 0 1 2 1 1 2 1 1 2 0 2 2 0 2 0 1 2 0 2 2 2 0 1 2 0 2 2 0 2 0
 2 2 1 0 1 1 2 0 1 2 0 2 2 1 0 0 2 0 1 2 0 1 1 2 2 2 0 0 2 1 0 2 1 1 1 2 0
 2 0 2 0 0 1 0 1 1 0 1 2 2 2 0 1 1 0 2 2 2 2 1 2 1 0 0 2 2 0 1 0 1 1 2 1 0
 0 2 0 1 1 2 0 1 1 1 2 1 1 1 1 1 2 0 2 1 0 2 0 2 1 0 0 0 0 2 1 0 1 2 2 2 1
 2 0 1 2 2 2 2 0 2 0 2 2 

In [26]:
# define the model with default settings; evaluate_model() overwrites its
# hyperparameters via set_params() for every configuration that is tried
model = KNeighborsClassifier()



In [17]:
# define the space of hyperparameters to search, one named dimension per entry;
# the names must match KNeighborsClassifier parameters because they are passed
# straight to model.set_params() by evaluate_model()
search_space = [
    Integer(1, 12, name='n_neighbors'),
    Integer(1, 4, name='p'),
]


# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
    """Score one hyperparameter configuration for the optimizer.

    Parameters
    ----------
    **params
        Keyword arguments named after the dimensions in ``search_space``
        (``n_neighbors`` and ``p``). The ``use_named_args`` decorator lets the
        optimizer call this function with a plain list of values.

    Returns
    -------
    float
        ``1.0 - mean accuracy`` across the cross-validation folds, so that
        minimizing the returned value maximizes accuracy.
    """
    # apply the candidate hyperparameters to the shared notebook-level model
    model.set_params(**params)
    # calculate 10-fold cross validation accuracy (cv=10)
    scores = cross_val_score(model, X, y, cv=10, n_jobs=-1, scoring='accuracy')
    # turn the mean accuracy into a loss for the minimizer
    return 1.0 - mean(scores)


In [18]:
# perform optimization
# random_state seeds the optimizer so repeated runs propose the same sequence of
# configurations; the output saved below came from an unseeded run, so
# re-execute the cell to refresh it.
result = gp_minimize(evaluate_model, search_space, random_state=1)
# summarize the findings: result.fun is the lowest loss (1 - accuracy) found and
# result.x holds the winning values in the same order as search_space
best_n_neighbors, best_p = result.x
print('Best Accuracy: %.3f' % (1.0 - result.fun))
print('Best Parameters: n_neighbors=%d, p=%d' % (best_n_neighbors, best_p))

Best Accuracy: 0.908
Best Parameters: n_neighbors=9, p=1
