# Randomized Search CV
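We use randomized search when an exhaustive grid search (`GridSearchCV`) is too slow.  
It reduces computational expense by evaluating only a fixed number (`n_iter`) of randomly sampled parameter combinations instead of every combination in the grid.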

In [1]:
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Apply seaborn's default styling to subsequent matplotlib plots.
sns.set()

In [3]:
# Read the iris dataset, parsing the species label as a categorical column.
iris_df = pd.read_csv(
    '../../data/iris.csv',
    dtype={'species': 'category'},
)
# Preview the first three rows.
iris_df.head(n=3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [4]:
# Target: the species label. Features: every column except the final one
# (species is the last column in the preview of iris_df).
y = iris_df['species']
X = iris_df.iloc[:, :-1]

In [5]:
# Search space for the randomized search:
#   n_neighbors - every integer k from 1 to 30 inclusive
#   weights     - both neighbour-weighting schemes supported by KNN
k_range = range(1, 31)
weights = ['uniform', 'distance']

param_dist = {
    'n_neighbors': k_range,
    'weights': weights,
}
param_dist

{'n_neighbors': range(1, 31), 'weights': ['uniform', 'distance']}

In [6]:
# Base estimator whose hyper-parameters are tuned by the search.
knn = KNeighborsClassifier()

# Sample 10 parameter combinations from param_dist and score each with
# 10-fold cross-validated accuracy. random_state fixes which combinations
# are drawn; n_jobs=-1 runs the fits on all available cores.
rand = RandomizedSearchCV(
    knn,
    param_dist,
    cv=10,
    scoring='accuracy',
    n_iter=10,
    n_jobs=-1,
    random_state=42,
)
rand.fit(X, y)

RandomizedSearchCV(cv=10, error_score='raise',
          estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
          fit_params=None, iid=True, n_iter=10, n_jobs=-1,
          param_distributions={'n_neighbors': range(1, 31), 'weights': ['uniform', 'distance']},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score=True, scoring='accuracy', verbose=0)

In [7]:
# The estimator configured with the best parameter combination found by the search.
rand.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=17, p=2,
           weights='distance')

In [8]:
# The sampled parameter combination that achieved the best cross-validated score.
rand.best_params_

{'n_neighbors': 17, 'weights': 'distance'}

In [9]:
# Mean cross-validated accuracy (scoring='accuracy') of the best parameter combination.
rand.best_score_

0.97999999999999998

# Using the best parameters before making final predictions

In [10]:
# Build the final model from the parameters the search actually selected.
# Unpacking rand.best_params_ (n_neighbors=17, weights='distance' for this
# run) keeps the model in sync with the search result instead of relying on
# hand-copied values, which can silently drift (e.g. weights='uniform').
knn = KNeighborsClassifier(**rand.best_params_)
knn.fit(X, y)

# Classify two unseen samples, each given as four feature values.
knn.predict([[3, 5, 4, 2], [5, 4, 3, 2]])

array(['versicolor', 'versicolor'], dtype=object)

In [11]:
# No manual refit is required: the search object can predict directly.
rand.predict([[3, 5, 4, 2], [5, 4, 3, 2]])    # with refit=True, rand predicts using best_estimator_ refit on all of X, y

array(['versicolor', 'versicolor'], dtype=object)