## Import libraries

In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
import matplotlib.pyplot as plt
plt.rcParams.update(plt.rcParamsDefault)
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

---

## Load Iris Dataset

In [3]:
from sklearn import datasets
iris = datasets.load_iris()

In [4]:
X = iris.data
y = iris.target

---

## Split into training and test dataset

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

---

## PCA

In [6]:
n_components=4

In [7]:
from sklearn.decomposition import PCA
pca = PCA(n_components=n_components)
X_pca_train = pca.fit_transform(X_train)
X_pca_test = pca.transform(X_test)
y_pca_train = y_train
y_pca_test = y_test

## GridSearch

In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid

model_params = {
    '1KNN' : {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [1],
            'metric': ["minkowski", "euclidean", "manhattan", "chebyshev", "cosine", "hamming", "canberra", "braycurtis"]
        }
    },
    '3KNN' : {
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3],
            'metric': ["minkowski", "euclidean", "manhattan", "chebyshev", "cosine", "hamming", "canberra", "braycurtis"]
        }
    }, 
    'NC' : {
        'model': NearestCentroid(),
        'params': {
            'metric': ["minkowski", "euclidean", "manhattan", "chebyshev", "cosine", "hamming", "canberra", "braycurtis"]
        }
    }
    
}

In [9]:
from sklearn.model_selection import GridSearchCV
scores = []

for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'], mp['params'], cv=20, return_train_score=True, verbose=3)
    clf.fit(X_pca_train, y_pca_train)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    
df_score = pd.DataFrame(scores,columns=['model','best_score','best_params'])

Fitting 20 folds for each of 8 candidates, totalling 160 fits
[CV 1/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=0.833) total time=   0.0s
[CV 2/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 3/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=0.833) total time=   0.0s
[CV 4/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 5/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 6/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=0.833) total time=   0.0s
[CV 7/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 8/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 9/20] END metric=minkowski, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 10/20] END metric=minkowski, n_neighbors=1;

[CV 18/20] END metric=hamming, n_neighbors=1;, score=(train=1.000, test=0.333) total time=   0.0s
[CV 19/20] END metric=hamming, n_neighbors=1;, score=(train=1.000, test=0.333) total time=   0.0s
[CV 20/20] END metric=hamming, n_neighbors=1;, score=(train=1.000, test=0.333) total time=   0.0s
[CV 1/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=0.833) total time=   0.0s
[CV 2/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=0.667) total time=   0.0s
[CV 3/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=0.667) total time=   0.0s
[CV 4/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 5/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 6/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=0.833) total time=   0.0s
[CV 7/20] END metric=canberra, n_neighbors=1;, score=(train=1.000, test=1.000) total time=   0.0s
[CV 8/20] END metric

[CV 18/20] END metric=chebyshev, n_neighbors=3;, score=(train=0.991, test=0.833) total time=   0.0s
[CV 19/20] END metric=chebyshev, n_neighbors=3;, score=(train=0.974, test=1.000) total time=   0.0s
[CV 20/20] END metric=chebyshev, n_neighbors=3;, score=(train=0.974, test=1.000) total time=   0.0s
[CV 1/20] END metric=cosine, n_neighbors=3;, score=(train=0.939, test=0.667) total time=   0.0s
[CV 2/20] END metric=cosine, n_neighbors=3;, score=(train=0.930, test=0.667) total time=   0.0s
[CV 3/20] END metric=cosine, n_neighbors=3;, score=(train=0.930, test=0.833) total time=   0.0s
[CV 4/20] END metric=cosine, n_neighbors=3;, score=(train=0.912, test=1.000) total time=   0.0s
[CV 5/20] END metric=cosine, n_neighbors=3;, score=(train=0.912, test=0.833) total time=   0.0s
[CV 6/20] END metric=cosine, n_neighbors=3;, score=(train=0.930, test=0.833) total time=   0.0s
[CV 7/20] END metric=cosine, n_neighbors=3;, score=(train=0.921, test=1.000) total time=   0.0s
[CV 8/20] END metric=cosine,

[CV 4/20] END metric=canberra;, score=(train=0.719, test=0.833) total time=   0.0s
[CV 5/20] END metric=canberra;, score=(train=0.711, test=1.000) total time=   0.0s
[CV 6/20] END metric=canberra;, score=(train=0.737, test=0.500) total time=   0.0s
[CV 7/20] END metric=canberra;, score=(train=0.711, test=0.500) total time=   0.0s
[CV 8/20] END metric=canberra;, score=(train=0.719, test=0.833) total time=   0.0s
[CV 9/20] END metric=canberra;, score=(train=0.693, test=1.000) total time=   0.0s
[CV 10/20] END metric=canberra;, score=(train=0.737, test=1.000) total time=   0.0s
[CV 11/20] END metric=canberra;, score=(train=0.702, test=0.667) total time=   0.0s
[CV 12/20] END metric=canberra;, score=(train=0.746, test=0.500) total time=   0.0s
[CV 13/20] END metric=canberra;, score=(train=0.719, test=0.833) total time=   0.0s
[CV 14/20] END metric=canberra;, score=(train=0.728, test=0.667) total time=   0.0s
[CV 15/20] END metric=canberra;, score=(train=0.711, test=0.500) total time=   0.0

In [10]:
df_score = pd.DataFrame(scores,columns=['model','best_score','best_params'])
print(df_score)

  model  best_score                                 best_params
0  1KNN    0.958333   {'metric': 'minkowski', 'n_neighbors': 1}
1  3KNN    0.975000  {'metric': 'braycurtis', 'n_neighbors': 3}
2    NC    0.925000                     {'metric': 'minkowski'}
