In [89]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sklearn
import warnings

warnings.filterwarnings('ignore')

In [119]:
sns.get_dataset_names()

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']

In [90]:
df=sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [91]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [92]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [93]:
X = df.drop('species', axis=1)
y = df['species']

In [94]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [95]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [96]:
model_KNN = KNeighborsClassifier(n_neighbors=3) # change n value
model_KNN.fit(X_train, y_train)

In [97]:
model_KNN.score(X_test, y_test)

1.0

In [98]:
from sklearn.svm import SVC

In [99]:
model_SVM = SVC(gamma='auto')
model_SVM.fit(X_train, y_train)

In [100]:
model_SVM.score(X_test, y_test)

1.0

Use GridSearchCV

In [101]:
#grid search cv

from sklearn.model_selection import GridSearchCV

In [102]:
classifier = GridSearchCV((model_SVM), {
    'C': [1,10,20,30],
    'kernel': ['rbf','linear']
}, cv=5, return_train_score=False)

classifier.fit(X, y)

In [103]:
classifier.cv_results_

{'mean_fit_time': array([0.00270181, 0.00232501, 0.00280619, 0.00245595, 0.0023118 ,
        0.00373259, 0.00344424, 0.00221939]),
 'std_fit_time': array([4.40865761e-04, 2.52387885e-05, 8.57172455e-04, 2.56822608e-04,
        4.07246850e-05, 2.88043375e-04, 7.47315913e-04, 5.64379433e-05]),
 'mean_score_time': array([0.0019484 , 0.00174527, 0.0020237 , 0.00178118, 0.00174375,
        0.00291982, 0.00231962, 0.00163116]),
 'std_score_time': array([2.64020708e-04, 1.36273686e-05, 3.87437630e-04, 7.31303123e-05,
        6.36986269e-05, 2.22373999e-04, 5.86025232e-04, 1.55192732e-05]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20, 30, 30],
              mask=[False, False, False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear',
                    'rbf', 'linear'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value=np.str_('?'),
        

In [104]:
results = pd.DataFrame(classifier.cv_results_)
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002702,0.000441,0.001948,0.000264,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.002325,2.5e-05,0.001745,1.4e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.002806,0.000857,0.002024,0.000387,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.002456,0.000257,0.001781,7.3e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.002312,4.1e-05,0.001744,6.4e-05,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.003733,0.000288,0.00292,0.000222,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6
6,0.003444,0.000747,0.00232,0.000586,30,rbf,"{'C': 30, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.933333,1.0,0.96,0.038873,7
7,0.002219,5.6e-05,0.001631,1.6e-05,30,linear,"{'C': 30, 'kernel': 'linear'}",1.0,1.0,0.9,0.9,1.0,0.96,0.04899,7


In [105]:
results.columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_C', 'param_kernel', 'params', 'split0_test_score',
       'split1_test_score', 'split2_test_score', 'split3_test_score',
       'split4_test_score', 'mean_test_score', 'std_test_score',
       'rank_test_score'],
      dtype='object')

In [106]:
results[['param_C','param_kernel','mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667
6,30,rbf,0.96
7,30,linear,0.96


KNN

In [107]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [108]:
X = df.drop('species', axis=1)
y = df['species']

In [109]:
y_encoded = pd.get_dummies(y)

In [110]:
y_encoded = y_encoded.astype(int)

In [111]:
y_encoded

Unnamed: 0,setosa,versicolor,virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
...,...,...,...
145,0,0,1
146,0,0,1
147,0,0,1
148,0,0,1


In [112]:
scalar = StandardScaler()
X_encoded = scalar.fit_transform(X)

In [113]:
X_encoded_train, X_encoded_test, y_encoded_train, y_encoded_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

In [114]:
model_KNN = KNeighborsClassifier(n_neighbors=11) # change n value
model_KNN.fit(X_encoded, y_encoded)

In [115]:
model_KNN.score(X_encoded_test, y_encoded_test)

1.0

In [116]:
classifier2 = GridSearchCV((model_KNN), {
    'n_neighbors': [1,3,5,7,9,11,13,1],
    'weights': ['uniform', 'distance'],
    'algorithm': ['ball_tree', 'kd_tree', 'brute']
}, cv=5, return_train_score=False)

classifier2.fit(X_encoded, y_encoded)

In [117]:
results_knn.columns

Index(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time',
       'param_algorithm', 'param_n_neighbors', 'param_weights', 'params',
       'split0_test_score', 'split1_test_score', 'split2_test_score',
       'split3_test_score', 'split4_test_score', 'mean_test_score',
       'std_test_score', 'rank_test_score'],
      dtype='object')

In [118]:
results_knn = pd.DataFrame(classifier2.cv_results_)
results_knn.sort_values(by=['mean_test_score'],ascending=False)[['param_n_neighbors','param_weights','param_algorithm', 'mean_test_score']]

Unnamed: 0,param_n_neighbors,param_weights,param_algorithm,mean_test_score
0,1,uniform,ball_tree,0.913333
1,1,distance,ball_tree,0.913333
15,1,distance,ball_tree,0.913333
14,1,uniform,ball_tree,0.913333
30,1,uniform,kd_tree,0.913333
31,1,distance,kd_tree,0.913333
17,1,distance,kd_tree,0.913333
16,1,uniform,kd_tree,0.913333
32,1,uniform,brute,0.913333
33,1,distance,brute,0.913333
