In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

In [2]:
iris = pd.read_csv('iris.csv')
iris.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [3]:
X = iris.drop(['Species'], axis=1)
X

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
Y = iris['Species']
Y

0         Setosa
1         Setosa
2         Setosa
3         Setosa
4         Setosa
         ...    
145    Virginica
146    Virginica
147    Virginica
148    Virginica
149    Virginica
Name: Species, Length: 150, dtype: object

In [5]:
X = preprocessing.StandardScaler().fit_transform(X)
X[:5]

array([[-0.90068117,  1.03205722, -1.3412724 , -1.31297673],
       [-1.14301691, -0.1249576 , -1.3412724 , -1.31297673],
       [-1.38535265,  0.33784833, -1.39813811, -1.31297673],
       [-1.50652052,  0.10644536, -1.2844067 , -1.31297673],
       [-1.02184904,  1.26346019, -1.3412724 , -1.31297673]])

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, random_state=1)
X_train.shape

(135, 4)

In [7]:
KNN_model = KNeighborsClassifier(n_neighbors=3)
KNN_model.fit(X_train, Y_train)
Y_predict = KNN_model.predict(X_test)

In [8]:
accuracy = accuracy_score(Y_test, Y_predict)
accuracy

1.0

In [9]:
cm = confusion_matrix(Y_test, Y_predict)
cm

array([[5, 0, 0],
       [0, 6, 0],
       [0, 0, 4]], dtype=int64)

In [10]:
species = ['setosa', 'versicolor', 'virginica']
cm1 = pd.DataFrame(data=cm, index=species, columns=species)
cm1

Unnamed: 0,setosa,versicolor,virginica
setosa,5,0,0
versicolor,0,6,0
virginica,0,0,4


In [11]:
prediction_output = pd.DataFrame(data=[Y_test.values, Y_predict], index=['test', 'predict'])
prediction_output.transpose()

Unnamed: 0,test,predict
0,Setosa,Setosa
1,Versicolor,Versicolor
2,Versicolor,Versicolor
3,Setosa,Setosa
4,Virginica,Virginica
5,Versicolor,Versicolor
6,Virginica,Virginica
7,Setosa,Setosa
8,Setosa,Setosa
9,Virginica,Virginica


In [12]:
input = np.array([[7.7 ,3.5, 4.6, 4]])
output = KNN_model.predict(input)
output[0]

'Virginica'

In [13]:
fileName = 'IrisModel.pkl'

In [14]:
joblib.dump(KNN_model, fileName)

['IrisModel.pkl']

In [15]:
IrisModel = joblib.load(fileName)

In [16]:
input = np.array([[5.1,	3.5,	1.4,	0.2]])
output = IrisModel.predict(input)
output[0]

'Virginica'