In [13]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [14]:
# Importing a dataset 'iris.csv'
data = pd.read_csv('../datasets/iris.csv')

# data.head()
print(data.columns)

# Identifying the class names
data.variety.unique()

Index(['sepal.length', 'sepal.width', 'petal.length', 'petal.width',
       'variety'],
      dtype='object')


array(['Setosa', 'Versicolor', 'Virginica'], dtype=object)

In [15]:
# Splitting dataset into features and target variable

# Features
X = data[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
X.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# Target Variable
y = data['variety'].replace({'Setosa':0,'Versicolor':1,'Virginica':2})

In [16]:
# Splitting dataset into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=2,shuffle=True)

In [17]:
# Creating a model
model = KNeighborsClassifier(n_neighbors=3)

# Training the model
model.fit(X_train,y_train)

# Forming a prediction
y_pred = np.array(model.predict(X_test))
print("Predicted values:\n",list(y_pred))

y_test = np.array(y_test)
print("\nActual values:\n",list(y_test))

Predicted values:
 [0, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 0, 0, 2, 0, 2, 2, 0, 1, 2, 1, 0, 2, 1, 1, 2, 1, 1, 2, 1, 0]

Actual values:
 [0, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 0, 0, 2, 0, 2, 2, 0, 1, 2, 1, 0, 2, 1, 1, 2, 1, 1, 2, 1, 0]


In [18]:
# Differences between predicted and actual values
diff = y_pred - y_test

print("Differences between predicted and actual values are")
print(diff)

print("\nNumber of incorrect classifications are ",(len(diff) - list(diff).count(0)))

Differences between predicted and actual values are
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0]

Number of incorrect classifications are  0


In [19]:
# Printing accuracy score
print("The model is",(accuracy_score(y_pred,y_test))*100,"% accurate!")

The model is 100.0 % accurate!


In [20]:
# Printing a confusion matrix
print("The confusion matrix below describes how accurate the model is")

cm = confusion_matrix(y_test,y_pred)

print("\nConfusion Matrix\n\n", cm)

print("\nTrue Positives(TP) ", cm[0,0])

print("\nTrue Negatives(TN) ", cm[0,1])

print("\nFalse Positives(FP) ", cm[1,0])

print("\nFalse Negatives(FN) ", cm[1,1])

The confusion matrix below describes how accurate the model is

Confusion Matrix

 [[17  0  0]
 [ 0 15  0]
 [ 0  0 13]]

True Positives(TP)  17

True Negatives(TN)  0

False Positives(FP)  0

False Negatives(FN)  15


In [21]:
# Printing a classification report
print("Classification Report\n")
print(classification_report(y_test,y_pred))

Classification Report

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

