In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Importing iris dataset from sklearn

In [2]:
from sklearn.datasets import load_iris

In [3]:
dataset = load_iris()

### Analysing Dataset

In [4]:
dataset.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [5]:
print(dataset['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [6]:
print(dataset['data'][:10])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]


In [7]:
print(dataset['target'])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [8]:
print(dataset['target_names'])

['setosa' 'versicolor' 'virginica']


In [9]:
print(dataset['feature_names'])

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


### Spliting the data

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [11]:
X_train, X_test, y_train, y_test = train_test_split(dataset['data'], dataset['target']) #(data[feature],data[target])

In [12]:
print(f"X Train Shape : {X_train.shape}")
print(f"y Train Shape : {y_train.shape}")
print(f"X Test Shape : {X_test.shape}")
print(f"y Test Shape : {y_test.shape}")

X Train Shape : (112, 4)
y Train Shape : (112,)
X Test Shape : (38, 4)
y Test Shape : (38,)


In [13]:
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=6, p=2,
                     weights='uniform')

In [14]:
print(f"Accuracy of the model :{knn.score(X_test, y_test)}")

Accuracy of the model :1.0


### Prediction

In [15]:
demo = np.array([[3.1, 1.5, 3.4, 0.6]])
print(f"shape : {demo.shape}")

shape : (1, 4)


In [16]:
pred = knn.predict(demo)
print(f"Predicted : {pred}")
print(f"Predicted flower: {dataset['target_names'][pred]}")

Predicted : [1]
Predicted flower: ['versicolor']


In [17]:
i = 2
pred = knn.predict([X_test[i]])
print(f"Predicted : {pred}")
print(f"Predicted flower: {dataset['target_names'][pred]}")
print(f"Actual flower: {y_test[i]}")

Predicted : [0]
Predicted flower: ['setosa']
Actual flower: 0


In [18]:
y_pred = knn.predict(X_test)
print(f"Test set predictions: {y_pred}")

Test set predictions: [1 2 0 1 1 2 1 0 2 0 2 0 2 1 1 0 2 2 1 1 0 2 0 2 1 2 2 0 0 2 1 2 1 1 1 0 0
 2]
