# K-Nearest Neighbors (kNN) Classification using Scikit-learn: Iris Dataset

![Iris](images/03_iris.png)

In [1]:
# Embed the raw Iris data file from the UCI repository in an inline frame.
# (This only displays the remote file; nothing is saved locally.)

from IPython.display import IFrame
# Use HTTPS: browsers block plain-http frames inside pages served over https
# (mixed content), which would leave the frame blank on hosted notebooks.
IFrame('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', width=300, height=200)

### Example training data

![Training data](images/04_knn_dataset.png)

### kNN classification map (K=1)

![1NN classification map](images/04_1nn_map.png)

### kNN classification map (K=5)

![5NN classification map](images/04_5nn_map.png)

### Loading the data

In [2]:
# Pull in scikit-learn's bundled Iris loader
from sklearn.datasets import load_iris

# Load the dataset once; the returned 'bunch' carries the data plus its attributes
iris = load_iris()

# Feature matrix -> "X", response vector -> "y"
X, y = iris.data, iris.target

### Printing Data, Shape and Type

In [3]:
# Show the dimensions of X and y, one per line
print(X.shape, y.shape, sep="\n")

(150, 4)
(150,)


In [4]:
# Print the Iris data: the full feature matrix, one row per observation
# (this is the same array that was stored in X)
print(iris.data)

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

In [5]:
# Print the names of the Four Features
# (each name includes its unit; iris.data has four columns)
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [6]:
# Print Integers representing the Species of each Observation
# (one integer label per observation; the species names are in iris.target_names)
print(iris.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [7]:
# Print the Encoding Scheme for Species: 0 = Setosa, 1 = Versicolor, 2 = Virginica
# (the integer label is the position of the species name in this array)
print(iris.target_names)

['setosa' 'versicolor' 'virginica']


In [8]:
# Confirm the container types of the features and the response, one per line
print(type(iris.data), type(iris.target), sep="\n")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [9]:
# Check the Shape of the Features (1st Dimension = No. of Observations, 2nd Dimension = No. of Features)
# The feature matrix is two-dimensional: rows are observations, columns are features.
print(iris.data.shape)

(150, 4)


In [10]:
# Check the Shape of the Response (Single Dimension matching the No. of Observations)
# Its length must equal the first dimension of the feature matrix so each row has exactly one label.
print(iris.target.shape)

(150,)


In [11]:
# Bind the feature matrix to 'X' and the response vector to 'y'
# (repeats the assignments made right after the dataset was loaded)
X, y = iris.data, iris.target

### Import the required Class

In [12]:
from sklearn.neighbors import KNeighborsClassifier

### Instantiate the Estimator (k = 1)

In [13]:
# Create the kNN estimator with k = 1 (n_neighbors=1); it is not fitted to any data yet
knn = KNeighborsClassifier(n_neighbors=1)

In [14]:
# Show the estimator's parameter settings; only n_neighbors was passed explicitly above
print(knn)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')


### Model Fitting with Data

In [15]:
# Fit the classifier on the full feature matrix X and response vector y
# (the value displayed for this cell is the estimator's own repr)
knn.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=1, p=2,
           weights='uniform')

### Predicting Response for a New Observation

In [16]:
# Predict the species code for one new observation.
# The input is a list of rows (here a single row of four feature values);
# the result is an array holding one integer label per row.
knn.predict([[3, 5, 4, 2]])

array([2])

### Predicting for Multiple Observations

In [17]:
# Two new observations, one row each, four feature values per row
X_new = [[3, 5, 4, 2], [5, 4, 3, 2]]
# One predicted species code is returned per row of X_new
knn.predict(X_new)

array([2, 1])

### Using a Different value for k (k = 5)

In [18]:
# Rebuild the classifier with K=5 and train it on the same data in one step
# (fit() hands back the estimator itself, so the chained call can be assigned directly)
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)

# Classify the new observations with the refitted model
knn.predict(X_new)

array([1, 1])

### Using a Different Classification Model: Logistic Regression

In [19]:
# Bring in the logistic regression estimator
from sklearn.linear_model import LogisticRegression

# Build the model with its default parameters and train it on the data in one step
logreg = LogisticRegression().fit(X, y)

# Classify the same new observations
logreg.predict(X_new)



array([2, 0])

## Predicted Species for the Two New Observations
1. kNN (K=1) = Virginica, Versicolor
2. kNN (K=5) = Versicolor, Versicolor
3. Logistic Regression = Virginica, Setosa

### Reference: [Data School](http://www.dataschool.io/)