Importing Necessary Libraries:

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.metrics import accuracy_score
from sklearn import datasets

## Loading the Data:

In [5]:
wine = datasets.load_wine()

## Exploring the Data:

In [7]:
wine.feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [8]:
wine.target_names

array(['class_0', 'class_1', 'class_2'], dtype='<U7')

In [16]:
"""Top 5 records of the Feature Set"""
wine.data[0:5]

array([[1.423e+01, 1.710e+00, 2.430e+00, 1.560e+01, 1.270e+02, 2.800e+00,
        3.060e+00, 2.800e-01, 2.290e+00, 5.640e+00, 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, 1.120e+01, 1.000e+02, 2.650e+00,
        2.760e+00, 2.600e-01, 1.280e+00, 4.380e+00, 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, 1.860e+01, 1.010e+02, 2.800e+00,
        3.240e+00, 3.000e-01, 2.810e+00, 5.680e+00, 1.030e+00, 3.170e+00,
        1.185e+03],
       [1.437e+01, 1.950e+00, 2.500e+00, 1.680e+01, 1.130e+02, 3.850e+00,
        3.490e+00, 2.400e-01, 2.180e+00, 7.800e+00, 8.600e-01, 3.450e+00,
        1.480e+03],
       [1.324e+01, 2.590e+00, 2.870e+00, 2.100e+01, 1.180e+02, 2.800e+00,
        2.690e+00, 3.900e-01, 1.820e+00, 4.320e+00, 1.040e+00, 2.930e+00,
        7.350e+02]])

In [17]:
"""Record of the Target Set"""
wine.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

In [20]:
wine.data.shape

(178, 13)

In [21]:
wine.target.shape

(178,)

## Splitting the Data:

In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(wine.data, wine.target, test_size=0.3)

## Building the KNN classifier model:

In [44]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
y_pred = knn.predict(X_test)

In [45]:
y_pred

array([0, 2, 1, 1, 2, 2, 0, 1, 2, 1, 1, 1, 0, 2, 1, 2, 2, 1, 1, 0, 2, 2,
       1, 2, 1, 2, 0, 0, 2, 2, 0, 2, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 1, 1,
       2, 0, 2, 2, 1, 2, 1, 1, 2, 1])

In [46]:
"""Accuracy"""
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
print(confusion_matrix(Y_test, y_pred))
print(classification_report(Y_test, y_pred))
print(accuracy_score(Y_test, y_pred))

[[10  0  2]
 [ 1 13 10]
 [ 2  5 11]]
             precision    recall  f1-score   support

          0       0.77      0.83      0.80        12
          1       0.72      0.54      0.62        24
          2       0.48      0.61      0.54        18

avg / total       0.65      0.63      0.63        54

0.6296296296296297


## Optimizing KNNs Performance (Parameter Tuning):

### The parameters in Decision Tree are:
#### n_neighbors:
n_neighbors represents the number of neighbors to use for kneighbors queries
- default = '5'
- The determination of the K value varies greatly depending on the case.

#### p:
This is the power parameter for the Minkowski metric.
- default=2
- When p = 1, this is equivalent to using manhattan_distance (l1)
- When p = 2, this is equivalent to using euliddean_distance(l2)

In [51]:
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, Y_train)
y_pred = knn.predict(X_test)

In [52]:
print(confusion_matrix(Y_test, y_pred))
print(classification_report(Y_test, y_pred))
print(accuracy_score(Y_test, y_pred))

[[10  0  2]
 [ 1 15  8]
 [ 2  6 10]]
             precision    recall  f1-score   support

          0       0.77      0.83      0.80        12
          1       0.71      0.62      0.67        24
          2       0.50      0.56      0.53        18

avg / total       0.66      0.65      0.65        54

0.6481481481481481
