In [32]:
from sklearn import datasets
wine = datasets.load_wine()

In [33]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [34]:
wine

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [35]:
target_names = list(wine.target_names)
target_names

['class_0', 'class_1', 'class_2']

In [36]:
feature_names = list(wine.feature_names)
feature_names

['alcohol',
 'malic_acid',
 'ash',
 'alcalinity_of_ash',
 'magnesium',
 'total_phenols',
 'flavanoids',
 'nonflavanoid_phenols',
 'proanthocyanins',
 'color_intensity',
 'hue',
 'od280/od315_of_diluted_wines',
 'proline']

In [37]:
print(wine.data[0:2])

[[1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00
  2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 1.120e+01 1.000e+02 2.650e+00 2.760e+00
  2.600e-01 1.280e+00 4.380e+00 1.050e+00 3.400e+00 1.050e+03]]


In [38]:
X = wine.data
y = wine.target

In [39]:
X.shape

(178, 13)

In [40]:
y.shape

(178,)

In [41]:
from tabulate import tabulate
from sklearn.metrics import accuracy_score

splits = [.1,.2,.3,.4,.5,.6,.7,.8,.9]
results = []

knn = KNeighborsClassifier(n_neighbors=3)
for i in range(len(splits)):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=splits[i], random_state=0)
    
    knn.fit(X_train, y_train)
    predicted = knn.predict(X_test)
    
    accuracy = accuracy_score(y_test, predicted)
    
    results.append([i+1, str(f"{splits[i]*100}%"),  X_train.shape, X_test.shape, accuracy])
    
print(tabulate(results, headers=['SNo', 'Test Size', 'Training Data', 'Testing Data', 'Accuracy'], tablefmt='grid'))

+-------+-------------+-----------------+----------------+------------+
|   SNo | Test Size   | Training Data   | Testing Data   |   Accuracy |
|     1 | 10.0%       | (160, 13)       | (18, 13)       |   0.722222 |
+-------+-------------+-----------------+----------------+------------+
|     2 | 20.0%       | (142, 13)       | (36, 13)       |   0.777778 |
+-------+-------------+-----------------+----------------+------------+
|     3 | 30.0%       | (124, 13)       | (54, 13)       |   0.703704 |
+-------+-------------+-----------------+----------------+------------+
|     4 | 40.0%       | (106, 13)       | (72, 13)       |   0.694444 |
+-------+-------------+-----------------+----------------+------------+
|     5 | 50.0%       | (89, 13)        | (89, 13)       |   0.696629 |
+-------+-------------+-----------------+----------------+------------+
|     6 | 60.0%       | (71, 13)        | (107, 13)      |   0.738318 |
+-------+-------------+-----------------+----------------+------

In [42]:
tab = tabulate(results, headers=['SNo', 'Test Size', 'Training Data', 'Testing Data', 'Accuracy'], tablefmt='grid')

In [43]:
headers=['SNo', 'Test Size', 'Training Data', 'Testing Data', 'Accuracy']

In [44]:
import csv

with open('accuracy.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(headers)
    writer.writerows(results)

In [45]:
csv_file = open("knn_output.csv", 'w')
csv_file.write(tab)
csv_file.close()

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
X_train.shape

(124, 13)

In [51]:
knn = KNeighborsClassifier(n_neighbors=3)

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

In [52]:
from sklearn.metrics import classification_report

report = classification_report(y_test, y_predict, target_names=target_names)
print(report)

              precision    recall  f1-score   support

     class_0       0.30      0.41      0.35        17
     class_1       0.63      0.48      0.55        25
     class_2       0.33      0.33      0.33        12

    accuracy                           0.43        54
   macro avg       0.42      0.41      0.41        54
weighted avg       0.46      0.43      0.44        54



In [53]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_predict)
cm

array([[ 7,  6,  4],
       [ 9, 12,  4],
       [ 7,  1,  4]], dtype=int64)