In [73]:
import numpy as np
import pandas as pd
import csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from google.colab import drive
#----Mount Google Drive at /content/drive; the CSV paths read later in this notebook live under it
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [74]:
def get_data_from_csv(file_name,class_names):
    """Load a CSV whose last column is the class and every other column a feature.

    The first row is read as the column names; missing fields are filled
    with 0.

    Parameters
    ----------
    file_name
        Anything ``np.genfromtxt`` accepts as its first argument (typically
        a path to the CSV file).
    class_names : list or None
        Ordered class labels used to turn the last column into integers.
        When None or empty, the sorted unique labels found in this file are
        used instead.

    Returns
    -------
    features : list of tuple
        One tuple per data row holding every column except the last.
    classes : list of int
        For each row, the position of its label in ``class_names``.
    feature_names : list of str
        Names of every column except the last.
    class_names : list
        The class labels actually used for the encoding.

    Raises
    ------
    ValueError
        If a row carries a label that is not in ``class_names``.
    """
#----Pin the encoding so text labels are decoded to str regardless of
#----NumPy's default for genfromtxt
    data = np.genfromtxt(file_name, delimiter=',', names=True, filling_values=0,
                         dtype=None, encoding='utf-8', ndmin=1)

#----Extract all except the last column name as feature names
    label_column = data.dtype.names[-1]
    feature_names = list(data.dtype.names[:-1])

#----Extract unique values from last column as class names
    if not class_names:
        class_names = np.unique(data[label_column]).tolist()

#----Extract all except the last column as features
    features = data[feature_names].tolist()

#----Build the label -> integer table once; setdefault keeps the first
#----position of a repeated name, which is what list.index would return
    index_of = {}
    for position, name in enumerate(class_names):
        index_of.setdefault(name, position)

#----Extract last column as classes, and convert to integers
    labels = data[label_column].tolist()
    unknown = sorted({label for label in labels if label not in index_of}, key=repr)
    if unknown:
        raise ValueError(
            f"Class label(s) {unknown!r} are not in class_names {class_names!r}"
        )
    classes = [index_of[label] for label in labels]

    return features,classes,feature_names,class_names

In [75]:
#----The training file defines the class names; the test file is encoded with that same list
features_train, classes_train, feature_names, class_names = get_data_from_csv(
    "/content/drive/MyDrive/Colab Notebooks/training.csv", None)
features_test, classes_test, feature_names, class_names = get_data_from_csv(
    "/content/drive/MyDrive/Colab Notebooks/test.csv", class_names)
feature_names, class_names, features_train, classes_train, features_test, classes_test

(['Volume', 'Doors'],
 ['Jeep', 'Pickup', 'SUV', 'Sedan', 'Van'],
 [(145, 4),
  (165, 5),
  (113, 4),
  (134, 4),
  (137, 4),
  (101, 4),
  (168, 4),
  (128, 4),
  (88, 4),
  (120, 5),
  (135, 5),
  (104, 4),
  (121, 4),
  (94, 4),
  (132, 4),
  (105, 4),
  (106, 4),
  (134, 4),
  (136, 5),
  (130, 4),
  (106, 4),
  (136, 5),
  (114, 5),
  (120, 4),
  (118, 4),
  (122, 4),
  (115, 4),
  (72, 4),
  (134, 5),
  (122, 4),
  (95, 4),
  (102, 4),
  (172, 5),
  (103, 4),
  (100, 4),
  (109, 4),
  (110, 5),
  (102, 4),
  (104, 3),
  (144, 5),
  (96, 4),
  (53, 2),
  (115, 4),
  (104, 4),
  (135, 4),
  (97, 4),
  (97, 4),
  (89, 4),
  (132, 4),
  (112, 4),
  (103, 4),
  (106, 4),
  (99, 4),
  (121, 4),
  (105, 5),
  (131, 4),
  (106, 4),
  (132, 4),
  (144, 5),
  (105, 4),
  (135, 4),
  (160, 5),
  (85, 2),
  (171, 4),
  (95, 4),
  (170, 4),
  (123, 4),
  (96, 4),
  (300, 5),
  (95, 4),
  (122, 4),
  (94, 4),
  (132, 4),
  (122, 4),
  (110, 2),
  (97, 4),
  (100, 4),
  (167, 5),
  (145, 4),
  

In [76]:
def get_predictions(K,features_train,classes_train,features_test):
    """Fit a K-nearest-neighbours classifier and predict the test samples.

    Parameters
    ----------
    K : number of neighbours handed to KNeighborsClassifier.
    features_train, classes_train : training samples and their labels.
    features_test : samples to predict.

    Returns
    -------
    (knn, predictions) : the fitted classifier and its predictions for
    ``features_test``.
    """
#----Initialize the K-NN Classifier and train it (fit hands back the estimator)
    knn = KNeighborsClassifier(n_neighbors=K).fit(features_train, classes_train)
#----Predict for the test data
    return knn, knn.predict(features_test)

In [77]:
#----Fit and predict once with K = 3 neighbours
knn, predictions = get_predictions(3, features_train, classes_train, features_test)
knn, predictions

(KNeighborsClassifier(n_neighbors=3),
 array([3, 3, 3, 3, 2, 3, 3, 2, 4, 0, 3, 3, 2, 0, 0, 3, 3, 3, 2, 2, 2, 3,
        3, 3, 3, 0, 3, 3, 1, 2, 2, 3]))

In [78]:
def print_predictions(knn,predictions,features_test,classes_test,class_names):
    """Print each test sample's true class, predicted class and confidence, then the accuracy.

    Parameters
    ----------
    knn : fitted classifier providing ``predict_proba`` and ``classes_``.
    predictions : predicted integer class for every row of ``features_test``.
    features_test : the samples that were predicted.
    classes_test : true integer class of every test sample.
    class_names : list mapping an integer class to its display name.

    Returns
    -------
    None. Everything is written with ``print``.
    """
#----Check the probability (How sure is the model?)
    probability = knn.predict_proba(features_test)
#----predict_proba has one column per entry of knn.classes_, in that order,
#----so a label must be translated to its column before indexing; the label
#----value itself is only the right column when no class index is absent
#----from the training labels
    column_of = {label: column for column, label in enumerate(knn.classes_)}
#----Output the result for each test data
    for actual, predicted, row in zip(classes_test, predictions, probability):
        print(f"The {class_names[actual]} is classified as: {class_names[predicted]}")
        print(f"Confidence is {row[column_of[predicted]] * 100:.0f}%")

#----Compute the accuracy
    accuracy = accuracy_score(classes_test,predictions)
    print(f"The accuracy is {accuracy}")

In [79]:
#----Test-set accuracy for every K from 1 to 30 (k_values[i] belongs to K = i + 1)
k_values = []
for k in range(1, 31):
    knn, predictions = get_predictions(k, features_train, classes_train, features_test)
    k_values.append(accuracy_score(classes_test, predictions))

#----knn and predictions are left holding the last fit of the loop (K = 30)
k_values


[0.5625,
 0.40625,
 0.65625,
 0.59375,
 0.5625,
 0.59375,
 0.71875,
 0.71875,
 0.71875,
 0.71875,
 0.71875,
 0.6875,
 0.6875,
 0.6875,
 0.6875,
 0.6875,
 0.71875,
 0.6875,
 0.6875,
 0.6875,
 0.75,
 0.75,
 0.78125,
 0.78125,
 0.78125,
 0.78125,
 0.75,
 0.75,
 0.6875,
 0.6875]

In [80]:
#----Reports on whichever knn/predictions were assigned last (the K = 30 fit once the sweep cell has run)
print_predictions(knn, predictions, features_test, classes_test, class_names)

The Sedan is classified as: Sedan
Confidence is 50%
The Sedan is classified as: Sedan
Confidence is 70%
The Sedan is classified as: Sedan
Confidence is 60%
The Sedan is classified as: Sedan
Confidence is 50%
The SUV is classified as: SUV
Confidence is 43%
The Sedan is classified as: Sedan
Confidence is 63%
The Sedan is classified as: Sedan
Confidence is 53%
The SUV is classified as: Sedan
Confidence is 50%
The SUV is classified as: SUV
Confidence is 73%
The SUV is classified as: SUV
Confidence is 63%
The Sedan is classified as: Sedan
Confidence is 67%
The Sedan is classified as: Sedan
Confidence is 53%
The SUV is classified as: Sedan
Confidence is 53%
The SUV is classified as: SUV
Confidence is 63%
The Jeep is classified as: Sedan
Confidence is 63%
The Sedan is classified as: Sedan
Confidence is 60%
The Sedan is classified as: Sedan
Confidence is 57%
The Sedan is classified as: Sedan
Confidence is 67%
The SUV is classified as: SUV
Confidence is 43%
The Pickup is classified as: SUV
Conf