In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [9]:
def load_data(path):
    """Read a header-less, comma-separated file and split it into features and targets.

    Parameters
    ----------
    path
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    data : pandas.DataFrame
        The full table as parsed (no header row is consumed, ``header=None``).
    x : list of list
        One inner list per row, holding every column except the last.
    y : list
        The last column of every row.
    """
    data = pd.read_csv(path, sep=",", header=None)
    # Convert to an array once and slice it twice, rather than asking pandas
    # for ``data.values`` separately for the features and for the targets.
    table = data.values
    x = table[:, :-1].tolist()
    y = table[:, -1].tolist()
    return data, x, y

In [10]:
def split_dataset(x, y, testset_portion, R):
    """Partition features and targets into train and test subsets.

    ``testset_portion`` is forwarded as ``test_size`` and ``R`` as
    ``random_state`` to ``train_test_split``.

    Returns the 4-tuple ``(x_train, x_test, y_train, y_test)``.
    """
    pieces = train_test_split(x, y, test_size=testset_portion, random_state=R)
    train_x, test_x, train_y, test_y = pieces
    return train_x, test_x, train_y, test_y

In [11]:
def feature_scaling(x_train, x_test):
    """Standardise both splits with a ``StandardScaler`` fitted on ``x_train``.

    The scaler is fitted on the training data only and the same fitted
    scaler is then applied to the test data.

    Returns ``(x_train_scaled, x_test_scaled)``.
    """
    scaler = StandardScaler().fit(x_train)
    return scaler.transform(x_train), scaler.transform(x_test)

In [12]:
def train_KNN(x_train, y_train, n):
    """Create a ``KNeighborsClassifier`` with ``n`` neighbours, fit it and return it."""
    # ``fit`` returns the estimator itself, so the fitted model can be returned directly.
    return KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)

In [13]:
def test(clf, x_test):
    # print('Start testing...')
    y_pred = clf.predict(x_test)
    return y_pred

In [27]:
if __name__ == '__main__':
    # ---- Run configuration ------------------------------------------------
    path = 'on_off_closed.csv'
    testset_portion = 0.2
    random_state = 42
    n = 1  # neighbour count handed to train_KNN

    # ---- Load the table and integer-encode the target column --------------
    data, x, y = load_data(path)
    labels = np.unique(np.array(y))
    lb = preprocessing.LabelEncoder()
    lb.fit(labels)
    y = lb.transform(y)

    # ---- Hold out a test split, then scale both parts ---------------------
    x_train, x_test, y_train, y_test = split_dataset(
        x, y, testset_portion, random_state
    )
    x_train_nor, x_test_nor = feature_scaling(x_train, x_test)

    # ---- Fit the classifier and predict on the held-out rows --------------
    clf_KNN = train_KNN(x_train_nor, y_train, n)
    y_pred_KNN = test(clf_KNN, x_test_nor)

    # ---- Report -----------------------------------------------------------
    acc_KNN = accuracy_score(y_test, y_pred_KNN)
    print('\nAccuracy KNN\n', round(acc_KNN, 3))
    print('\nConfusion Matrix KNN\n', confusion_matrix(y_test, y_pred_KNN))
    print('\nClassification Report KNN\n', classification_report(y_test, y_pred_KNN))


Accuracy KNN
 0.971

Confusion Matrix KNN
 [[36  1]
 [ 1 30]]

Classification Report KNN
               precision    recall  f1-score   support

           0       0.97      0.97      0.97        37
           1       0.97      0.97      0.97        31

    accuracy                           0.97        68
   macro avg       0.97      0.97      0.97        68
weighted avg       0.97      0.97      0.97        68

