In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [9]:
def load_data(path):
    """Read a header-less, comma-separated file and split it into features and labels.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; forwarded to ``pd.read_csv``.

    Returns
    -------
    data : pandas.DataFrame
        The table exactly as read (no header row, integer column names).
    x : list of list
        One inner list per row holding every column except the last.
    y : list
        The last column of every row.
    """
    print('Loading data...')
    data = pd.read_csv(path, header=None, sep=",")
    print('Number of samples:', data.shape[0])

    # Materialise the frame once and slice both outputs from the same array.
    table = data.values
    features = table[:, :-1].tolist()
    targets = table[:, -1].tolist()
    return data, features, targets

In [10]:
def split_dataset(x, y, testset_portion, random_state=None, stratify=None):
    """Split features and labels into a training part and a test part.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    x : sequence
        Feature rows.
    y : sequence
        Labels, one per row of ``x``.
    testset_portion : float or int
        Forwarded as ``test_size``.
    random_state : int, RandomState instance or None, optional
        Forwarded as ``random_state``. The default ``None`` keeps the
        previous behaviour (a different shuffle on every call); pass an
        integer to get the same split every time.
    stratify : array-like or None, optional
        Forwarded as ``stratify``. The default ``None`` keeps the previous
        behaviour (no stratification); pass ``y`` to keep the class
        proportions equal in both parts.

    Returns
    -------
    x_train, x_test, y_train, y_test
    """
    print('Split dataset.')
    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=testset_portion,
        random_state=random_state,
        stratify=stratify,
    )
    return x_train, x_test, y_train, y_test

In [11]:
def feature_scaling(x_train, x_test):
    """Standardise both sets with a ``StandardScaler`` fitted on the training set.

    The scaler is fitted on ``x_train`` only; the same fitted scaler is then
    applied to ``x_test``.

    Returns
    -------
    (x_train_scaled, x_test_scaled)
    """
    print('Feature scaling.')
    scaler = StandardScaler()
    # fit_transform == fit on x_train followed by transform of x_train.
    train_scaled = scaler.fit_transform(x_train)
    test_scaled = scaler.transform(x_test)
    return train_scaled, test_scaled

In [12]:
def train_KNN(x_train, y_train, n):
    """Fit a k-nearest-neighbours classifier and return it.

    Parameters
    ----------
    x_train : training feature rows
    y_train : training labels
    n : value passed as ``n_neighbors``

    Returns
    -------
    The fitted ``KNeighborsClassifier``.
    """
    print('Start training.')
    # fit() hands back the estimator itself, so it can be returned directly.
    return KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)

In [13]:
def test(clf, x_test):
    print('Start testing...')
    y_pred = clf.predict(x_test)
    return y_pred

In [14]:
if __name__=='__main__':
    # End-to-end KNN experiment:
    # load -> encode labels -> split -> scale -> train -> evaluate.

    # --- Experiment settings -------------------------------------------------
    path = 'on_off_open.csv'  # alternative dataset: 'feature_all.csv'
    testset_portion = 0.2
    seed = 42
    # NOTE(review): an even number of neighbours can produce tied votes when
    # there are only two classes; consider an odd value -- confirm with the
    # author before changing, since it alters the reported metrics.
    n = 2

    # split_dataset is called without an explicit seed, so train_test_split
    # draws from NumPy's global RNG. Seeding it here makes the split, and
    # therefore every metric printed below, repeatable across runs.
    np.random.seed(seed)

    # --- Data ----------------------------------------------------------------
    data, x, y = load_data(path)

    labels = np.unique(np.array(y))

    # Map the raw label values to consecutive integer class ids.
    lb = preprocessing.LabelEncoder()
    lb.fit(labels)
    y = lb.transform(y)

    x_train, x_test, y_train, y_test = split_dataset(x, y, testset_portion)
    x_train_nor, x_test_nor = feature_scaling(x_train, x_test)

    # --- Model ---------------------------------------------------------------
    print("Training and Testing for KNN:")
    clf_KNN = train_KNN(x_train_nor, y_train, n)
    y_pred_KNN = test(clf_KNN, x_test_nor)

    # --- Evaluation ----------------------------------------------------------
    acc_KNN = accuracy_score(y_test, y_pred_KNN)
    print('\nAccuracy KNN\n', round(acc_KNN, 3))

    print('\nConfusion Matrix KNN\n', confusion_matrix(y_test, y_pred_KNN))
    print('\nClassification Report KNN\n', classification_report(y_test, y_pred_KNN))

Loading data...
Number of samples: 336
Split dataset.
Feature scaling.
Training and Testing for KNN:
Start training.
Start testing...

Accuracy KNN
 0.721

Confusion Matrix KNN
 [[27  5]
 [14 22]]

Classification Report KNN
               precision    recall  f1-score   support

           0       0.66      0.84      0.74        32
           1       0.81      0.61      0.70        36

    accuracy                           0.72        68
   macro avg       0.74      0.73      0.72        68
weighted avg       0.74      0.72      0.72        68

