In [1]:
import numpy as np
import pandas as pd
import random
import math
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
def count_label(data):
    """Collect the distinct class labels stored in the last column of ``data``.

    Labels are reported in order of first appearance.

    Parameters
    ----------
    data : sequence of rows
        Each row is indexable and holds its class label in the last position.
        Must contain at least one row.

    Returns
    -------
    tuple
        ``(labels, n_labels)`` where ``labels`` is an integer NumPy array of
        the distinct labels and ``n_labels`` is how many there are.
    """
    # Seed with the first row's label so the scan below only ever appends
    # labels that compare unequal to everything already collected.
    seen = [data[0][-1]]
    for row in data:
        label = row[-1]
        already_known = any(known == label for known in seen)
        if not already_known:
            seen.append(label)
    return np.array(seen).astype(int), len(seen)

In [3]:
def evaluation(arr_acc):
    """Summarise a set of accuracy scores.

    Parameters
    ----------
    arr_acc : sequence of numbers
        One score per cross-validation fold.

    Returns
    -------
    tuple
        ``(mean, std)`` where ``std`` is the population standard deviation
        (the squared deviations are divided by the number of scores).
    """
    scores = np.asarray(arr_acc)
    mean = np.mean(scores)
    deviation = scores - mean
    # Built-in sum keeps the element-by-element accumulation order.
    variance = sum(deviation * deviation) / len(scores)
    return mean, math.sqrt(variance)

In [4]:
def handleMissValue(data):
    """Fill missing (NaN) feature values with the per-class column mean.

    Rows are grouped by the class label in the last column; every NaN inside a
    group is replaced by that group's mean of the same column (computed while
    ignoring NaNs). The result lists the groups in ascending label order and
    keeps the original row order inside each group.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_columns)
        Numeric table whose last column is the class label.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``data``. A float buffer is used on
        purpose: an integer buffer would truncate the imputed means and any
        fractional feature value. The input is not modified.

    Raises
    ------
    ValueError
        If a class label is missing (NaN).

    Notes
    -----
    A column that is entirely NaN within one class has no mean to impute, so
    it stays NaN for that class.
    """
    data = np.asarray(data, dtype=float)
    if data.size == 0:
        return data.copy()

    labels = data[:, -1]
    if np.isnan(labels).any():
        raise ValueError("class label (last column) must not be missing")
    # Group by the actual label value instead of assuming labels are 1..n.
    class_ids = labels.astype(int)

    filled_groups = []
    for class_id in np.unique(class_ids):
        # Boolean indexing returns a copy, so the caller's array is untouched.
        group = data[class_ids == class_id]
        col_mean = np.nanmean(group, axis=0)
        missing_rows, missing_cols = np.nonzero(np.isnan(group))
        group[missing_rows, missing_cols] = col_mean[missing_cols]
        filled_groups.append(group)
    return np.vstack(filled_groups)

In [5]:
def randomize(arr):
    """Shuffle ``arr`` in place with a Fisher-Yates pass and return it.

    Walks from the last position down to index 1, swapping each position with
    a randomly drawn position at or below it (drawn via ``random.randint``).

    Parameters
    ----------
    arr : mutable sequence
        Sequence supporting item assignment, e.g. a list of rows.

    Returns
    -------
    The same ``arr`` object, now shuffled.
    """
    last = len(arr) - 1
    while last > 0:
        pick = random.randint(0, last)
        arr[last], arr[pick] = arr[pick], arr[last]
        last -= 1
    return arr

In [6]:
def normalize(data):
    """Min-max scale every column except the last one into [0, 1].

    The last column (the class label) is passed through unchanged.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_columns)
        Numeric table; the last column is left as-is.

    Returns
    -------
    numpy.ndarray
        A new float array with the feature columns rescaled as
        ``(x - min) / (max - min)`` per column. A column whose values are all
        equal has zero range; it is mapped to 0.0 instead of dividing by zero
        and producing NaN. Empty input is returned as an empty float array.
    """
    data = np.array(data).astype(float)
    if data.size == 0:
        return data

    features = data[:, :-1]
    min_value = features.min(axis=0)
    span = features.max(axis=0) - min_value
    # Constant columns: divide by 1 so (x - min) == 0 stays 0 rather than NaN.
    safe_span = np.where(span == 0, 1.0, span)
    data[:, :-1] = (features - min_value) / safe_span
    return data

In [7]:
def extraction(file, is_normal = True, is_random = True):
    """Load the dataset from an Excel file and prepare it for classification.

    Steps, in order: read the workbook, fill missing values with
    ``handleMissValue``, drop duplicate rows, optionally min-max scale with
    ``normalize``, optionally shuffle with ``randomize``.

    Parameters
    ----------
    file : str or path-like
        Excel workbook passed to ``pandas.read_excel``.
    is_normal : bool, default True
        Apply ``normalize`` to the data.
    is_random : bool, default True
        Shuffle the rows with ``randomize``.

    Returns
    -------
    list of lists when ``is_random`` is true (the rows are converted with
    ``tolist()`` before shuffling), otherwise a NumPy array.
    """
    raw = pd.read_excel(file)
    filled = handleMissValue(np.array(raw))
    # drop_duplicates returns a new frame; the result has to be kept,
    # otherwise the duplicates are never actually removed.
    deduplicated = pd.DataFrame(filled).drop_duplicates()
    data = np.array(deduplicated)
    if is_normal:
        data = normalize(data)
    if is_random:
        data = randomize(data.tolist())
    return data

In [8]:
def crossValidation(data, cross_val, cross_index, label_index):
    """Build the train/test split for one fold of k-fold cross-validation.

    The block of rows belonging to fold ``cross_index`` is swapped, in place,
    with the first block of ``data``; the first block then becomes the test
    set and everything after it the training set. Rows left over when
    ``len(data)`` is not a multiple of ``cross_val`` always stay in training.

    Parameters
    ----------
    data : list of rows or numpy.ndarray
        Dataset; modified in place by the block swap.
    cross_val : int
        Number of folds.
    cross_index : int
        1-based index of the fold to use as the test set.
    label_index : int
        Column index of the class label.

    Returns
    -------
    tuple
        ``(data, train_features, train_labels, test_features, test_labels)``.
        ``data`` is the same (reordered) object that was passed in; features
        are 2-D arrays without the label column; labels are integer arrays of
        shape ``(n, 1)``.

    Raises
    ------
    ValueError
        If ``cross_index`` is not within ``1..cross_val``.
    """
    if not 1 <= cross_index <= cross_val:
        raise ValueError("cross_index must be between 1 and cross_val")

    # Integer division: int(len * (1 / cross_val)) can round down by one
    # through float error (e.g. 49 * (1 / 49) < 1).
    split_test = len(data) // cross_val
    change_index = (cross_index - 1) * split_test

    if isinstance(data, np.ndarray):
        # Tuple-swapping ndarray rows swaps views and duplicates a row, so the
        # block is exchanged through an explicit copy instead.
        if change_index:
            head = data[:split_test].copy()
            data[:split_test] = data[change_index:change_index + split_test]
            data[change_index:change_index + split_test] = head
    else:
        for i in range(split_test):
            j = i + change_index
            data[i], data[j] = data[j], data[i]

    train_frame = pd.DataFrame(data[split_test:])
    test_frame = pd.DataFrame(data[0:split_test])

    # Built-in int replaces the np.int alias, which NumPy has removed.
    data_train_feature = np.array(train_frame.drop([label_index], axis=1))
    data_train_label = np.array(train_frame[[label_index]].astype(int))
    data_test_feature = np.array(test_frame.drop([label_index], axis=1))
    data_test_label = np.array(test_frame[[label_index]].astype(int))

    return data, data_train_feature, data_train_label, data_test_feature, data_test_label

In [9]:
# Flatten two column-shaped label arrays into 1-D label arrays.
def transformArrLabel(X_label, Y_label):
    """Take the first entry of every row in each label collection.

    Parameters
    ----------
    X_label, Y_label : sequence of rows
        Each row is indexable; only its element at position 0 is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_flat, Y_flat)``, one 1-D array per input, in the original order.
    """
    def first_entries(rows):
        return np.array([row[0] for row in rows])

    return first_entries(X_label), first_entries(Y_label)

In [10]:
# Experiment configuration; every name below is read by later cells.
# Upper bound on the neighbour count; sizes the per-k result lists in the evaluation cell.
k = 15
# Excel workbook loaded by extraction().
file = 'Dataset Pegawai.xlsx'
# Number of cross-validation folds.
cross_val = 10
label_index = 9 # column index of the class label ('Kinerja', i.e. performance)
# Passed to extraction() as is_normal: min-max scale the data.
normalisasi = True
# Passed to extraction() as is_random: shuffle the rows.
# NOTE(review): no random seed is set in the visible cells, so the shuffle
# (and therefore every fold) presumably differs between runs; confirm.
random_data = True
data = extraction(file, normalisasi, random_data)

In [11]:
# Evaluate k-NN with cross-validation for every neighbour count 1..k.
# The range is derived from the configured k so that it always matches the
# size of the result lists (a hard-coded range breaks as soon as k changes).
k_range = range(1, k + 1)

# Per-neighbour-count results; position n-1 holds the value for n neighbours.
arr_mean_acc = [0 for _ in k_range]
arr_std_acc  = [0 for _ in k_range]

# Display names for the classification report, built once instead of per fold.
# NOTE(review): presumably these map to labels 1, 2, 3 in ascending order; confirm.
target_names = ['Cukup', 'Baik', 'Sangat Baik']

# A separate loop variable keeps the configured k intact, so this cell can be
# re-run on its own.
for n_neighbors in k_range:
    arr_acc = [0 for _ in range(cross_val)]
    for i in range(1, cross_val + 1):
        # crossValidation reorders data in place and returns it; keep the
        # returned reference so the next fold continues from that ordering.
        data, train_f, train_l, test_f, test_l = crossValidation(data, cross_val, i, label_index)
        train_l, test_l = transformArrLabel(train_l, test_l)
        knn = KNeighborsClassifier(n_neighbors = n_neighbors, algorithm='brute')
        knn.fit(train_f, train_l)
        y_pred = knn.predict(test_f)
        acc = metrics.accuracy_score(test_l, y_pred)
        arr_acc[i-1] = acc

        print(classification_report(test_l, y_pred, target_names=target_names))
        print("Cross-"+str(i)+" Acc:"+ str(acc))

        print("---------------------------------------------------------")
    mean, std = evaluation(arr_acc)
    arr_mean_acc[n_neighbors-1] = mean
    arr_std_acc[n_neighbors-1] = std
    print('K-{0} Mean : {1} with Standar Deviation : {2}.'.format(n_neighbors,mean,std))
    print("==========================================================")

              precision    recall  f1-score   support

       Cukup       0.94      0.96      0.95       256
        Baik       0.96      0.95      0.96       326
 Sangat Baik       1.00      0.96      0.98        76

    accuracy                           0.96       658
   macro avg       0.97      0.96      0.96       658
weighted avg       0.96      0.96      0.96       658

Cross-1 Acc:0.958966565349544
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.90      0.96      0.93       253
        Baik       0.96      0.91      0.93       326
 Sangat Baik       0.96      0.97      0.97        79

    accuracy                           0.94       658
   macro avg       0.94      0.95      0.94       658
weighted avg       0.94      0.94      0.94       658

Cross-2 Acc:0.9361702127659575
---------------------------------------------------------
              precision    recall  f1-score   support

      

              precision    recall  f1-score   support

       Cukup       0.94      0.97      0.96       257
        Baik       0.96      0.95      0.95       310
 Sangat Baik       1.00      0.95      0.97        91

    accuracy                           0.96       658
   macro avg       0.97      0.96      0.96       658
weighted avg       0.96      0.96      0.96       658

Cross-9 Acc:0.9574468085106383
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.91      0.99      0.95       249
        Baik       0.99      0.92      0.95       324
 Sangat Baik       0.99      0.98      0.98        85

    accuracy                           0.95       658
   macro avg       0.96      0.96      0.96       658
weighted avg       0.96      0.95      0.95       658

Cross-10 Acc:0.9544072948328267
---------------------------------------------------------
K-2 Mean : 0.9466565349544073 with Standar Deviation : 0.00

              precision    recall  f1-score   support

       Cukup       0.91      0.99      0.95       245
        Baik       0.98      0.93      0.95       339
 Sangat Baik       1.00      0.95      0.97        74

    accuracy                           0.95       658
   macro avg       0.96      0.96      0.96       658
weighted avg       0.96      0.95      0.95       658

Cross-7 Acc:0.9544072948328267
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.93      0.97      0.95       255
        Baik       0.96      0.94      0.95       322
 Sangat Baik       1.00      0.94      0.97        81

    accuracy                           0.95       658
   macro avg       0.96      0.95      0.96       658
weighted avg       0.95      0.95      0.95       658

Cross-8 Acc:0.9513677811550152
---------------------------------------------------------
              precision    recall  f1-score   support

     

              precision    recall  f1-score   support

       Cukup       0.94      0.98      0.96       249
        Baik       0.98      0.94      0.96       324
 Sangat Baik       0.98      1.00      0.99        85

    accuracy                           0.96       658
   macro avg       0.97      0.97      0.97       658
weighted avg       0.96      0.96      0.96       658

Cross-4 Acc:0.9635258358662614
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.88      0.97      0.92       244
        Baik       0.96      0.90      0.93       335
 Sangat Baik       0.99      0.96      0.97        79

    accuracy                           0.93       658
   macro avg       0.95      0.94      0.94       658
weighted avg       0.94      0.93      0.93       658

Cross-5 Acc:0.9346504559270516
---------------------------------------------------------
              precision    recall  f1-score   support

     

              precision    recall  f1-score   support

       Cukup       0.93      0.98      0.95       255
        Baik       0.96      0.94      0.95       322
 Sangat Baik       1.00      0.94      0.97        81

    accuracy                           0.95       658
   macro avg       0.96      0.95      0.96       658
weighted avg       0.96      0.95      0.95       658

Cross-2 Acc:0.9544072948328267
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.93      0.99      0.96       245
        Baik       0.98      0.94      0.96       316
 Sangat Baik       0.99      0.98      0.98        97

    accuracy                           0.96       658
   macro avg       0.97      0.97      0.97       658
weighted avg       0.96      0.96      0.96       658

Cross-3 Acc:0.9620060790273556
---------------------------------------------------------
              precision    recall  f1-score   support

     

              precision    recall  f1-score   support

       Cukup       0.91      0.97      0.94       253
        Baik       0.98      0.92      0.95       326
 Sangat Baik       1.00      1.00      1.00        79

    accuracy                           0.95       658
   macro avg       0.96      0.97      0.96       658
weighted avg       0.95      0.95      0.95       658

Cross-10 Acc:0.9513677811550152
---------------------------------------------------------
K-9 Mean : 0.9544072948328269 with Standar Deviation : 0.009490878417018846.
              precision    recall  f1-score   support

       Cukup       0.90      0.99      0.94       253
        Baik       0.99      0.91      0.95       326
 Sangat Baik       1.00      1.00      1.00        79

    accuracy                           0.95       658
   macro avg       0.96      0.97      0.96       658
weighted avg       0.96      0.95      0.95       658

Cross-1 Acc:0.952887537993921
-----------------------------------------

              precision    recall  f1-score   support

       Cukup       0.95      0.97      0.96       257
        Baik       0.96      0.96      0.96       310
 Sangat Baik       1.00      0.96      0.98        91

    accuracy                           0.96       658
   macro avg       0.97      0.96      0.97       658
weighted avg       0.96      0.96      0.96       658

Cross-8 Acc:0.9635258358662614
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.95      0.97      0.96       249
        Baik       0.98      0.95      0.96       324
 Sangat Baik       0.98      1.00      0.99        85

    accuracy                           0.97       658
   macro avg       0.97      0.97      0.97       658
weighted avg       0.97      0.97      0.97       658

Cross-9 Acc:0.9650455927051672
---------------------------------------------------------
              precision    recall  f1-score   support

     

              precision    recall  f1-score   support

       Cukup       0.92      0.98      0.95       245
        Baik       0.98      0.94      0.96       339
 Sangat Baik       1.00      0.97      0.99        74

    accuracy                           0.96       658
   macro avg       0.97      0.96      0.97       658
weighted avg       0.96      0.96      0.96       658

Cross-6 Acc:0.958966565349544
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.94      0.98      0.96       255
        Baik       0.97      0.95      0.96       322
 Sangat Baik       1.00      0.95      0.97        81

    accuracy                           0.96       658
   macro avg       0.97      0.96      0.96       658
weighted avg       0.96      0.96      0.96       658

Cross-7 Acc:0.958966565349544
---------------------------------------------------------
              precision    recall  f1-score   support

       

              precision    recall  f1-score   support

       Cukup       0.88      0.96      0.92       244
        Baik       0.96      0.91      0.93       335
 Sangat Baik       1.00      0.97      0.99        79

    accuracy                           0.93       658
   macro avg       0.95      0.95      0.95       658
weighted avg       0.94      0.93      0.93       658

Cross-4 Acc:0.9346504559270516
---------------------------------------------------------
              precision    recall  f1-score   support

       Cukup       0.90      0.97      0.93       256
        Baik       0.96      0.91      0.94       326
 Sangat Baik       0.99      0.93      0.96        76

    accuracy                           0.94       658
   macro avg       0.95      0.94      0.94       658
weighted avg       0.94      0.94      0.94       658

Cross-5 Acc:0.9376899696048632
---------------------------------------------------------
              precision    recall  f1-score   support

     