In [1]:
import numpy
import math
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
import pandas
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import time

In [2]:
class Reduksi:
    """Entropy-based training-set reduction followed by k-NN classification.

    For every training sample the classes of its k nearest neighbors are
    turned into a vote vector, normalized, and scored with Shannon entropy.
    Only samples whose entropy is strictly positive are kept to train the
    final ``KNeighborsClassifier``. The whole procedure is evaluated with
    K-fold cross-validation by ``startSkenarioReduksi``.

    Attributes set by ``startSkenarioReduksi``:
        dataLatih / kelasLatih: training features / labels of the current fold.
        score:   mean accuracy over the folds, in percent.
        presisi: mean micro-averaged precision over the folds.
        recall:  mean micro-averaged recall over the folds.
        time:    total fit + predict time over all folds, in seconds.
    """

    def __init__(self, k, split, random_state=None):
        """Store the hyper-parameters.

        Args:
            k: number of neighbors used both for the reduction and for the
                final classifier.
            split: number of cross-validation folds.
            random_state: optional seed forwarded to ``KFold`` so that the
                shuffled folds are reproducible. ``None`` keeps the original
                non-deterministic shuffling.
        """
        # Stored as k + 1 for backward compatibility: the value historically
        # included the query point itself in the neighbor count.
        self.k = k + 1
        self.split = split
        self.random_state = random_state

    def getMeanData(self, data):
        """Return the arithmetic mean of ``data`` rounded to 2 decimals.

        The mean is taken over the actual number of values, so it stays
        correct even if ``data`` does not hold exactly ``self.split`` items.
        An empty sequence yields ``0.0``.
        """
        if len(data) == 0:
            return 0.0
        return round(sum(data) / len(data), 2)

    def hitung_vik(self, kelas_k, kelasTetangga, dataKelas):
        """Build the vote vector of one sample from its neighbors' classes.

        Args:
            kelas_k: class label of the sample being evaluated.
            kelasTetangga: class labels of the sample's neighbors.
            dataKelas: the distinct class labels; defines the vector layout.

        Returns:
            A float array with one entry per label in ``dataKelas``. The
            entry for ``kelas_k`` holds the number of neighbors sharing that
            class; every other entry holds the number of neighbors that have
            a different class than ``kelas_k``.
        """
        dataKelas = numpy.asarray(dataKelas)
        kelasTetangga = numpy.asarray(kelasTetangga)

        same_count = int(numpy.count_nonzero(kelasTetangga == kelas_k))
        other_count = kelasTetangga.size - same_count

        # Every class starts with the "different class" count; the slot of
        # the sample's own class is then overwritten with the agreeing count.
        vi = numpy.full(dataKelas.size, float(other_count))
        vi[dataKelas == kelas_k] = float(same_count)
        return vi

    def hitung_norm_vik(self, vi):
        """Normalize a vote vector so that its entries sum to 1.

        An all-zero vector (no neighbors) is returned as zeros instead of
        producing NaN through a 0/0 division.
        """
        vi = numpy.asarray(vi, dtype=float)
        total = vi.sum()
        if total == 0:
            return numpy.zeros_like(vi)
        return vi / total

    def hitung_entropy(self, vi_norm):
        """Return the Shannon entropy (base 2) of a normalized vote vector.

        Computes ``-sum(p * log2(p))``; zero entries contribute nothing.
        """
        total = 0.0
        for norm in vi_norm:
            if norm != 0:
                total += norm * math.log2(norm)
        # "0.0 - total" avoids returning negative zero for a pure neighborhood.
        return float(0.0 - total)

    def reduceData(self):
        """Compute the neighborhood entropy of every training sample.

        Uses ``self.dataLatih`` / ``self.kelasLatih`` of the current fold.

        Returns:
            A numpy array, aligned with ``self.dataLatih``, holding the
            entropy of each sample rounded to 3 decimals.
        """
        dataKelas = numpy.unique(self.kelasLatih)

        # Fit with the real k and query without arguments: scikit-learn then
        # returns the neighbors of every fitted point while excluding the
        # point itself. This stays correct when the data contains duplicate
        # rows, where "drop the first neighbor" may drop the wrong sample.
        nn = NearestNeighbors(n_neighbors=self.k - 1)
        nn.fit(self.dataLatih)
        _, neighbor_index = nn.kneighbors()

        result = []
        # Iterate labels and neighbor rows together so that each sample uses
        # its own label, without searching the data for an equal row.
        for kelas, tetangga_index in zip(self.kelasLatih, neighbor_index):
            kelasTetangga = self.kelasLatih[tetangga_index]
            vi = self.hitung_vik(kelas, kelasTetangga, dataKelas)
            vi_norm = self.hitung_norm_vik(vi)
            entropy = self.hitung_entropy(vi_norm)
            result.append(round(entropy, 3))
        return numpy.array(result)

    def klasify(self, entropy, x_test, y_test):
        """Train k-NN on the reduced training set and evaluate it.

        Args:
            entropy: per-sample entropies as returned by ``reduceData``.
            x_test: test features.
            y_test: test labels.

        Returns:
            ``(score, presisi, recall, time_compute)`` where ``score`` is the
            accuracy rounded to 2 decimals, ``presisi`` / ``recall`` are
            micro-averaged, and ``time_compute`` is the fit + predict time in
            seconds.

        Raises:
            ValueError: if no training sample has an entropy above zero, so
                nothing is left to train on.
        """
        start = time.time()
        keep = numpy.asarray(entropy) > 0
        n_kept = int(numpy.count_nonzero(keep))
        if n_kept == 0:
            raise ValueError(
                "entropy reduction removed every training sample; "
                "nothing left to fit the classifier on"
            )

        # The classifier cannot use more neighbors than there are samples.
        n_neighbors = min(self.k - 1, n_kept)
        after = KNeighborsClassifier(n_neighbors=n_neighbors)
        after.fit(self.dataLatih[keep], self.kelasLatih[keep])

        # Predict once and reuse the predictions for all metrics.
        y_predict = after.predict(x_test)
        accuracy = float(numpy.mean(y_predict == numpy.ravel(y_test)))
        score = round(accuracy, 2)
        end = time.time()
        time_compute = end - start

        presisi = precision_score(y_test, y_predict, average='micro')
        recall = recall_score(y_test, y_predict, average='micro')
        return score, presisi, recall, time_compute

    def startSkenarioReduksi(self, dataset, datasetClass):
        """Run the reduce-then-classify scenario with K-fold cross-validation.

        Args:
            dataset: feature matrix, one row per sample.
            datasetClass: class label of each row of ``dataset``.

        Stores the aggregated results in ``self.score``, ``self.presisi``,
        ``self.recall`` and ``self.time``.
        """
        dataset = numpy.asarray(dataset)
        datasetClass = numpy.asarray(datasetClass)

        kf = KFold(n_splits=self.split, shuffle=True,
                   random_state=self.random_state)
        scores = []
        presisi = []
        recall = []
        waktu = []
        for train_index, test_index in kf.split(dataset):
            self.dataLatih = dataset[train_index]
            self.kelasLatih = datasetClass[train_index]
            dataTest = dataset[test_index]
            classTest = datasetClass[test_index]

            entropy = self.reduceData()
            score, pres, rec, time_compute = self.klasify(
                entropy, dataTest, classTest)

            presisi.append(pres)
            recall.append(rec)
            scores.append(score * 100)
            waktu.append(time_compute)

        self.score = self.getMeanData(scores)
        self.presisi = self.getMeanData(presisi)
        self.recall = self.getMeanData(recall)
        self.time = round(sum(waktu), 2)

In [3]:
# def loadData():
#         allData = pandas.read_csv("lbp4n4resize64zona16.csv")
#         classData = allData['class']
#         data = allData.drop('class', axis=1)
#         return numpy.array(data), numpy.array(classData)

In [4]:
# dataset, datasetClass = loadData()
# split = 4
# kf = KFold(n_splits=split, shuffle=True)
# kf.get_n_splits(dataset)
# result = []
# scores = []
# for train_index, test_index in kf.split(dataset):
#     dataTrain = dataset[train_index]
#     classTrain = datasetClass[train_index]
#     dataTest = dataset[test_index]
#     classTest = datasetClass[test_index]
#     reduk = Reduksi(dataLatih=dataTrain, kelasLatih=classTrain, k=1)
#     result = entropy = reduk.reduceData()
#     reduk.klasify(entropy, dataTest, classTest)

In [5]:
# print(result)
# hasil = numpy.array(result)
# unik = numpy.unique(hasil)
# for uniq in unik:
#     result = hasil == uniq
#     print(uniq, "=", hasil[result].size)