In [33]:
import numpy as np
import pandas as pd
import xgboost as xgb
import random
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [34]:
class SISA:
    def __init__(self, shards = 5, slices = 5) -> None:
        self.shards = shards
        self.slices = slices
        self.models = [xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss') for _ in range(self.shards)]
    
    def train(self, x : np.ndarray, y : np.ndarray) -> None:
        self.input_shards = np.array_split(x, self.shards)
        self.output_shards = np.array_split(y, self.shards)

        for i, model in enumerate(self.models):
            model.fit(self.input_shards[i], self.output_shards[i])
    
    def predict(self, x : np.ndarray) -> None:
        prediction_results = []
        for current_data in x:
            current_pred = 0
            final_pred = 0
            freq = defaultdict(int)

            for model in self.models:
                current_pred = model.predict([current_data])[0]
                freq[current_pred] += 1

                if freq[current_pred] > freq[final_pred]:
                    final_pred = current_pred
                
            prediction_results.append(final_pred)
        
        return np.array(prediction_results)

    def delete(self, x:np.ndarray, y:int) -> None:
        nsi = self.input_shards.copy()
        nso = self.output_shards.copy()
        for data in x:
            for i, shard in enumerate(nsi):
                for j, element in enumerate(shard):
                    if np.array_equal(data, element):
                        nsi[i] = np.delete(nsi[i], j, axis= 0)
                        nso[i] = np.delete(nso[i], j)
                        break
        
        for i in range(len(nsi)):
            if len(self.input_shards[i]) != len(nsi[i]):
                print(y," " ,len(nsi[1]) + len(nsi[0]))
                self.models[i].fit(nsi[i], nso[i])
                self.input_shards[i] = nsi[i]
                self.output_shards[i] = nso[i]
    
if __name__ == "__main__":
    testing = SISA()
    print(testing.models)


[XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='mlogloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=None, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=None, ...), XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=F

In [35]:
data = pd.read_csv("cleaned_loan.csv")
data

Unnamed: 0,Pekerjaan,Jenis Pinjaman,Plafon (Rp.),Kondisi Joint Income,Kondisi Pisah Harta,Usia,Hasil IDEB SLIK OJK,Tujuan Pemilikan Properti,Lama bekerja,Penghasilan,...,Down Payment,Asset lainnya yang dimiliki,Bidang Usaha,Ukuran Perusahaan,Kondisi Sertifikat Agunan Utama,Jenis Bangunan Utama,Kondisi Properti Utama,Keputusan,Kelayakan Dokumen Pendukung,Keputusan Akhir
0,KARYAWAN,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (< 514,750,000)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",SINGLE : ≥RP 10 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ LIMIT KREDIT,"Keuangan, asuransi, perbankan",Cakupan area : nasional,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,READY STOK,GREEN,LAYAK,Diterima
1,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,PISAH HARTA,36 - 45,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ LIMIT KREDIT,Lainnya,Cakupan area : nasional,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,READY STOK,GREEN,LAYAK,Diterima
2,KARYAWAN,"TAKE OVER + TOP UP KPR, RUMAH TINGGAL","Plafon (> 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,46 - 55,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,PROPERTI/KENDARAAN ≥ LIMIT KREDIT,KLINIK AKUPUNTIR,CAKUPAN AREA : LOKAL,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,SEDANG DIHUNI,GREEN,LAYAK,Diterima
3,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (894,632,536 - 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,36 - 45,"Kol 1, Tidak pernah menunggak",RUMAH TINGGAL (RUMAH KE 2/3),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,PROPERTI/KENDARAAN < 100% LIMIT KREDIT,PERDAGANGAN,CAKUPAN AREA : LOKAL,"SHM, SERTIPIKAT PECAH",RUKO/RUKAN,READY STOK,GREEN,LAYAK,Diterima
4,KARYAWAN,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (< 514,750,000)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 6 JUTA, < RP. 10 JUTA",...,>= 15%,KAS DAN NON KAS ≥ LIMIT KREDIT,COLOUR SALON,CAKUPAN AREA : LOKAL,"SHGB, SERTIPIKAT PECAH",Rumah Tinggal,SEDANG DIHUNI,YELLOW,LAYAK,Diterima
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal > Tipe 70","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",SINGLE : ≥RP 20 JUTA,...,>= 15%,Properti/kendaraan < 50% plafon,"INFORMASI, TELEKOMUNIKASI DAN KOMPUTER",Public listed,MOU - Sertipikat Induk,Rumah Tinggal,Indent,YELLOW,TIDAK,Ditolak
196,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (< 514,750,000)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),≥ 5,"MENIKAH : ≥RP 10 JUTA, <RP 20 JUTA",...,>= 15%,DEPOSITO/INVES/EMAS < 50% plafon,Lainnya,Multinasional,"SHGB, SERTIPIKAT PECAH",Rumah Tinggal,Indent,GREEN,TIDAK,Ditolak
197,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (< 514,750,000)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 6 JUTA, < RP. 10 JUTA",...,>= 15%,DEPOSITO/INVES/EMAS < 50% plafon,Lainnya,CAKUPAN AREA : LOKAL,MOU - Sertipikat Induk,Rumah Tinggal,Indent,YELLOW,TIDAK,Ditolak
198,PENGUSAHA,"Pembelian Baru / Primary, Ruko / Rukan","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,PISAH HARTA,56 - 65,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,null < 50% plafon,Lainnya,Multinasional,MOU - Sertipikat Induk,RUKO/RUKAN,Indent,GREEN,TIDAK,Ditolak


In [36]:
duplicates = data[data.duplicated(keep=False)]
duplicates

Unnamed: 0,Pekerjaan,Jenis Pinjaman,Plafon (Rp.),Kondisi Joint Income,Kondisi Pisah Harta,Usia,Hasil IDEB SLIK OJK,Tujuan Pemilikan Properti,Lama bekerja,Penghasilan,...,Down Payment,Asset lainnya yang dimiliki,Bidang Usaha,Ukuran Perusahaan,Kondisi Sertifikat Agunan Utama,Jenis Bangunan Utama,Kondisi Properti Utama,Keputusan,Kelayakan Dokumen Pendukung,Keputusan Akhir
51,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Pernah Kol 3, 4 atau 5",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",MENIKAH : ≥RP 20 JUTA,...,>= 15%,50% plafon <= Properti/kendaraan<= 100% plafon,Perdagangan / distribusi / eksport-import kebu...,Multinasional,MOU - Sertipikat Induk,Rumah Tinggal,Indent,GREEN,TIDAK,Ditolak
54,PENGUSAHA,"Pembelian Baru / Primary, Rumah Tinggal > Tipe 70","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,46 - 55,"Kol 1, Tidak pernah menunggak",RUMAH TINGGAL (RUMAH KE 2/3),≥ 5,SINGLE : ≥RP 20 JUTA,...,>= 15%,null < 50% plafon,"Farmasi, kosmetik dan alat kedokteran",CAKUPAN AREA : LOKAL,MOU - Sertipikat Induk,Rumah Tinggal,Indent,YELLOW,TIDAK,Ditolak
55,KARYAWAN,"Pembelian Baru / Primary, Apartemen < Tipe 22","Plafon (< 514,750,000)",JOINT INCOME,TIDAK PISAH HARTA,36 - 45,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,50% plafon <= Properti/kendaraan<= 100% plafon,Pendidikan,Cakupan area : nasional,MOU - Sertipikat Induk,Apartemen,Indent,GREEN,TIDAK,Ditolak
56,KARYAWAN,"Pembelian Baru / Primary, Apartemen Tipe 22 - 70","Plafon (514,750,000 - 894,632,536)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 10 JUTA, <RP 20 JUTA",...,>= 15%,null < 50% plafon,Perdagangan / distribusi / eksport-import kebu...,Cakupan area : nasional,MOU - Sertipikat Induk,Apartemen,Indent,GREEN,TIDAK,Ditolak
57,PENGUSAHA,"Pembelian Baru / Primary, Rumah Tinggal > Tipe 70","Plafon (894,632,536 - 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,RUMAH TINGGAL (RUMAH KE 2/3),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ plafon,Perdagangan / distribusi / eksport-import kebu...,Cakupan area : nasional,MOU - Sertipikat Pecah,Rumah Tinggal,Sedang dibangun,YELLOW,TIDAK,Ditolak
60,PENGUSAHA,"Pembelian Baru / Primary, Rumah Tinggal > Tipe 70","Plafon (894,632,536 - 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,RUMAH TINGGAL (RUMAH KE 2/3),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ plafon,Perdagangan / distribusi / eksport-import kebu...,Cakupan area : nasional,MOU - Sertipikat Pecah,Rumah Tinggal,Sedang dibangun,YELLOW,TIDAK,Ditolak
61,KARYAWAN,"Pembelian Baru / Primary, Apartemen Tipe 22 - 70","Plafon (514,750,000 - 894,632,536)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 6 JUTA, < RP. 10 JUTA",...,>= 15%,null < 50% plafon,Lainnya,Cakupan area : nasional,MOU - Sertipikat Induk,Apartemen,Indent,YELLOW,TIDAK,Ditolak
72,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (514,750,000 - 894,632,536)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",MENIKAH : ≥RP 20 JUTA,...,>= 15%,KAS DAN NON KAS ≥ LIMIT KREDIT,"INFORMASI, TELEKOMUNIKASI DAN KOMPUTER",CAKUPAN AREA : LOKAL,MOU - Sertipikat Pecah,Rumah Tinggal,Indent,GREEN,LAYAK,Diterima
78,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (514,750,000 - 894,632,536)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",MENIKAH : ≥RP 20 JUTA,...,>= 15%,KAS DAN NON KAS ≥ LIMIT KREDIT,"INFORMASI, TELEKOMUNIKASI DAN KOMPUTER",CAKUPAN AREA : LOKAL,MOU - Sertipikat Pecah,Rumah Tinggal,Indent,GREEN,LAYAK,Diterima
90,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Pernah Kol 3, 4 atau 5",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",MENIKAH : ≥RP 20 JUTA,...,>= 15%,50% plafon <= Properti/kendaraan<= 100% plafon,Perdagangan / distribusi / eksport-import kebu...,Multinasional,MOU - Sertipikat Induk,Rumah Tinggal,Indent,GREEN,TIDAK,Ditolak


In [37]:
data = data.drop_duplicates()
data

Unnamed: 0,Pekerjaan,Jenis Pinjaman,Plafon (Rp.),Kondisi Joint Income,Kondisi Pisah Harta,Usia,Hasil IDEB SLIK OJK,Tujuan Pemilikan Properti,Lama bekerja,Penghasilan,...,Down Payment,Asset lainnya yang dimiliki,Bidang Usaha,Ukuran Perusahaan,Kondisi Sertifikat Agunan Utama,Jenis Bangunan Utama,Kondisi Properti Utama,Keputusan,Kelayakan Dokumen Pendukung,Keputusan Akhir
0,KARYAWAN,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (< 514,750,000)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",SINGLE : ≥RP 10 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ LIMIT KREDIT,"Keuangan, asuransi, perbankan",Cakupan area : nasional,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,READY STOK,GREEN,LAYAK,Diterima
1,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,PISAH HARTA,36 - 45,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,DEPOSITO/INVES/EMAS ≥ LIMIT KREDIT,Lainnya,Cakupan area : nasional,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,READY STOK,GREEN,LAYAK,Diterima
2,KARYAWAN,"TAKE OVER + TOP UP KPR, RUMAH TINGGAL","Plafon (> 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,46 - 55,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,PROPERTI/KENDARAAN ≥ LIMIT KREDIT,KLINIK AKUPUNTIR,CAKUPAN AREA : LOKAL,"SHM, SERTIPIKAT PECAH",Rumah Tinggal,SEDANG DIHUNI,GREEN,LAYAK,Diterima
3,PENGUSAHA,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (894,632,536 - 1,340,960,436)",JOINT INCOME,TIDAK PISAH HARTA,36 - 45,"Kol 1, Tidak pernah menunggak",RUMAH TINGGAL (RUMAH KE 2/3),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,PROPERTI/KENDARAAN < 100% LIMIT KREDIT,PERDAGANGAN,CAKUPAN AREA : LOKAL,"SHM, SERTIPIKAT PECAH",RUKO/RUKAN,READY STOK,GREEN,LAYAK,Diterima
4,KARYAWAN,"PEMBELIAN BEKAS / SECONDARY, RUMAH TINGGAL","Plafon (< 514,750,000)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 6 JUTA, < RP. 10 JUTA",...,>= 15%,KAS DAN NON KAS ≥ LIMIT KREDIT,COLOUR SALON,CAKUPAN AREA : LOKAL,"SHGB, SERTIPIKAT PECAH",Rumah Tinggal,SEDANG DIHUNI,YELLOW,LAYAK,Diterima
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal > Tipe 70","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5",SINGLE : ≥RP 20 JUTA,...,>= 15%,Properti/kendaraan < 50% plafon,"INFORMASI, TELEKOMUNIKASI DAN KOMPUTER",Public listed,MOU - Sertipikat Induk,Rumah Tinggal,Indent,YELLOW,TIDAK,Ditolak
196,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (< 514,750,000)",JOINT INCOME,TIDAK PISAH HARTA,21 - 35,Data Tidak Tersedia,Rumah Tinggal (Rumah ke 1),≥ 5,"MENIKAH : ≥RP 10 JUTA, <RP 20 JUTA",...,>= 15%,DEPOSITO/INVES/EMAS < 50% plafon,Lainnya,Multinasional,"SHGB, SERTIPIKAT PECAH",Rumah Tinggal,Indent,GREEN,TIDAK,Ditolak
197,KARYAWAN,"Pembelian Baru / Primary, Rumah Tinggal Tipe 2...","Plafon (< 514,750,000)",TIDAK JOINT INCOME,TIDAK PISAH HARTA,21 - 35,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),"≥ 2, < 5","SINGLE : ≥RP 6 JUTA, < RP. 10 JUTA",...,>= 15%,DEPOSITO/INVES/EMAS < 50% plafon,Lainnya,CAKUPAN AREA : LOKAL,MOU - Sertipikat Induk,Rumah Tinggal,Indent,YELLOW,TIDAK,Ditolak
198,PENGUSAHA,"Pembelian Baru / Primary, Ruko / Rukan","Plafon (> 1,340,960,436)",TIDAK JOINT INCOME,PISAH HARTA,56 - 65,"Kol 1, Tidak pernah menunggak",Rumah Tinggal (Rumah ke 1),≥ 5,MENIKAH : ≥RP 20 JUTA,...,>= 15%,null < 50% plafon,Lainnya,Multinasional,MOU - Sertipikat Induk,RUKO/RUKAN,Indent,GREEN,TIDAK,Ditolak


In [38]:
labelEncoder = {}
for col in data.columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    labelEncoder[col] = le

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = le.fit_transform(data[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [39]:
X = data.drop(["Keputusan Akhir"], axis=1)
y = data["Keputusan Akhir"]

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
print(X_train.values)

[[0 7 3 ... 0 0 0]
 [0 1 0 ... 1 0 0]
 [0 1 2 ... 1 0 0]
 ...
 [0 4 0 ... 4 2 0]
 [1 5 1 ... 0 0 0]
 [0 4 1 ... 4 0 0]]


In [42]:
print(y_train.values)

[0 0 0 1 1 0 0 0 1 0 1 0 1 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1
 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 0 0 1 1 0 0 1 1 1 0
 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 1 0 0 1 0 0 1 0 1 1 1 0 1 1 1 0 1 0 1 0 1 1
 1 1 0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0]


In [43]:
sisa_classifier = SISA(2,5)

In [44]:
sisa_classifier.train(X_train.values, y_train.values)

In [45]:
print("Akurasi Model SISA: ", accuracy_score(sisa_classifier.predict(X_test.values), y_test))

Akurasi Model SISA:  0.9444444444444444


In [46]:
exp_results = pd.DataFrame(columns=['Iteration', 'Accuracy'])
exp_results

Unnamed: 0,Iteration,Accuracy


In [47]:
total = len(sisa_classifier.input_shards[1]) + len(sisa_classifier.input_shards[0])
print(total)

144


In [48]:
len(X_train.values)

144

In [49]:
accuracy = 1
iteration = 1
max_random_number = 143
deleted_index = []
while accuracy > 0.7:
    new_index = False
    while new_index == False:
        index = random.randint(0, max_random_number)
        if index not in deleted_index:
            deleted_index.append(index)
            break
    sisa_classifier.delete([X_train.values[index]], iteration)
    y_pred = sisa_classifier.predict(X_test.values)
    accuracy = accuracy_score(y_pred, y_test)
    exp_results = pd.concat([exp_results, pd.DataFrame([{'Iteration': iteration, 'Index' : index,'Accuracy': accuracy}])], ignore_index=True)
    iteration += 1

1   143
2   142
3   141
4   140
5   139
6   138
7   137
8   136
9   135
10   134
11   133
12   132
13   131
14   130
15   129
16   128
17   127
18   126
19   125
20   124
21   123
22   122
23   121
24   120
25   119
26   118
27   117
28   116
29   115
30   114
31   113
32   112
33   111
34   110
35   109
36   108
37   107
38   106
39   105
40   104
41   103
42   102
43   101
44   100
45   99
46   98
47   97
48   96
49   95
50   94
51   93
52   92
53   91
54   90
55   89
56   88
57   87
58   86
59   85
60   84
61   83
62   82
63   81
64   80
65   79
66   78
67   77
68   76
69   75
70   74
71   73
72   72
73   71
74   70
75   69
76   68
77   67
78   66
79   65
80   64
81   63
82   62
83   61
84   60
85   59
86   58
87   57
88   56
89   55
90   54
91   53
92   52
93   51
94   50
95   49
96   48
97   47
98   46
99   45
100   44
101   43
102   42
103   41
104   40
105   39
106   38
107   37
108   36
109   35
110   34
111   33
112   32
113   31


In [50]:
data_result = pd.DataFrame(exp_results)
data_result

Unnamed: 0,Iteration,Accuracy,Index
0,1,0.944444,15.0
1,2,0.944444,36.0
2,3,0.944444,95.0
3,4,0.944444,28.0
4,5,0.916667,56.0
...,...,...,...
108,109,0.888889,127.0
109,110,0.861111,37.0
110,111,0.861111,102.0
111,112,0.916667,68.0


In [51]:
total = len(sisa_classifier.input_shards[0]) + len(sisa_classifier.input_shards[1])
total_y = len(sisa_classifier.output_shards[0]) + len(sisa_classifier.output_shards[1])
print(total)
print(total_y)

31
31


In [52]:
index_sisa = []
for i in range(len(X_train)):
    if i in data_result['Index'].values:
        continue
    else:
        index_sisa.append(i)
print(len(index_sisa))

31


In [53]:
print(index_sisa)

[0, 5, 11, 14, 21, 38, 39, 43, 44, 53, 59, 61, 63, 65, 67, 73, 74, 85, 104, 105, 108, 112, 118, 120, 122, 123, 131, 133, 134, 136, 142]


In [54]:
data_found = []
data_nfound = []
for i in index_sisa:
    data = X_train.values[i]
    for j in range(len(sisa_classifier.input_shards)):
        for k in range(len(sisa_classifier.input_shards[j])):
            if np.array_equal(sisa_classifier.input_shards[j][k], i):
                data_found.append(i)
            else:
                data_nfound.append(i)

In [55]:
unique_array = list(set(data_found))
print(unique_array)

[]


In [56]:
unique_array = list(set(data_nfound))
print(unique_array)

[0, 131, 5, 133, 134, 136, 11, 14, 142, 21, 38, 39, 43, 44, 53, 59, 61, 63, 65, 67, 73, 74, 85, 104, 105, 108, 112, 118, 120, 122, 123]


In [57]:
len(unique_array)

31

In [58]:
filtered_data = data_result.loc[data_result["Index"] == 1]
filtered_data

Unnamed: 0,Iteration,Accuracy,Index
53,54,0.916667,1.0


In [59]:
test = X_train.values[0]
test

array([ 0,  7,  3,  1,  1,  0,  0,  2,  1,  3,  1,  1,  2,  0,  1,  0, 23,
        3,  0,  2,  0,  0,  0])

In [60]:
flag = 0
for i in range(len(sisa_classifier.input_shards[0])):
    if np.array_equal(sisa_classifier.input_shards[0][i], test):
        flag= 1
if flag ==0 :
    print("data berhasil dihapus dari model")
else:
    print("percobaan gagal dihapus")

percobaan gagal dihapus


In [61]:
# data_hapus = X_train.values[0].copy()
# data_hapus

In [62]:
# flag = 0
# for i in range(len(sisa_classifier.input_shards[0])):
#     if np.array_equal(sisa_classifier.input_shards[0][i], data_hapus):
#         flag= 1
# if flag ==0 :
#     print("yess berhasil")
# else:
#     print("percobaan gagal")

In [63]:
# sisa_classifier.delete([X_train.values[0]])

In [64]:
# flag = 0
# for i in range(len(sisa_classifier.input_shards[0])):
#     if np.array_equal(sisa_classifier.input_shards[0][i], data_hapus):
#         flag= 1
# if flag ==0 :
#     print("yess berhasil")
# else:
#     print("percobaan gagal")