In [1]:
# Bagged Decision Trees for Classification
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold, cross_validate

# PILIH DATASET

In [2]:
# Read the semicolon-delimited grades file, renumber the rows so the labels
# start at 1 instead of 0, then preview the first five records.
data = pd.read_csv("datanilai2.csv", sep=";")
data.index = data.index + 1
data.head()

Unnamed: 0,NIS,NAMA,L/P,Kehadiran (5%),NH,UTS,UAS,Pengetahuan,Keterampilan,Total,Kelulusan,Sikap
1,15501,ADHIKA NUGRAHA,L,38,91,84,76,84,56,82,1,A
2,15801,ADITYA HIMAWAN HOGANTARA R.L.,L,39,65,85,76,73,55,77,0,A
3,15507,AGUNG RISKY SETYAWAN,L,40,63,80,78,72,97,81,1,A
4,15514,ALIFA AYU MIRANTI HARTONO,P,40,74,80,78,77,81,81,1,A
5,15531,ANYS KHOIRIYAH,P,40,72,82,76,76,95,82,1,A


MENGAMBIL 50% DATA UNTUK TRAINING DAN 50% DATA UNTUK TESTING

In [3]:
# Training half: the first 90 rows (row labels 1-90).
# Features are the columns from position 3 up to, but excluding, the last
# three; the target is the second-to-last column as a 1-D array.
train_rows = data.iloc[:90]
X = train_rows.iloc[:, 3:-3].values
Y = train_rows.iloc[:, -2].values

In [4]:
# Hold-out half: every row after the first 90 (row labels 91 onward; the
# original note says 91-180, which assumes the file has 180 rows -- TODO confirm).
# Same feature/target column selection as the training half.
test_rows = data.iloc[90:]
X_ = test_rows.iloc[:, 3:-3].values
Y_ = test_rows.iloc[:, -2].values

# ALGORITMA PADA BAGGING

In [5]:
# Fit the three base learners on the training half (X, Y) and keep their
# predictions for the hold-out half (X_). The fitted objects and these
# predictions are reused by the bagging cells further down.

# Decision Tree -- random_state is pinned so repeated runs build the same
# tree; without it, ties between equally good splits are broken at random
# and dt_pred can change from run to run.
dt = DecisionTreeClassifier(random_state=0)
dt.fit(X, Y)
dt_pred = dt.predict(X_)

# Naive Bayes
nb = GaussianNB()
nb.fit(X, Y)
nb_pred = nb.predict(X_)

# KNN
knn = KNeighborsClassifier()
knn.fit(X, Y)
knn_pred = knn.predict(X_)

# (label, fitted estimator) pairs, in training order.
estimators = [
    ('Decision Tree', dt),
    ('Naive Bayes', nb),
    ('KNN', knn),
]


In [6]:
# Decision Tree performance before bagging: fit on the training half and
# score on the hold-out half.
# random_state is pinned so the baseline figures are reproducible; an
# unseeded tree breaks split ties at random, so accuracy/precision/recall
# could differ between runs.
clf_dt = DecisionTreeClassifier(random_state=0).fit(X, Y)
y_predt = clf_dt.predict(X_)  # predict() already returns a 1-D array
print("Accuracy:", metrics.accuracy_score(Y_, y_predt))
print("Precision:", metrics.precision_score(Y_, y_predt))
print("Recall:", metrics.recall_score(Y_, y_predt))

Accuracy: 0.8777777777777778
Precision: 0.847457627118644
Recall: 0.9615384615384616


In [7]:
# Naive Bayes performance before bagging: fit on the training half and
# score on the hold-out half.
clf_nb = GaussianNB().fit(X, Y)
y_predn = clf_nb.predict(X_)
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(Y_, y_predn))

Accuracy: 0.9111111111111111
Precision: 0.9583333333333334
Recall: 0.8846153846153846


In [8]:
# KNN performance before bagging: fit on the training half and score on the
# hold-out half.
clf_knn = KNeighborsClassifier().fit(X, Y)
y_predk = clf_knn.predict(X_)
for label, score_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(label, score_fn(Y_, y_predk))

Accuracy: 0.9555555555555556
Precision: 0.9615384615384616
Recall: 0.9615384615384616


# ENSEMBLE DENGAN METODE BAGGING UNTUK DECISION TREE

In [9]:
# Bag the decision tree, fit it on the training half and predict the
# hold-out half.
# The base learner is passed positionally on purpose: the keyword was
# `base_estimator` before scikit-learn 1.2 and is `estimator` from 1.2 on
# (the old name was removed in 1.4), while the first positional slot is the
# base learner in both, so this call works across versions.
ensemble1 = BaggingClassifier(dt, n_jobs=4, random_state=0)
ensemble1.fit(X, Y)
ensemble1_pred = ensemble1.predict(X_)

# Side-by-side table: true labels, single-tree predictions, bagged predictions.
hai = {
    'Y True': Y_,
    'Decision Tree': dt_pred,
    'Bagging DT': ensemble1_pred,
}
print("PREDIKSI ALGORITMA DECISION TREE DENGAN BAGGING")
pd.DataFrame(hai).head(7)

PREDIKSI ALGORITMA DECISION TREE DENGAN BAGGING


Unnamed: 0,Y True,Decision Tree,Bagging DT
0,0,0,0
1,0,0,0
2,1,1,1
3,1,1,1
4,0,0,0
5,1,1,1
6,0,0,0


In [10]:
# 5-fold cross-validation of the bagged decision tree, collecting accuracy,
# precision and recall for both the train and test side of every fold.
# NOTE(review): the folds are drawn from the hold-out half only (X_, Y_), so
# each fold refits a fresh copy of the ensemble on part of that half. These
# scores are therefore not measured on the same train/test split as the
# pre-bagging baselines -- confirm this comparison is intended.
scores = ['accuracy', 'precision', 'recall']
kfold = KFold(n_splits=5)
hasil = cross_validate(
    ensemble1,
    X_,
    Y_,
    scoring=scores,
    cv=kfold,
    return_train_score=True,
)
hasil_ = pd.DataFrame(hasil)


In [11]:
print("HASIL ALGORITMA DECISION TREE DENGAN BAGGING")

# Average every test metric over the cross-validation folds.
accuracy = hasil_["test_accuracy"].mean()
precision = hasil_["test_precision"].mean()
recall = hasil_["test_recall"].mean()

for template, value in (
    ("Accuracy : %0.2f", accuracy),
    ("Precision : %0.2f ", precision),
    ("Recall : %0.2f ", recall),
):
    print(template % value)

HASIL ALGORITMA DECISION TREE DENGAN BAGGING
Accuracy : 0.93
Precision : 0.97 
Recall : 0.89 


# ENSEMBLE DENGAN METODE BAGGING UNTUK NAIVE BAYES

In [12]:
# Bag the Naive Bayes model, fit it on the training half and predict the
# hold-out half.
# The base learner is passed positionally on purpose: the keyword was
# `base_estimator` before scikit-learn 1.2 and is `estimator` from 1.2 on
# (the old name was removed in 1.4), while the first positional slot is the
# base learner in both, so this call works across versions.
ensemble2 = BaggingClassifier(nb, n_jobs=4, random_state=0)
ensemble2.fit(X, Y)
ensemble2_pred = ensemble2.predict(X_)

# Side-by-side table: true labels, single-model predictions, bagged predictions.
hai2 = {
    'Y True': Y_,
    'Naive Bayes': nb_pred,
    'Bagging NB': ensemble2_pred,
}
print("PREDIKSI ALGORITMA NAIVE BAYES DENGAN BAGGING")
pd.DataFrame(hai2).head(7)

PREDIKSI ALGORITMA NAIVE BAYES DENGAN BAGGING


Unnamed: 0,Y True,Naive Bayes,Bagging NB
0,0,0,0
1,0,0,0
2,1,1,1
3,1,1,1
4,0,0,0
5,1,1,1
6,0,0,0


In [13]:
# 5-fold cross-validation of the bagged Naive Bayes model, collecting
# accuracy, precision and recall for both the train and test side of every fold.
# NOTE(review): the folds are drawn from the hold-out half only (X_, Y_), so
# each fold refits a fresh copy of the ensemble on part of that half. These
# scores are therefore not measured on the same train/test split as the
# pre-bagging baselines -- confirm this comparison is intended.
scores2 = ['accuracy', 'precision', 'recall']
kfold = KFold(n_splits=5)
hasil2 = cross_validate(
    ensemble2,
    X_,
    Y_,
    scoring=scores2,
    cv=kfold,
    return_train_score=True,
)
hasil2_ = pd.DataFrame(hasil2)


In [14]:
print("HASIL BAGGING DENGAN NAIVE BAYES")

# Average every test metric over the cross-validation folds.
accuracy = hasil2_["test_accuracy"].mean()
precision = hasil2_["test_precision"].mean()
recall = hasil2_["test_recall"].mean()

for template, value in (
    ("Accuracy : %0.2f", accuracy),
    ("Precision : %0.2f ", precision),
    ("Recall : %0.2f ", recall),
):
    print(template % value)

HASIL BAGGING DENGAN NAIVE BAYES
Accuracy : 0.91
Precision : 0.95 
Recall : 0.87 


# ENSEMBLE DENGAN METODE BAGGING UNTUK KNN

In [15]:
# Bag the KNN model, fit it on the training half and predict the hold-out half.
# The base learner is passed positionally on purpose: the keyword was
# `base_estimator` before scikit-learn 1.2 and is `estimator` from 1.2 on
# (the old name was removed in 1.4), while the first positional slot is the
# base learner in both, so this call works across versions.
ensemble3 = BaggingClassifier(knn, n_jobs=3, random_state=0)
ensemble3.fit(X, Y)
ensemble3_pred = ensemble3.predict(X_)

# Side-by-side table: true labels, single-model predictions, bagged predictions.
hai3 = {
    'Y True': Y_,
    'KNN': knn_pred,
    'BAGGING KNN': ensemble3_pred,
}
print("PREDIKSI ALGORITMA KNN DENGAN BAGGING")
pd.DataFrame(hai3).head(7)

PREDIKSI ALGORITMA KNN DENGAN BAGGING


Unnamed: 0,Y True,KNN,BAGGING KNN
0,0,0,0
1,0,0,0
2,1,1,1
3,1,1,1
4,0,0,0
5,1,1,1
6,0,0,0


In [16]:
# 5-fold cross-validation of the bagged KNN model, collecting accuracy,
# precision and recall for both the train and test side of every fold.
# The scoring list defined in this cell (scores3) is the one passed in, so
# the cell no longer depends on a variable left over from the Naive Bayes cell.
# NOTE(review): the folds are drawn from the hold-out half only (X_, Y_), so
# each fold refits a fresh copy of the ensemble on part of that half. These
# scores are therefore not measured on the same train/test split as the
# pre-bagging baselines -- confirm this comparison is intended.
kfold = KFold(n_splits=5)
scores3 = ['accuracy', 'precision', 'recall']
hasil3 = cross_validate(ensemble3, X_, Y_, cv=kfold, scoring=scores3, return_train_score=True)
hasil3_ = pd.DataFrame(hasil3)


In [17]:
print("HASIL BAGGING DENGAN KNN")

# Average every test metric over the cross-validation folds.
accuracy = hasil3_["test_accuracy"].mean()
precision = hasil3_["test_precision"].mean()
recall = hasil3_["test_recall"].mean()

for template, value in (
    ("Accuracy : %0.2f", accuracy),
    ("Precision : %0.2f ", precision),
    ("Recall : %0.2f ", recall),
):
    print(template % value)

HASIL BAGGING DENGAN KNN
Accuracy : 0.94
Precision : 0.97 
Recall : 0.94 


KESIMPULAN
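1. OPTIMASI AKURASI TERBAIK : DECISION TREE 0.88 menjadi 0.93
2. OPTIMASI PRESISI TERBAIK : DECISION TREE 0.85 menjadi 0.97
3. RECALL MENGALAMI PENURUNAN PADA KETIGA ALGORITMA (DECISION TREE 0.96 menjadi 0.89, NAIVE BAYES 0.88 menjadi 0.87, KNN 0.96 menjadi 0.94)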