# Load Dataset

In [41]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [42]:
df = pd.read_excel('dataset_mhs.xlsx')
df

Unnamed: 0,ID,JK,MASA STUDI,IPK,TEPAT WAKTU,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,1,P,4,2.343750,Bertahan,,,,C1,C2,
1,2,L,4,3.736111,Bertahan,,,,Bertahan,Mengundurkan Diri,
2,3,L,4,2.254902,Bertahan,,,P(Bertahan/MD),0.535714,0.464286,1.0
3,4,L,4,3.086957,Bertahan,,,,,,
4,5,L,4,3.115942,Bertahan,,,P(JK),,,
...,...,...,...,...,...,...,...,...,...,...,...
81,82,L,2,2.521739,Mengundurkan Diri,,,,,,
82,83,L,2,3.043478,Mengundurkan Diri,,,,,,
83,84,L,2,1.869565,Mengundurkan Diri,,,,,,
84,85,L,4,3.260870,?,,,,,,


In [43]:
df = df.iloc[:,0:5]
df

Unnamed: 0,ID,JK,MASA STUDI,IPK,TEPAT WAKTU
0,1,P,4,2.343750,Bertahan
1,2,L,4,3.736111,Bertahan
2,3,L,4,2.254902,Bertahan
3,4,L,4,3.086957,Bertahan
4,5,L,4,3.115942,Bertahan
...,...,...,...,...,...
81,82,L,2,2.521739,Mengundurkan Diri
82,83,L,2,3.043478,Mengundurkan Diri
83,84,L,2,1.869565,Mengundurkan Diri
84,85,L,4,3.260870,?


In [44]:
number = LabelEncoder()
df['JK'] = number.fit_transform(df['JK'])
df['TEPAT WAKTU'] = number.fit_transform(df['TEPAT WAKTU'])

In [45]:
df

Unnamed: 0,ID,JK,MASA STUDI,IPK,TEPAT WAKTU
0,1,1,4,2.343750,1
1,2,0,4,3.736111,1
2,3,0,4,2.254902,1
3,4,0,4,3.086957,1
4,5,0,4,3.115942,1
...,...,...,...,...,...
81,82,0,2,2.521739,2
82,83,0,2,3.043478,2
83,84,0,2,1.869565,2
84,85,0,4,3.260870,0


In [46]:
df_ori = df[0:84]
df_pred = df[84:]

In [47]:
df_ori

Unnamed: 0,ID,JK,MASA STUDI,IPK,TEPAT WAKTU
0,1,1,4,2.343750,1
1,2,0,4,3.736111,1
2,3,0,4,2.254902,1
3,4,0,4,3.086957,1
4,5,0,4,3.115942,1
...,...,...,...,...,...
79,80,0,2,2.260870,2
80,81,1,2,3.000000,2
81,82,0,2,2.521739,2
82,83,0,2,3.043478,2


In [48]:
# Pembagian data sebagai Features/Input dan Target/Label
# Target --> Survived = y
# Features --> negasi Survived = X

X = df_ori.drop(columns='TEPAT WAKTU')
y = df_ori['TEPAT WAKTU']

In [49]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [50]:
y_train[y_train==1].size

31

In [51]:
y_train[y_train==2].size

27

# Naive Bayes

In [52]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)

GaussianNB()

In [53]:
nb.score(X_train, y_train)

0.8448275862068966

In [54]:
y_pred = nb.predict(X_test)

In [55]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [56]:
print(confusion_matrix(y_test, y_pred))

[[11  3]
 [ 3  9]]


In [57]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.79      0.79      0.79        14
           2       0.75      0.75      0.75        12

    accuracy                           0.77        26
   macro avg       0.77      0.77      0.77        26
weighted avg       0.77      0.77      0.77        26



# C4.5

In [58]:
from sklearn.tree import DecisionTreeClassifier

In [59]:
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

DecisionTreeClassifier()

In [60]:
dtc.score(X_train, y_train)

1.0

In [64]:
y_pred = dtc.predict(X_test)

In [65]:
print(confusion_matrix(y_test, y_pred))

[[14  0]
 [ 0 12]]


In [66]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        12

    accuracy                           1.00        26
   macro avg       1.00      1.00      1.00        26
weighted avg       1.00      1.00      1.00        26



# K-Nearest Neighbor

In [67]:
from sklearn.neighbors import KNeighborsClassifier

In [68]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [69]:
knn.score(X_train, y_train)

1.0

In [70]:
y_pred = knn.predict(X_test)

In [80]:
print(confusion_matrix(y_test, y_pred))

[[12  2]
 [ 0 12]]


In [82]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       1.00      0.86      0.92        14
           2       0.86      1.00      0.92        12

    accuracy                           0.92        26
   macro avg       0.93      0.93      0.92        26
weighted avg       0.93      0.92      0.92        26

