**Import Libraries**

In [24]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier

from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [25]:
# Read the mushroom dataset from the project-relative CSV and preview the first rows.
df = pd.read_csv(filepath_or_buffer='dataset/mushrooms.csv')
df.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [26]:
# Per-column summary; for this frame the output lists count / unique / top / freq.
df.describe()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
count,8124,8124,8124,8124,8124,8124,8124,8124,8124,8124,...,8124,8124,8124,8124,8124,8124,8124,8124,8124,8124
unique,2,6,4,10,2,9,2,2,2,12,...,4,9,9,1,4,3,5,9,6,7
top,e,x,y,n,f,n,f,c,b,b,...,s,w,w,p,w,o,p,w,v,d
freq,4208,3656,3244,2284,4748,3528,7914,6812,5612,1728,...,4936,4464,4384,8124,7924,7488,3968,2388,4040,3148


In [27]:
# Column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

**Feature Encoding and Selection**

In [28]:
# Integer-encode every column of `df` (the target 'class' column included).
# The single encoder instance is re-fitted on each column in turn, so after
# this cell `le` only remembers the mapping of the last column it processed.
le = LabelEncoder()
df_encoded = df.apply(lambda column: le.fit_transform(column))

In [29]:
# Target: the encoded 'class' column; features: every remaining column.
y = df_encoded['class']
X = df_encoded.drop(columns=['class'])

In [30]:
# 80/20 hold-out split of the encoded mushroom data.
# NOTE(review): the cell under "Split Data and Feature Extraction" reassigns
# these four names with random_state=1 before any model is trained, so this
# split is never used -- confirm which seed is intended and drop the other.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

**Split Data and Feature Extraction**

In [31]:
# 80/20 hold-out split (seed 1); this is the split the models below are fitted on.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.2,
)

**Tugas 1**

In [32]:
# Create the Decision Tree model (unfitted; it is tuned and fitted by the grid search below).
dt_model = DecisionTreeClassifier(random_state=42)

In [33]:
# Hyperparameter tuning for the Decision Tree: exhaustive search over depth
# and split/leaf size limits with 5-fold cross-validation.
dt_param_grid = dict(
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# fit() returns the search object itself, so construction and fitting can be chained.
dt_grid_search = GridSearchCV(
    estimator=dt_model,
    param_grid=dt_param_grid,
    cv=5,
).fit(X_train, y_train)

# Best candidate, refitted on the whole training split.
dt_best_model = dt_grid_search.best_estimator_

In [34]:
# Create the RandomForest model (unfitted; it is tuned and fitted by the grid search below).
rf_model = RandomForestClassifier(random_state=42)

In [35]:
# Hyperparameter tuning for RandomForest
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# The grid holds 3 * 4 * 3 * 3 = 108 candidates; with cv=5 that is 540 forest
# fits plus the final refit. n_jobs=-1 spreads those fits over all CPU cores.
# Only the scheduling changes: rf_model carries a fixed random_state, so the
# selected parameters are the same as in a serial run.
rf_grid_search = GridSearchCV(rf_model, param_grid=rf_param_grid, cv=5, n_jobs=-1)
rf_grid_search.fit(X_train, y_train)

# Best candidate, refitted on the whole training split.
rf_best_model = rf_grid_search.best_estimator_

In [36]:
# Evaluate the tuned Decision Tree on the held-out test split.
y_pred_dt = dt_best_model.predict(X_test)
accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)

In [37]:
# Evaluate the tuned RandomForest on the held-out test split.
y_pred_rf = rf_best_model.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

In [38]:
# Compare the test accuracy of both models (one line per model, as percentages).
print(
    "Akurasi Decision Tree: {:.2f}%".format(accuracy_dt * 100),
    "Akurasi RandomForest: {:.2f}%".format(accuracy_rf * 100),
    sep="\n",
)

Akurasi Decision Tree: 100.00%
Akurasi RandomForest: 100.00%


**Tugas 2**

In [39]:
# Hyperparameter tuning for the Decision Tree that serves as the AdaBoost
# base learner: 5-fold exhaustive search over depth and split/leaf limits.
dt_param_grid = dict(
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# fit() returns the search object itself, so construction and fitting can be chained.
dt_grid_search = GridSearchCV(
    estimator=dt_model,
    param_grid=dt_param_grid,
    cv=5,
).fit(X_train, y_train)

# Best candidate, refitted on the whole training split.
dt_best_model = dt_grid_search.best_estimator_

In [40]:
# Create the AdaBoost model on top of the tuned Decision Tree.
# scikit-learn renamed AdaBoostClassifier's `base_estimator` argument to
# `estimator` (deprecated in 1.2, removed in 1.4), so the old keyword raises
# TypeError on current releases. Try the current name first and fall back to
# the legacy one only when running on an older scikit-learn.
try:
    adaboost_model = AdaBoostClassifier(estimator=dt_best_model, random_state=42)
except TypeError:  # scikit-learn < 1.2 does not know the `estimator` keyword
    adaboost_model = AdaBoostClassifier(base_estimator=dt_best_model, random_state=42)

In [41]:
# Hyperparameter tuning for AdaBoost: 5-fold exhaustive search over the
# number of boosting rounds and the learning rate.
adaboost_param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 1.0],
)

# fit() returns the search object itself, so construction and fitting can be chained.
adaboost_grid_search = GridSearchCV(
    estimator=adaboost_model,
    param_grid=adaboost_param_grid,
    cv=5,
).fit(X_train, y_train)

# Best candidate, refitted on the whole training split.
adaboost_best_model = adaboost_grid_search.best_estimator_

In [42]:
# Evaluate the tuned Decision Tree on the held-out test split.
y_pred_dt = dt_best_model.predict(X_test)
accuracy_dt = accuracy_score(y_true=y_test, y_pred=y_pred_dt)

In [43]:
# Evaluate the tuned AdaBoost model on the held-out test split.
y_pred_adaboost = adaboost_best_model.predict(X_test)
accuracy_adaboost = accuracy_score(y_true=y_test, y_pred=y_pred_adaboost)

In [44]:
# Compare the test accuracy of both models (one line per model, as percentages).
print(
    "Akurasi Decision Tree: {:.2f}%".format(accuracy_dt * 100),
    "Akurasi AdaBoost: {:.2f}%".format(accuracy_adaboost * 100),
    sep="\n",
)

Akurasi Decision Tree: 100.00%
Akurasi AdaBoost: 100.00%


**Tugas 3**

In [45]:
# Read the diabetes dataset from the project-relative CSV and preview the first rows.
df2 = pd.read_csv(filepath_or_buffer='dataset/diabetes.csv')
df2.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [46]:
# Numeric summary of every column.
# NOTE(review): the output shows a minimum of 0.0 for Glucose, BloodPressure,
# SkinThickness, Insulin and BMI -- presumably placeholders for missing
# measurements; confirm before modelling.
df2.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [48]:
# Separate the label (y: the 'Outcome' column) from the features (X: all other columns).
y = df2['Outcome']
X = df2.drop('Outcome', axis=1)

In [49]:
# Split the dataset into training data and test data (80/20, seed 42).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [50]:
# Create the Logistic Regression model (liblinear solver, fixed seed).
logistic_model = LogisticRegression(
    random_state=42,
    solver='liblinear',
)

In [51]:
# Create the SVM model with a degree-3 polynomial kernel.
# NOTE(review): the features reach this model unscaled; kernel SVMs are
# sensitive to feature scale -- consider standardising first (would change results).
svm_model = SVC(
    random_state=42,
    degree=3,
    kernel='poly',
)

In [52]:
# Create the Decision Tree model (default hyperparameters, fixed seed).
decision_tree_model = DecisionTreeClassifier(random_state=42)

In [53]:
# Combine the three models into a voting ensemble; 'hard' voting takes the
# majority of the predicted class labels.
ensemble_model = VotingClassifier(
    voting='hard',
    estimators=[
        ('lr', logistic_model),
        ('svm', svm_model),
        ('dt', decision_tree_model),
    ],
)

In [54]:
# Train the voting ensemble on the training split (fits each member model).
ensemble_model.fit(X_train, y_train)

In [55]:
# Evaluate the voting ensemble on the held-out test split.
y_pred_ensemble = ensemble_model.predict(X_test)
accuracy_ensemble = accuracy_score(y_true=y_test, y_pred=y_pred_ensemble)

In [56]:
# Show the voting ensemble's test accuracy as a percentage.
print(
    "Akurasi Ensemble Voting: {:.2f}%".format(100 * accuracy_ensemble)
)

Akurasi Ensemble Voting: 77.92%
