## Tugas Praktikum 2

Terdapat dataset mushroom. Berdasarkan dataset tersebut, bandingkan performa antara algoritma Decision Tree dan AdaBoost. Gunakan tuning hyperparameter untuk mendapatkan parameter dan akurasi yang terbaik.

In [15]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [16]:
# Read the mushroom dataset from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='asset/mushrooms.csv')

# Show the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [17]:
# One-hot encode every feature column. The features are selected by name
# (everything except the target 'class') instead of by position, so the target
# can never be one-hot encoded by accident if the column order changes.
feature_columns = df.columns.drop('class')
df_encoded = pd.get_dummies(df, columns=feature_columns)

# Encode the target as integers: 'p' -> 1, 'e' -> 0
# (presumably poisonous / edible; confirm against the dataset description).
df_encoded['class'] = df['class'].map({'p': 1, 'e': 0})

# Cast every column (target and dummy columns) to int, then preview the
# first rows of the transformed dataset.
df_encoded = df_encoded.astype(int)
df_encoded.head()

Unnamed: 0,class,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,...,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,1,0,0,0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
3,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,1,0,0,1,...,0,0,0,0,1,0,0,0,0,0


In [18]:
# Feature selection: every column except the target 'class'
X = df_encoded.drop(columns=['class'])
y = df_encoded['class']

# Hold out 20% of the rows as the test set (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Base Decision Tree. The seed fixes how ties between equally good splits are
# broken, so the grid search result is reproducible across runs.
dt = DecisionTreeClassifier(random_state=42)

# Hyperparameter tuning grid
param_grid_dt = {
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}

# Exhaustive search over the grid with 5-fold cross-validation on the training set
grid_search_dt = GridSearchCV(dt, param_grid_dt, cv=5)
grid_search_dt.fit(X_train, y_train)

print("Best parameters for Decision Tree: ", grid_search_dt.best_params_)

Best parameters for Decision Tree:  {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}


In [19]:
# Build the Decision Tree from the hyperparameters selected by the grid search.
# Reading them from best_params_ (instead of copying the printed values by hand)
# keeps this cell in sync if the grid or the data changes; the seed makes the
# fitted tree reproducible.
dt_tunned = DecisionTreeClassifier(random_state=42, **grid_search_dt.best_params_)

dt_tunned.fit(X_train, y_train)

# Predict the labels of the test set
y_pred_dt = dt_tunned.predict(X_test)

# Compute the accuracy on the held-out test set
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Test set accuracy: {:.2f}".format(acc_dt))
print(f"Test set accuracy: {acc_dt}")

Test set accuracy: 1.00
Test set accuracy: 1.0


In [20]:
# Base AdaBoost estimator (seeded for reproducibility). GridSearchCV clones it
# for every candidate, so `ada` itself is left unfitted.
ada = AdaBoostClassifier(random_state=42)

# Hyperparameter tuning grid
param_grid_ada = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1, 10]
}

# 5-fold cross-validated search over the grid, using all available CPU cores
grid_ada = GridSearchCV(ada, param_grid_ada, cv=5, n_jobs=-1, verbose=1)
grid_ada.fit(X_train, y_train)

# The label previously said "Random Forest", but the tuned model is AdaBoost.
print("Best parameters for AdaBoost: ", grid_ada.best_params_)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters for Random Forest:  {'learning_rate': 1, 'n_estimators': 50}


In [21]:
# Build the AdaBoost model from the hyperparameters selected by the grid search,
# with the same seed that was used during tuning so the evaluated model matches
# the one that was cross-validated.
ada_tunned = AdaBoostClassifier(random_state=42, **grid_ada.best_params_)

ada_tunned.fit(X_train, y_train)

# Predict the labels of the test set with the AdaBoost model
y_pred_ada = ada_tunned.predict(X_test)

# Compute the accuracy from the AdaBoost predictions. The previous version
# scored the Decision Tree predictions (y_pred_dt) here, so the reported
# AdaBoost accuracy was really the Decision Tree accuracy.
acc_ada = accuracy_score(y_test, y_pred_ada)
print("Test set accuracy: {:.2f}".format(acc_ada))
print(f"Test set accuracy: {acc_ada}")

Test set accuracy: 1.00
Test set accuracy: 1.0


In [22]:
# Print the test-set accuracy of both tuned models side by side.
print("Perbandingan Akurasi")
for model_label, model_accuracy in (("Decision Tree", acc_dt), ("AdaBoost", acc_ada)):
    print(f"Accuracy Score {model_label}: {model_accuracy}")

Perbandingan Akurasi
Accuracy Score Decision Tree: 1.0
Accuracy Score AdaBoost: 1.0
