In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [19]:
# Load dataset
df = pd.read_csv('mushrooms.csv')
df.head()
print(df.isnull().sum())

class                        0
cap-shape                    0
cap-surface                  0
cap-color                    0
bruises                      0
odor                         0
gill-attachment              0
gill-spacing                 0
gill-size                    0
gill-color                   0
stalk-shape                  0
stalk-root                   0
stalk-surface-above-ring     0
stalk-surface-below-ring     0
stalk-color-above-ring       0
stalk-color-below-ring       0
veil-type                    0
veil-color                   0
ring-number                  0
ring-type                    0
spore-print-color            0
population                   0
habitat                      0
dtype: int64


In [20]:
# Menghilangkan nilai kosong di dataframe
df = df.replace('?', np.nan)
df = df.dropna()

# Mengkonversi nilai string menjadi numerik
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = le.fit_transform(df[col])

In [21]:
# Verify column names
print(df.columns)

# Ensure 'class' column exists
if 'class ' in df.columns:
    X = df.drop('class ', axis=1)
    y = df['class ']
else:
    print("Column 'class' not found in dataframe.")


Index(['class ', 'cap-shape ', 'cap-surface ', 'cap-color ', 'bruises ',
       'odor ', 'gill-attachment ', 'gill-spacing ', 'gill-size ',
       'gill-color ', 'stalk-shape ', 'stalk-root ',
       'stalk-surface-above-ring ', 'stalk-surface-below-ring ',
       'stalk-color-above-ring ', 'stalk-color-below-ring ', 'veil-type ',
       'veil-color ', 'ring-number ', 'ring-type ', 'spore-print-color ',
       'population ', 'habitat'],
      dtype='object')


In [22]:
# Pisahkan data menjadi training set dan testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definisikan parameter untuk Decision Tree
param_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

# Definisikan parameter untuk RandomForest
param_rf = {
    'n_estimators': [10, 50, 100, 200],
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 5, 10]
}

In [23]:
# Lakukan tuning hyperparameter untuk Decision Tree
grid_dt = GridSearchCV(DecisionTreeClassifier(), param_dt, cv=5, scoring='accuracy')
grid_dt.fit(X_train, y_train)

# Lakukan tuning hyperparameter untuk RandomForest
grid_rf = GridSearchCV(RandomForestClassifier(), param_rf, cv=5, scoring='accuracy')
grid_rf.fit(X_train, y_train)

# Prediksi menggunakan model terbaik
y_pred_dt = grid_dt.best_estimator_.predict(X_test)
y_pred_rf = grid_rf.best_estimator_.predict(X_test)

# Evaluasi model
print("Decision Tree:")
print("Akurasi:", accuracy_score(y_test, y_pred_dt))
print("Laporan klasifikasi:")
print(classification_report(y_test, y_pred_dt))
print("Matriks konfusi:")
print(confusion_matrix(y_test, y_pred_dt))

print("\nRandom Forest:")
print("Akurasi:", accuracy_score(y_test, y_pred_rf))
print("Laporan klasifikasi:")
print(classification_report(y_test, y_pred_rf))
print("Matriks konfusi:")
print(confusion_matrix(y_test, y_pred_rf))

Decision Tree:
Akurasi: 1.0
Laporan klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

Matriks konfusi:
[[843   0]
 [  0 782]]

Random Forest:
Akurasi: 1.0
Laporan klasifikasi:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       843
           1       1.00      1.00      1.00       782

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

Matriks konfusi:
[[843   0]
 [  0 782]]
