In [4]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [6]:
# Read the mushroom dataset from the assets directory into a DataFrame
df = pd.read_csv(filepath_or_buffer='../assets/mushrooms.csv')

# Preview the first five rows to confirm the columns parsed as expected
df.head(n=5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [7]:
# Count missing values per column (isna is the canonical name for isnull)
df.isna().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [10]:
# Target vector: the 'class' column recoded to integers ('e' -> 0, 'p' -> 1)
y = df['class'].map({'e': 0, 'p': 1})

# Feature matrix: every remaining column, one-hot encoded so that each
# category level becomes its own indicator column
X = pd.get_dummies(df.drop('class', axis=1))

# Report (number of rows, number of encoded feature columns)
print(X.shape)

(8124, 117)


In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [12]:
# Decision Tree Classifier
# random_state is pinned to the same seed value used for the train/test split:
# scikit-learn randomly permutes the candidate features at every split, so an
# unseeded tree is not guaranteed to be identical between runs.
dt = DecisionTreeClassifier(random_state=1)
dt.fit(X_train, y_train)

# Predict on the held-out test set
y_pred_dt = dt.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Test set accuracy: {:.2f}".format(acc_dt))

Decision Tree Test set accuracy: 1.00


In [13]:
# Boosted ensemble of 50 weak learners; fit() returns the estimator itself,
# so construction and training can be chained
ada = AdaBoostClassifier(n_estimators=50, random_state=1).fit(X_train, y_train)

# Label predictions for the held-out rows
y_pred_ada = ada.predict(X_test)

# Share of held-out rows classified correctly
acc_ada = accuracy_score(y_true=y_test, y_pred=y_pred_ada)
print("AdaBoost Test set accuracy: {:.2f}".format(acc_ada))



AdaBoost Test set accuracy: 1.00


In [14]:
# Per-class precision / recall / F1 for each model on the test set
print(
    "Decision Tree Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred_dt),
)
print(
    "AdaBoost Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred_ada),
)

Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       820
           1       1.00      1.00      1.00       805

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       820
           1       1.00      1.00      1.00       805

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



In [3]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# End-to-end run in a single cell: load the data, encode it, split it, then
# train and evaluate a decision tree and an AdaBoost ensemble.

# Load data
df = pd.read_csv('../assets/mushrooms.csv')

# Check for null values (count of missing entries per column)
print(df.isnull().sum())

# Feature selection
X = df.drop(columns=['class'])  # Drop the target column
y = df['class'].map({'p': 1, 'e': 0})  # Encode labels: 'p' -> 1, 'e' -> 0

# Convert categorical features to numerical using one-hot encoding
X = pd.get_dummies(X)

# Check the shape of features and instances
print(X.shape)

# Split the dataset into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Decision Tree Classifier
# random_state is pinned to the same seed used for the split and for AdaBoost:
# scikit-learn randomly permutes the candidate features at every split, so an
# unseeded tree is not guaranteed to be identical between runs.
dt = DecisionTreeClassifier(random_state=1)
dt.fit(X_train, y_train)

# Predict on test set
y_pred_dt = dt.predict(X_test)

# Calculate accuracy
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Test set accuracy: {:.2f}".format(acc_dt))

# Use AdaBoost with its default base estimator (per the scikit-learn docs,
# a decision tree limited to max_depth=1)
ada = AdaBoostClassifier(n_estimators=50, random_state=1)
ada.fit(X_train, y_train)

# Predict on test set
y_pred_ada = ada.predict(X_test)

# Calculate accuracy
acc_ada = accuracy_score(y_test, y_pred_ada)
print("AdaBoost Test set accuracy: {:.2f}".format(acc_ada))

# Optional: Print classification reports (per-class precision / recall / F1)
print("Decision Tree Classification Report:\n", classification_report(y_test, y_pred_dt))
print("AdaBoost Classification Report:\n", classification_report(y_test, y_pred_ada))


class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64
(8124, 117)
Decision Tree Test set accuracy: 1.00




AdaBoost Test set accuracy: 1.00
Decision Tree Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       820
           1       1.00      1.00      1.00       805

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       820
           1       1.00      1.00      1.00       805

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

