In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the mushrooms CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='../DATA/mushrooms.csv')

In [None]:
# Target: the 'class' column. Features: every other column of df.
y = df['class']
X = df.drop(columns='class')

In [None]:
# One-hot encode the features; drop_first=True omits the first level of each
# encoded column so the indicators are not redundant.
X = pd.get_dummies(data=X, drop_first=True)
# Cast every column to int (presumably to get 0/1 instead of booleans).
X = X.astype(int)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split repeatable.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=101,
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyper-parameter values to be tried exhaustively by the grid search.
param_grid = dict(
    n_estimators=[1, 5, 10, 20, 40, 100],
    max_depth=[3, 4, 5, 6],
    learning_rate=[0.05, 0.1, 0.2],
)

In [None]:
# Base estimator for the grid search.
# random_state is fixed (same seed as the train/test split) so that tree seeding
# and feature tie-breaking are repeatable; without it the selected parameters
# and the feature importances can differ from one run to the next.
gb_model = GradientBoostingClassifier(random_state=101)

In [None]:
# Wrap the base estimator in an exhaustive search over param_grid
# (all other GridSearchCV settings are left at their defaults).
model = GridSearchCV(
    estimator=gb_model,
    param_grid=param_grid,
)

In [None]:
# Run the grid search on the training portion only.
model.fit(X=X_train, y=y_train)

In [None]:
# Show the parameter combination the grid search selected.
model.best_params_

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Predict the held-out rows with the fitted search object.
y_predicted = model.predict(X=X_test)

In [None]:
# Confusion matrix of true vs. predicted labels on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm

In [None]:
# Text summary of the per-class metrics on the test set.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_predicted,
    )
)

In [None]:
# Label the matrix axes with the class labels the fitted search actually learned
# (model.classes_) rather than placeholder names, so the plot shows the real
# target values. classes_ is sorted, matching confusion_matrix's default label
# order; this assumes every class seen in training also occurs in y_test or
# y_predicted, as the original two hard-coded labels already did.
disp = ConfusionMatrixDisplay(cm, display_labels=model.classes_)
disp.plot()

In [None]:
# Feature importances of the best estimator found by the grid search
# (one value per column of the training matrix).
features = model.best_estimator_.feature_importances_
features

In [None]:
# Pair each importance value with its feature name (the columns of X) in a
# single-column frame, then preview the first rows.
fit_imp = pd.DataFrame({'Importance': features}, index=X.columns)
fit_imp.head(5)

In [None]:
# Keep only features whose importance exceeds 0.0005, ordered ascending.
fit_imp = (
    fit_imp
    .loc[lambda frame: frame['Importance'] > 0.0005]
    .sort_values(by='Importance')
)

In [None]:
# Bar chart of the feature importances held in fit_imp, one bar per feature.
plt.figure(figsize=(16, 6), dpi=290)
ax = sns.barplot(
    data=fit_imp,
    x=fit_imp.index,
    y='Importance',
    hue='Importance',
)

# The x values come from an unnamed index, so the axis would otherwise be
# unlabeled; give the figure explicit labels and a title so it stands alone.
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
ax.set_title('Feature importances of the best gradient boosting model')

# Feature names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.show()