In [1]:
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import classification_report, f1_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import QuantileTransformer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
import xgboost
from sklearn.discriminant_analysis import StandardScaler
from sklearn.preprocessing import MinMaxScaler
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
# --- Load the heart-disease data set (read once; the original read it twice) ---
df = pd.read_csv("./datasets/heart.csv")
print(df.shape)

# --- One-hot encode the categorical columns cp, thal and slope ---
# Each dummy frame is kept under its own name (cp / thal / slope) as before.
categorical_columns = ['cp', 'thal', 'slope']
cp, thal, slope = (
    pd.get_dummies(df[column], prefix=column) for column in categorical_columns
)

# --- Append the dummy columns, then drop the raw categorical columns ---
frames = [df, cp, thal, slope]
df = pd.concat(frames, axis=1).drop(columns=categorical_columns)

# --- Separate the features (x) from the label (y) ---
x = df.drop(columns=['target'])
y = df['target']
# Column names are captured before scaling: StandardScaler returns a plain
# NumPy array by default, so the names are not carried by x_train / x_test.
comulmn_names = x.columns

# --- 80/20 hold-out split with a fixed seed ---
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

# --- Standardise: statistics are fitted on the training split only and
# --- re-used unchanged for the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# Logistic regression on the standardised features.
# `logre` is fitted in the same expression; fit() returns the estimator itself.
logre = LogisticRegression(
    solver='liblinear',
    max_iter=1000,
    random_state=42,
).fit(x_train, y_train)

# Predict the held-out split and show the plain accuracy.
y_pred = logre.predict(x_test)
logre_accuracy = accuracy_score(y_test, y_pred)
display(logre_accuracy)

# Shallow decision tree (depth 3, entropy criterion, random splitter).
dt = DecisionTreeClassifier(
    criterion='entropy',
    splitter='random',
    max_depth=3,
    min_samples_split=5,
    min_samples_leaf=5,
    random_state=1,
)
dt.fit(x_train, y_train)

# Predictions on the held-out split (re-uses the name y_pred).
y_pred = dt.predict(x_test)

# Report accuracy followed by macro-averaged F1.
dt_accuracy = accuracy_score(y_test, y_pred)
dt_macro_f1 = f1_score(y_test, y_pred, average='macro')
print("The test accuracy score of Decision Tree is ", dt_accuracy, dt_macro_f1)

# Random forest: 20 trees, each limited to depth 5, fixed seed.
rf = RandomForestClassifier(n_estimators=20, max_depth=5, random_state=2)
rf.fit(x_train, y_train)

# Predictions on the held-out split.
y_pred_rf = rf.predict(x_test)

# Report accuracy and macro-averaged F1, then the per-class breakdown.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_macro_f1 = f1_score(y_test, y_pred_rf, average='macro')
print("The test accuracy score of Random Forest is ", rf_accuracy, rf_macro_f1)
print(classification_report(y_test, y_pred_rf))
# XGBoost classifier with 100 boosting rounds and a fixed seed.
xgb_classifier = xgboost.XGBClassifier(n_estimators=100, random_state=1)
xgb_classifier.fit(x_train, y_train)

# Predictions on the held-out split.
y_pred_xgb = xgb_classifier.predict(x_test)

# Report accuracy and macro-averaged F1.
# NOTE(review): the recorded run shows 1.0 / 1.0 here. A perfect hold-out score
# is suspicious -- possibly duplicated rows shared between the train and test
# splits; check df.duplicated() before trusting this number.
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
xgb_macro_f1 = f1_score(y_test, y_pred_xgb, average='macro')
print("The test accuracy score of Gradient Boosting Classifier is ", xgb_accuracy, xgb_macro_f1)
# Linear-kernel support vector classifier (C=1, fixed seed).
clf = SVC(kernel='linear', C=1, random_state=42)
clf.fit(x_train, y_train)

# Predictions on the held-out split.
y_pred_svm = clf.predict(x_test)

# Report accuracy and macro-averaged F1.
svm_accuracy = accuracy_score(y_test, y_pred_svm)
svm_macro_f1 = f1_score(y_test, y_pred_svm, average='macro')
print("The test accuracy score of SVM is ", svm_accuracy, svm_macro_f1)
# Permutation importance of the fitted SVM (`clf`) on the held-out split:
# 30 shuffles per feature, fixed seed.
perm_importance = permutation_importance(
    clf,
    x_test,
    y_test,
    n_repeats=30,
    random_state=42,
)

# Per-model importance vectors.
# NOTE: these are not on a common scale -- the tree ensembles expose
# `feature_importances_`, logistic regression contributes absolute
# coefficients, and the SVM column is the mean permutation importance
# (which can be negative).
rf_importances = rf.feature_importances_
xgb_importances = xgb_classifier.feature_importances_
svm_importances = perm_importance
logireg_importances = np.abs(logre.coef_[0])

# One row per feature, one column per model.
importance_columns = {
    'Feature': comulmn_names,
    'RandomForest': rf_importances,
    'XGBoost': xgb_importances,
    'logistice regression': logireg_importances,
    'svm': svm_importances.importances_mean,
}
feature_importances = pd.DataFrame(importance_columns)

display(feature_importances)


(1025, 14)


0.8390243902439024

The test accuracy score of Decision Tree is  0.8390243902439024 0.8387788660899407
The test accuracy score of Random Forest is  0.9073170731707317 0.9072817729534147
              precision    recall  f1-score   support

           0       0.97      0.85      0.91       107
           1       0.86      0.97      0.91        98

    accuracy                           0.91       205
   macro avg       0.91      0.91      0.91       205
weighted avg       0.91      0.91      0.91       205

The test accuracy score of Gradient Boosting Classifier is  1.0 1.0
The test accuracy score of SVM is  0.8341463414634146 0.8334766819571866


Unnamed: 0,Feature,RandomForest,XGBoost,logistice regression,svm
0,age,0.058656,0.024118,0.084504,-0.008943
1,sex,0.02617,0.022753,0.665423,0.018049
2,trestbps,0.028066,0.016627,0.258174,-0.006992
3,chol,0.038274,0.015957,0.221437,-0.002114
4,fbs,0.003936,0.012483,0.016439,0.000976
5,restecg,0.007087,0.008784,0.258026,-0.010244
6,thalach,0.110539,0.017297,0.384973,-0.002439
7,exang,0.082139,0.028399,0.420773,-0.006992
8,oldpeak,0.100282,0.0305,0.684682,0.016098
9,ca,0.102079,0.064413,0.762777,0.021951


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Long format: one row per (Feature, Model) pair with its Importance value.
melted = feature_importances.melt(
    id_vars='Feature',
    var_name='Model',
    value_name='Importance',
)

# Grouped bar chart: features on the x-axis, one bar colour per model.
bar_options = dict(
    kind='bar',
    x='Feature',
    y='Importance',
    hue='Model',
    palette='viridis',
    alpha=0.8,
    height=6,
    aspect=2,
)
grid_plot = sns.catplot(data=melted, **bar_options)

# The plot has a single facet, so its one Axes is addressed explicitly
# instead of going through the pyplot "current axes" state.
bar_axes = grid_plot.ax

# Vertical tick labels keep long feature names readable.
plt.setp(bar_axes.get_xticklabels(), rotation=90)

# Title and axis labels.
bar_axes.set_title('Feature Importances by Model')
bar_axes.set_xlabel('Features')
bar_axes.set_ylabel('Importance')

# Render.
plt.tight_layout()
plt.show()

In [None]:
from sklearn.inspection import PartialDependenceDisplay
import matplotlib.pyplot as plt
# Partial dependence of the fitted decision tree (`dt`).
#
# Fixes relative to the previous version of this cell:
#   * `X_train` was undefined -- the training matrix is named `x_train`.
#   * `x_train` is the scaled NumPy array the tree was fitted on, so column
#     names must be supplied through `feature_names` for string lookups to work.
#   * The raw `thal` column no longer exists: preprocessing replaced it with
#     one-hot columns prefixed "thal_", which are plotted here instead.
#   * The title named XGBoost although the estimator is the decision tree.
feature_names = list(comulmn_names)

# One-hot columns derived from `thal`. The trailing underscore in the prefix
# keeps the unrelated `thalach` column out of the selection.
thal_features = [name for name in feature_names if name.startswith("thal_")]

# Decision Tree
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_title("Decision Tree - Partial Dependence")
# A single Axes is treated by scikit-learn as a bounding box in which one
# sub-plot per requested feature is drawn. Axis values are in standardised
# units because the model was trained on scaled data.
PartialDependenceDisplay.from_estimator(
    dt,
    x_train,
    features=[*thal_features, "age"],
    feature_names=feature_names,
    # An empty list is rejected by scikit-learn, so fall back to None.
    categorical_features=thal_features or None,
    ax=ax,
)
plt.show()


