In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import export_text


In [24]:
# Load the semicolon-delimited CSV (double quote as the quote character)
# into a DataFrame, then print its text representation.
df = pd.read_csv(
    'bank-additional-full.csv',
    sep=';',
    quotechar='"',
)
print(df)

       age          job  marital            education  default housing loan  \
0       56    housemaid  married             basic.4y       no      no   no   
1       57     services  married          high.school  unknown      no   no   
2       37     services  married          high.school       no     yes   no   
3       40       admin.  married             basic.6y       no      no   no   
4       56     services  married          high.school       no      no  yes   
...    ...          ...      ...                  ...      ...     ...  ...   
41183   73      retired  married  professional.course       no     yes   no   
41184   46  blue-collar  married  professional.course       no      no   no   
41185   56      retired  married    university.degree       no     yes   no   
41186   44   technician  married  professional.course       no      no   no   
41187   74      retired  married  professional.course       no     yes   no   

         contact month day_of_week  ...  campaign  

In [22]:
# Train and evaluate a decision tree on `df`.
# The last column of `df` is used as the label; every column before it is a feature.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# NOTE(review): the printed rules split on `duration`. Presumably this is the
# call length, which is only known after the contact has happened -- confirm
# against the dataset description whether it should be excluded from X.

# Object-dtype columns are one-hot encoded; all remaining columns pass through.
categorical_columns = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': the encoder is fitted on the training split
        # only, so a category that appears solely in the test split would make
        # transform() raise with the default handle_unknown='error'. With
        # 'ignore' such a value is encoded as all zeros for that column.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ],
    remainder='passthrough'
)

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessing on the training rows only, then apply it to the test rows.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Names of the transformed columns, used to label the exported tree rules.
feature_names = preprocessor.get_feature_names_out(input_features=list(X.columns))

clf = DecisionTreeClassifier(random_state=42)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print('Classification Report:')
print(classification_report(y_test, y_pred))

# Text dump of the fitted tree's decision rules.
tree_rules = export_text(clf, feature_names=list(feature_names))
print('Decision Tree Rules:')
print(tree_rules)


Accuracy: 0.89
Classification Report:
              precision    recall  f1-score   support

          no       0.94      0.93      0.94      7303
         yes       0.50      0.53      0.52       935

    accuracy                           0.89      8238
   macro avg       0.72      0.73      0.73      8238
weighted avg       0.89      0.89      0.89      8238

Decision Tree Rules:
|--- remainder__nr.employed <= 5087.65
|   |--- remainder__duration <= 165.50
|   |   |--- remainder__pdays <= 7.50
|   |   |   |--- cat__day_of_week_mon <= 0.50
|   |   |   |   |--- remainder__duration <= 127.50
|   |   |   |   |   |--- remainder__emp.var.rate <= -2.35
|   |   |   |   |   |   |--- cat__month_nov <= 0.50
|   |   |   |   |   |   |   |--- cat__month_sep <= 0.50
|   |   |   |   |   |   |   |   |--- remainder__euribor3m <= 1.17
|   |   |   |   |   |   |   |   |   |--- remainder__age <= 64.00
|   |   |   |   |   |   |   |   |   |   |--- remainder__duration <= 91.50
|   |   |   |   |   |   |   | 