In [None]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.tree import DecisionTreeClassifier 
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer 
from sklearn.pipeline import Pipeline

In [None]:
# Read the data set that the model is later fitted on (path is relative to the working directory)
bank1 = pd.read_csv(filepath_or_buffer="bank1.csv")

In [None]:
# Define roles for bank1.csv
# Target: 1 where the 'y' column equals 'yes', 0 otherwise
y = np.where(bank1['y']=='yes',1,0)
# Features: every column except the ones at positions 0 and 9
# NOTE(review): presumably position 0 is a row identifier and position 9 is the target 'y' -- confirm against the CSV header
X = bank1.drop(bank1.columns[[0,9]],axis=1)
# The drop above is positional, so check that the target really was removed;
# otherwise the model would be trained on its own label
assert 'y' not in X.columns, "target column 'y' is still among the features"

In [None]:
# Preprocessing: split the feature columns by dtype, then one-hot encode the categorical ones
categorical_dtypes = ['object', 'category']
categorical_features = X.select_dtypes(include=categorical_dtypes).columns
numeric_features = X.select_dtypes(exclude=categorical_dtypes).columns

# Only the categorical columns are transformed; all remaining columns are passed through unchanged.
# handle_unknown='ignore' lets the encoder accept categories it did not see during fit.
preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            OneHotEncoder(handle_unknown='ignore', sparse_output=False),
            categorical_features,
        ),
    ],
    remainder='passthrough',
)

In [None]:
# Combine the preprocessing step and the decision tree into a single pipeline

# Candidate tree depths for the grid search; the 'tree__' prefix routes the
# parameter to the pipeline step named 'tree'
param_grid = {'tree__max_depth': [3,5,10,20]}

modelo_tree = Pipeline(steps=[
    ('preprocessor',preprocessor),
    # Fixed seed (same value as the cross-validation seed): scikit-learn permutes
    # the features at each split, so an unseeded tree can differ between runs
    ('tree',DecisionTreeClassifier(random_state=1))
])

In [None]:
# Cross-validation scheme: 10 stratified folds, shuffled with a fixed seed
random_seed = 1
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_seed)

# Metric used to score the candidates. It can be either 'accuracy' or 'roc_auc'
scoring = 'accuracy'

# Grid search over param_grid, evaluating each candidate pipeline with the folds above
model_tree_grid = GridSearchCV(
    estimator=modelo_tree,
    param_grid=param_grid,
    scoring=scoring,
    cv=kf,
)

# Fit all candidates; the fitted search object is the value displayed by this cell
model_tree_grid.fit(X, y)

In [None]:
# Cross-validation results as a table: one row per candidate max_depth,
# showing the mean and standard deviation of the test score across folds
resultados = pd.DataFrame(model_tree_grid.cv_results_)
resultados[[
    'param_tree__max_depth',
    'mean_test_score',
    'std_test_score',
]]

In [None]:
# Read the second data set, which the fitted model is evaluated on further down
bank2 = pd.read_csv(filepath_or_buffer="bank2.csv")

In [None]:
# Define roles for bank2.csv, using the same encoding and column positions as for the training data
# Target: 1 where the 'y' column equals 'yes', 0 otherwise
newy = np.where(bank2['y']=='yes',1,0)
# Features: every column except the ones at positions 0 and 9
# NOTE(review): presumably position 0 is a row identifier and position 9 is the target 'y' -- confirm against the CSV header
newX = bank2.drop(bank2.columns[[0,9]],axis=1)
# The drop above is positional, so check that the target really was removed
# before these features are handed to the model
assert 'y' not in newX.columns, "target column 'y' is still among the features"

In [None]:
# Predicted class labels for the new observations
y_pred = model_tree_grid.predict(newX)

# Predicted class probabilities; keep only the column at index 1
class_probs = model_tree_grid.predict_proba(newX)
probs_pred = class_probs[:, 1]


In [None]:
# Compare the predictions on the new data with the true labels

# Share of correctly classified observations
acc = accuracy_score(newy, y_pred)
print("Accuracy:", acc)

# Counts of true vs. predicted classes
cm = confusion_matrix(newy, y_pred)
print("Confusion Matrix:\n", cm)

# Area under the ROC curve, computed from the predicted probabilities
auc = roc_auc_score(newy, probs_pred)
print("AUC:", auc)