In [136]:
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_validate, train_test_split,GridSearchCV
import numpy as np
from sklearn.metrics import recall_score, precision_score

In [137]:
X,y = make_moons(n_samples=10_000, noise=0.4, random_state=25)

In [138]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=2718)


In [139]:
base_tree = DecisionTreeClassifier()

In [140]:
base_tree.fit(X_train,y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [141]:
cv = cross_validate(base_tree, X_train, y_train, 
                    cv=10, scoring=["recall", "precision"],
                   n_jobs=-1)

In [142]:
cv["test_recall"].mean()

0.8014962593516209

In [143]:
cv["test_precision"].mean()

0.8045624165204698

In [144]:
base_tree = DecisionTreeClassifier()
dt_grid = {"criterion":["gini","entropy"],
              "min_samples_split": np.arange(100,204,4),
              "min_samples_leaf":np.arange(10,54,4)
}

In [145]:
grid_cv = GridSearchCV(base_tree,param_grid=dt_grid,cv=5,
                       scoring="roc_auc",n_jobs=-1)

In [146]:
grid_search=grid_cv.fit(X_train,y_train)


In [147]:
#Hiperparámetros óptimos
grid_search.best_params_

{'criterion': 'entropy', 'min_samples_leaf': 30, 'min_samples_split': 196}

In [148]:
#Mejor score auc sobre el conjunto de prueba
grid_search.best_score_

0.9284284589278682

In [149]:
## Validación sobre conjunto de prueba

In [150]:
y_pred = grid_search.predict(X_test)

In [151]:
precision_score(y_test,y_pred)

0.8747323340471093

In [152]:
recall_score(y_test,y_pred)

0.8252525252525252

In [153]:
print(f"""Precision del modelo base : {cv["test_precision"].mean():0.3}
Recall del modelo base : {cv["test_recall"].mean():0.3}
Precision del modelo mejorado : {precision_score(y_test,y_pred):0.3}
Recall del modelo mejorado : {recall_score(y_test,y_pred):0.3}""")

Precision del modelo base : 0.805
Recall del modelo base : 0.801
Precision del modelo mejorado : 0.875
Recall del modelo mejorado : 0.825
