In [7]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score

In [8]:
X, y = make_moons(n_samples=10000, noise=0.4)
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [9]:
param_grid = {
    'max_leaf_nodes': list(range(2,100)),
    'min_samples_split': [2,3,4],
    'max_features': ["auto", "sqrt", "log2", "none"],
    'criterion': ['gini', 'entropy']
}
dt_clf = DecisionTreeClassifier()
search = GridSearchCV(dt_clf, param_grid=param_grid, cv=5)

In [14]:
search.fit(X_train, y_train)
best = search.best_estimator_
print(search.best_params_)

{'criterion': 'gini', 'max_depth': 26, 'max_features': 'sqrt', 'max_leaf_nodes': 26}


In [15]:
y_pred = best.predict(X_test)
accuracy = accuracy_score(y_pred, y_test)
f1 = f1_score(y_pred, y_test)
print(f'Accuracy: {accuracy}, f1: {f1}')

Accuracy: 0.8372, f1: 0.8328542094455852
