# **Implement Decision Tree using Sklearn**

#### **Hyperparameters**

- `max_depth`
- `min_samples_split`
- `min_samples_leaf`
- `max_features`
- `criterion`

In [10]:
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold, train_test_split
from sklearn.metrics import classification_report

In [63]:
# Synthetic classification dataset: 10,000 samples with 10 features each.
# random_state is fixed so the generated data (and therefore every result
# below) is identical after a kernel restart and full re-run.
X, Y = make_classification(n_samples=10000, n_features=10, random_state=42)

In [64]:
# Hold out 20% of the samples for final evaluation; the fixed seed keeps the
# shuffled split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=92873,
)

In [72]:
# Search space for the decision tree: each key is a DecisionTreeClassifier
# constructor argument, each value the list of candidate settings to sample from.
params = {
    # Limits on tree growth.
    "max_depth": [2, 5, 10, 20],
    "min_samples_split": [2, 5, 10, 20],
    "min_samples_leaf": [2, 6, 10, 20],
    # Number of features considered at each split.
    "max_features": ["sqrt", "log2"],
    # Impurity measure and split-selection strategy.
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
}

In [73]:
# 5-fold cross-validation splitter; shuffling with a fixed seed keeps the
# fold assignment identical across runs.
cv = KFold(
    n_splits=5,
    shuffle=True,
    random_state=8223,
)

In [74]:
# Randomized hyperparameter search over `params`, scored by accuracy on the
# folds produced by `cv`.
# random_state is fixed in two places so a re-run selects the same
# hyperparameters:
#   - on the search, which controls which candidate combinations get sampled;
#   - on the tree, whose fit is stochastic when max_features subsamples the
#     features or when splitter="random".
model = RandomizedSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_distributions=params,
    scoring="accuracy",
    cv=cv,
    error_score="raise",  # fail loudly on a bad candidate instead of scoring it NaN
    random_state=42,
)

In [75]:
# Run the search on the training split only; the test split stays untouched
# until the final evaluation.
model.fit(X=x_train, y=y_train)

In [76]:
# Best combination among the candidates the randomized search sampled,
# ranked by cross-validated accuracy. Only a random subset of the grid is
# tried, so a better combination may exist among the untested ones.
model.best_params_

{'splitter': 'best',
 'min_samples_split': 10,
 'min_samples_leaf': 20,
 'max_features': 'sqrt',
 'max_depth': 10,
 'criterion': 'entropy'}

In [77]:
# Estimator configured with the best hyperparameters found by the search;
# this is the model evaluated on the held-out test split.
best_model = model.best_estimator_

In [78]:
# Predict class labels for the held-out test split.
y_pred = best_model.predict(X=x_test)

In [79]:
# Per-class precision, recall and F1 on the test split. print() is needed
# because the report is a pre-formatted multi-line string.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.93      0.93      0.93       980
           1       0.93      0.93      0.93      1020

    accuracy                           0.93      2000
   macro avg       0.93      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000

