In [5]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Create synthetic dataset
X, y = make_classification(
    n_features=10,
    n_samples=1000,
    n_informative=8,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
    random_state=42
)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

### Grid Search CV

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

clf = GridSearchCV(
    DecisionTreeClassifier(),
    {
        "criterion": ["gini", "entropy"],
        "max_depth": [5, 10, 15, 20]
    },
    cv=5,
    return_train_score=False
)

clf.fit(X, y)
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.01698,0.002847,0.002678,0.001525,gini,5,"{'criterion': 'gini', 'max_depth': 5}",0.78,0.8,0.75,0.79,0.77,0.778,0.017205,8
1,0.01935,0.004534,0.001889,0.000618,gini,10,"{'criterion': 'gini', 'max_depth': 10}",0.79,0.715,0.8,0.79,0.81,0.781,0.033823,6
2,0.022458,0.003017,0.001989,0.000431,gini,15,"{'criterion': 'gini', 'max_depth': 15}",0.8,0.725,0.82,0.805,0.81,0.792,0.034147,5
3,0.015149,0.001432,0.001364,0.000334,gini,20,"{'criterion': 'gini', 'max_depth': 20}",0.8,0.73,0.805,0.815,0.815,0.793,0.032031,4
4,0.01284,0.001962,0.00109,0.000219,entropy,5,"{'criterion': 'entropy', 'max_depth': 5}",0.765,0.78,0.76,0.815,0.78,0.78,0.019235,7
5,0.01662,0.004899,0.001022,0.000405,entropy,10,"{'criterion': 'entropy', 'max_depth': 10}",0.765,0.795,0.835,0.775,0.8,0.794,0.024166,3
6,0.025941,0.008311,0.00156,0.000711,entropy,15,"{'criterion': 'entropy', 'max_depth': 15}",0.77,0.8,0.84,0.775,0.845,0.806,0.031528,1
7,0.023863,0.002447,0.001468,0.000329,entropy,20,"{'criterion': 'entropy', 'max_depth': 20}",0.76,0.8,0.825,0.79,0.845,0.804,0.029223,2


### Randomized Search CV

In [12]:
from sklearn.model_selection import RandomizedSearchCV

clf = RandomizedSearchCV(
    DecisionTreeClassifier(),
    {
        "criterion": ["gini", "entropy"],
        "max_depth": [5, 10, 15, 20]
    },
    cv=5,
    return_train_score=False,
    n_iter=3
)

clf.fit(X, y)
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_criterion,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.018111,0.004324,0.001992,0.000892,20,gini,"{'max_depth': 20, 'criterion': 'gini'}",0.765,0.73,0.835,0.8,0.83,0.792,0.039825,2
1,0.013813,0.001058,0.001154,3.5e-05,15,gini,"{'max_depth': 15, 'criterion': 'gini'}",0.795,0.705,0.835,0.82,0.815,0.794,0.046303,1
2,0.012992,0.000835,0.00113,1e-05,10,gini,"{'max_depth': 10, 'criterion': 'gini'}",0.805,0.73,0.8,0.795,0.8,0.786,0.028178,3
