In [44]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import numpy as np

In [45]:
X, y = make_classification(
    n_samples=1000,       # Total number of rows
    n_features=10,        # Total feature columns
    n_informative=5,      # Features that actually predict the target
    n_redundant=2,        # Linear combinations of informative features
    n_classes=3,          # <--- MULTICLASS (3 distinct labels)
    random_state=42
)

df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(10)])
df['target'] = y

print(df['target'].value_counts())
print(df.head())

target
2    335
1    334
0    331
Name: count, dtype: int64
   feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  \
0  -2.568916  -0.257409  -2.679357   3.864818   2.564998  -0.737556   
1   0.622861   0.534544   0.018283  -0.283382   1.907637  -0.341310   
2  -0.171251  -0.496278   1.613347   2.488069  -1.677966   0.303604   
3  -0.871423  -0.333946   3.368446   0.972153  -0.134388   0.212820   
4   2.346402  -0.699965  -0.203251  -0.256745  -1.974251   0.619663   

   feature_6  feature_7  feature_8  feature_9  target  
0  -3.330985  -1.213370  -1.473105  -0.846386       1  
1   1.206240  -1.093532  -0.469791  -0.188022       1  
2  -2.104579   0.714531   3.475999   0.623386       0  
3   0.700899   0.716046  -1.300910   3.439831       1  
4  -1.247950  -1.662115   3.921452  -0.759491       0  


In [46]:
df.target.value_counts()

target
2    335
1    334
0    331
Name: count, dtype: int64

In [47]:
x_train,x_test,y_train,y_test = train_test_split(X,y)

In [48]:
clf = DecisionTreeClassifier()
clf.fit(x_train,y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [49]:
y_pred = clf.predict(x_test)

In [50]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.81      0.92      0.86        89
           1       0.77      0.73      0.75        79
           2       0.78      0.71      0.74        82

    accuracy                           0.79       250
   macro avg       0.79      0.79      0.79       250
weighted avg       0.79      0.79      0.79       250



In [51]:
# training decision tree with best hyper parameter

In [52]:
param_grid = {
    'criterion': ['gini', 'entropy','log_loss'],
    'splitter' : ['best','random'],
    'max_depth': [i for i in range(5)],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'ccp_alpha': np.linspace(0,0.1,10)
}

In [53]:
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',  # Metric to optimize
    n_jobs=-1,           # Use all CPU cores
    verbose=1            # Print progress
)

In [54]:
grid_search.fit(x_train, y_train)

Fitting 5 folds for each of 2700 candidates, totalling 13500 fits


2700 fits failed out of a total of 13500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2700 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\DELL\miniconda3\envs\campusx\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\DELL\miniconda3\envs\campusx\Lib\site-packages\sklearn\base.py", line 1358, in wrapper
    estimator._validate_params()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "C:\Users\DELL\miniconda3\envs\campusx\Lib\site-packages\sklearn\base.py", line 471, in _validate_params
    validate_parameter_constraints(
    ~~~~~~~~~~~~~~~~~~~~~~~~

0,1,2
,estimator,DecisionTreeC...ndom_state=42)
,param_grid,"{'ccp_alpha': array([0. ..., 0.1 ]), 'criterion': ['gini', 'entropy', ...], 'max_depth': [0, 1, ...], 'min_samples_leaf': [1, 2, ...], ...}"
,scoring,'accuracy'
,n_jobs,-1
,refit,True
,cv,5
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,4
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [55]:
grid_search.best_estimator_

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,4
,min_samples_split,2
,min_samples_leaf,4
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [None]:
grid_search.best_score_