## Hyperparameter Tuning

### Importing Libraries

In [1]:
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import numpy as np

In [2]:
# Synthetic binary-classification dataset: 1000 samples, 5 features, 2 classes.
# random_state pins the random draw so that Restart & Run All regenerates the
# same data (and therefore the same downstream scores) on every run.
X, y = make_classification(n_samples=1000,
                           n_features=5,
                           n_classes=2,
                           random_state=42)

In [3]:
# Show the container type of the generated feature matrix.
type(X)

numpy.ndarray

In [4]:
# Show the container type of the generated labels.
type(y)

numpy.ndarray

In [5]:
# Dimensions of the feature matrix.
print(X.shape)

(1000, 5)


In [6]:
# Dimensions of the label array.
print(y.shape)

(1000,)


In [7]:
# Wrap the raw arrays in labelled DataFrames so they can be combined and inspected.
# pandas is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.
# Column names are derived from the actual feature count (col1, col2, ...), so
# this cell keeps working if n_features is changed or X is regenerated.
feature_names = [f'col{i}' for i in range(1, X.shape[1] + 1)]
df1 = pd.DataFrame(data=X, columns=feature_names)
df2 = pd.DataFrame(data=y, columns=['Target Variable'])

In [8]:
# Place the feature columns and the target column side by side in one frame.
df = pd.concat(objs=[df1, df2], axis="columns")

In [9]:
# Preview the first rows of the combined features + target frame.
df.head()

Unnamed: 0,col1,col2,col3,col4,col5,Target Variable
0,0.355713,0.74146,1.092079,-0.851777,0.990631,1
1,-1.643005,-1.026886,-0.075931,-0.369161,1.100314,1
2,-0.348085,0.915614,0.671615,-0.321957,0.058245,0
3,0.680569,-1.078433,0.068213,-0.547212,1.410177,1
4,0.68339,-1.065085,0.043363,-0.514557,1.351409,1


In [10]:
# Split the combined frame back into target and features.
# y is selected with a list of labels, so it remains a one-column DataFrame.
y = df[['Target Variable']]
X = df.drop(columns=['Target Variable'])

In [11]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Applying Decision Tree

In [12]:
# Baseline: a decision tree with default hyperparameters.
# random_state fixes the tree's internal randomness so the baseline accuracy is
# reproducible; the same seeded estimator is later handed to the grid search,
# which makes that search repeatable as well.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
# accuracy_score returns a fraction in [0, 1]; scale to a percentage for display.
print("Accuracy score of the Decision Tree is", accuracy_score(y_test, y_pred_dt)*100)

Accuracy score of the Decision Tree is 93.0


## Applying Hyperparameter Tuning

In [13]:
# Search space for the decision tree:
# 3 depths x 3 split thresholds x 2 criteria = 18 candidate combinations.
param_grid = dict(
    max_depth=[3, 5, 10],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

In [14]:
# Exhaustive search over param_grid, scoring every candidate by accuracy
# with 5-fold cross-validation on the training split.
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

In [15]:
# Show the estimator that the grid search selected as best.
grid_search.best_estimator_

In [16]:
# Best cross-validated score (scoring='accuracy', cv=5) found by the search.
grid_search.best_score_

0.975

In [17]:
# Hyperparameter combination that produced the best cross-validated score.
grid_search.best_params_

{'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 10}

In [18]:
# Keep a handle on the winning estimator, then report its hyperparameters.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 10}


In [19]:
# Evaluate the tuned tree on the held-out test split so it can be compared
# directly with the baseline tree's test accuracy. (grid_search.best_score_ is
# a cross-validation average computed on the training split, not a test score.)
y_pred = best_model.predict(X_test)
print("Accuracy score of the tuned Decision Tree is", accuracy_score(y_test, y_pred)*100)

In [20]:
# 1. Generate a seeded synthetic binary-classification dataset with 10 features.
#    Note: this rebinds X and y, replacing the feature/target frames used above.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# 2. Initialise the classifier (seeded so the folds are scored repeatably)
clf = DecisionTreeClassifier(random_state=42)

# 3. Score it with 5-fold cross-validation
scores = cross_val_score(clf, X, y, scoring='accuracy', cv=5)

# 4. Report the per-fold scores and their summary statistics
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())
print("Standard deviation:", scores.std())

Cross-validation scores: [0.835 0.88  0.815 0.85  0.895]
Mean accuracy: 0.8550000000000001
Standard deviation: 0.02915475947422653
