In [14]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

In [15]:
# Load dataset
cancer = datasets.load_breast_cancer()

In [16]:
# Split into features and labels
X = cancer.data
y = cancer.target

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
# Define the tune_hyperparameters
def tune_hyperparameters(name, model, param_grid):
    # Define a k-fold cross-validation object
    cv = KFold(n_splits=5, shuffle=True, random_state=42)

    # Choose the search method based on the name
    if name == 'Grid Search':
        # Use GridSearchCV to perform grid search
        search = GridSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=1)
    elif name == 'Random Search':
        # Use RandomizedSearchCV to perform random search
        search = RandomizedSearchCV(model, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, verbose=1, n_iter=10)
    else:
        # Raise an error if the name is not valid
        raise ValueError('Invalid name. Choose either Grid Search or Random Search.')

    # Fit the search object on the training data
    search.fit(X_train, y_train)

    # Return the best estimator and the search object
    return search.best_estimator_, search

In [18]:
# Define a Decision Tree classifier with its hyperparameters
clsf = DecisionTreeClassifier()
param_grid = {'criterion': ['gini'], 'max_depth': np.arange(1, 11)}

# Perform k-fold cross-validation and grid search or random search
best_estimator, search = tune_hyperparameters('Grid Search', clsf, param_grid)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [19]:
# Evaluate the performance of Decision Tree Classifier on the test set
y_pred = best_estimator.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
sensitivity = precision_score(y_test, y_pred)
specificity = recall_score(y_test, y_pred)
print('Accuracy:', accuracy)
print('Sensitivity:', sensitivity)
print('Specificity:', specificity)

Accuracy: 0.9473684210526315
Sensitivity: 0.9452054794520548
Specificity: 0.971830985915493


In [20]:
# Print the best score and the best parameters
print('Best score:', search.best_score_)
print('Best parameters:', search.best_params_)

Best score: 0.9142857142857144
Best parameters: {'criterion': 'gini', 'max_depth': 3}
