In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
import numpy as np

# Load the iris dataset that ships with scikit-learn and pull out features / labels
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [2]:
# Baseline: a random forest with default hyperparameters (only the seed is fixed)
basic_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the baseline on the held-out test split
y_pred_basic = basic_rf.predict(X_test)
basic_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_basic)
print(f"Accuracy without tuning: {basic_accuracy:.4f}")


Accuracy without tuning: 1.0000


In [3]:
# Candidate values for each random-forest hyperparameter explored by the searches below
# (3 * 4 * 3 * 3 * 2 = 216 combinations in total)
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)


In [4]:
# Exhaustive search: every combination in param_grid is scored with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the training split only, then report the winning combination
grid_search.fit(X_train, y_train)
print(f"Best parameters from GridSearchCV: {grid_search.best_params_}")


Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Best parameters from GridSearchCV: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 200}


In [5]:
# Randomized search: only 20 combinations drawn from the same param_grid are scored
# with 5-fold cross-validation; the seed fixes which combinations get drawn
random_search = RandomizedSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_distributions=param_grid,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    verbose=2,
    random_state=42,
)

# Run the search on the training split only, then report the winning combination
random_search.fit(X_train, y_train)
print(f"Best parameters from RandomizedSearchCV: {random_search.best_params_}")


Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters from RandomizedSearchCV: {'n_estimators': 50, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_depth': 30, 'bootstrap': False}


In [15]:
# Use the best estimator found by the grid search as the tuned model
# (the random_search result is not evaluated in this cell)
best_model = grid_search.best_estimator_

# Accuracy of the tuned model on the held-out test split
y_pred_tuned = best_model.predict(X_test)
tuned_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_tuned)
print(f"Accuracy with tuning: {tuned_accuracy:.4f}")


Accuracy with tuning: 1.0000


In [21]:
import pandas as pd

# Build the summary from the accuracies computed in the evaluation cells
# (basic_accuracy, tuned_accuracy) instead of retyping the printed numbers,
# so the table cannot drift from the actual results when the notebook is re-run.
comparison_data = {
    "Model": ["RandomForestClassifier"],
    "Without Tuning": [basic_accuracy],
    "With Tuning": [tuned_accuracy],
}

# Convert data to a DataFrame; the bare last expression renders it as the cell output
comparison_df = pd.DataFrame(comparison_data)
comparison_df


Unnamed: 0,Model,Without Tuning,With Tuning
0,RandomForestClassifier,1.0,1.0
