In [1]:
# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Synthetic classification data: 1000 rows and 20 columns, of which 15 are
# informative and 5 are redundant. Seeded so every run sees the same data.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Hold out 30% of the rows for the final evaluation (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Base estimator handed to the hyperparameter searches further down.
rf = RandomForestClassifier(random_state=42)

# Exhaustive search space: 3 * 4 * 3 * 3 * 2 = 216 parameter combinations.
param_grid = {
    'n_estimators': [50, 100, 200],       # how many trees to grow
    'max_depth': [None, 10, 20, 30],      # depth cap per tree (None = no cap)
    'min_samples_split': [2, 5, 10],      # samples needed before a node may split
    'min_samples_leaf': [1, 2, 4],        # samples that must remain in a leaf
    'criterion': ['gini', 'entropy'],     # impurity measure used for splits
}

# Try every combination with 3-fold cross-validation, using all CPU cores
# and logging progress for each fit.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning combination (header and value on separate lines).
print("Best Parameters from GridSearchCV:", grid_search.best_params_, sep="\n")

# Score the refitted best estimator on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

# Candidate values to sample from: 4 * 4 * 4 * 4 * 2 = 512 possible
# combinations, of which only n_iter=20 are actually tried.
param_dist = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10, 15],
    'min_samples_leaf': [1, 2, 4, 6],
    'criterion': ['gini', 'entropy'],
}

# Sample 20 combinations (seeded) and score each with 3-fold
# cross-validation, using all CPU cores and logging progress for each fit.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=20,
    cv=3,
    verbose=2,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Report the winning sampled combination (header and value on separate lines).
print(
    "\nBest Parameters from RandomizedSearchCV:",
    random_search.best_params_,
    sep="\n",
)

# Score the refitted best estimator on the held-out test split.
best_model_random = random_search.best_estimator_
y_pred_random = best_model_random.predict(X_test)
print(
    "\nClassification Report (Randomized Search):",
    classification_report(y_test, y_pred_random),
    sep="\n",
)


Fitting 3 folds for each of 216 candidates, totalling 648 fits
Best Parameters from GridSearchCV:
{'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.87      0.89       160
           1       0.86      0.90      0.88       140

    accuracy                           0.88       300
   macro avg       0.88      0.88      0.88       300
weighted avg       0.88      0.88      0.88       300

Fitting 3 folds for each of 20 candidates, totalling 60 fits

Best Parameters from RandomizedSearchCV:
{'n_estimators': 300, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 20, 'criterion': 'gini'}

Classification Report (Randomized Search):
              precision    recall  f1-score   support

           0       0.92      0.88      0.90       160
           1       0.87      0.91      0.89       140

    accuracy              