In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error

# Toy dataset: four observations with three predictors and the observed wait time.
data = pd.DataFrame(
    {
        "CapacityUtilization": [0.8, 0.6, 0.9, 0.4],
        "DayOfWeek": [1, 3, 4, 5],
        "HistoricalWaitTime": [30, 45, 20, 60],
        "WaitTime": [35, 50, 25, 55],
    }
)

# Separate the column being predicted from the predictor columns.
target = data.loc[:, "WaitTime"]
features = data.loc[:, ["CapacityUtilization", "DayOfWeek", "HistoricalWaitTime"]]

# Shared seed so both the train/test split and the forest are reproducible.
SEED = 42

# Number of cross-validation folds used during the search.
# This must not exceed the number of training rows: the four-row sample with
# test_size=0.2 leaves only three rows for training, so a 5-fold split is not
# possible here and 2 folds are used instead.
CV_FOLDS = 2

# Hold out a portion of the data for a final check of the tuned model.
# NOTE: on a dataset this small the hold-out is a single row, so the scores
# reported below are illustrative only.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=SEED
)

# Candidate hyperparameter values; every combination is evaluated (3 x 3 x 3 = 27).
param_grid = {
    "n_estimators": [50, 100, 200],   # number of trees in the forest
    "max_depth": [None, 10, 20],      # maximum tree depth (None = unlimited)
    "min_samples_split": [2, 5, 10],  # minimum samples needed to split an internal node
}

# Exhaustive grid search over param_grid, scored by cross-validated negative MSE.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=SEED),  # base model being tuned
    param_grid=param_grid,
    cv=CV_FOLDS,  # 2-fold cross-validation (see CV_FOLDS above)
    scoring="neg_mean_squared_error",  # higher is better, hence the negated MSE
    verbose=1,  # print a one-line progress summary
    n_jobs=-1,  # use all available CPU cores
)

# Fit every candidate on the training split and refit the best one on all of it.
grid_search.fit(X_train, y_train)

# Pull the winning configuration out of the finished search.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
# The search maximises negative MSE; flip the sign to report a plain MSE.
best_score = -grid_search.best_score_

# Score the refitted best model on the held-out rows.
test_predictions = best_model.predict(X_test)
test_mse = mean_squared_error(y_true=y_test, y_pred=test_predictions)

# Report the tuning summary.
print("\n--- Hyperparameter Tuning Results ---")
for label, value in (
    ("Best Parameters:", best_params),
    ("Best Cross-Validation MSE:", best_score),
    ("Test Set MSE:", test_mse),
):
    print(label, value)


Fitting 2 folds for each of 27 candidates, totalling 54 fits

--- Hyperparameter Tuning Results ---
Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best Cross-Validation MSE: 397.9200000000001
Test Set MSE: 153.75999999999996
