# Step 1: Import Libraries
### Linnerud Dataset
- Description: A multi-output regression dataset that includes exercise and physiological variables collected from individuals.
- Usage: Useful for exploring relationships between physical exercise and physiological responses.

In [7]:
# Import necessary libraries for regression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load and Prepare the Dataset


In [11]:
from sklearn.datasets import fetch_california_housing

california_housing = fetch_california_housing()
X_california = california_housing.data  
y_california = california_housing.target

In [13]:
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_california, y_california, test_size=0.2, random_state=42)

# Step 3: Initialize and Train the Random Forest Regressor


In [14]:
rf_regressor = RandomForestRegressor(
    n_estimators=100,  # Number of trees in the forest
    max_depth=5,       # Maximum depth of each tree
    random_state=42    # For reproducibility
)

rf_regressor.fit(X_train_reg, y_train_reg)

# Step 4: Make Predictions and Evaluate the Model


In [15]:
y_pred_reg = rf_regressor.predict(X_test_reg)

mse = mean_squared_error(y_test_reg, y_pred_reg)
r2 = r2_score(y_test_reg, y_pred_reg)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared Score: {r2:.2f}")

Mean Squared Error: 0.46
R-squared Score: 0.65


## Common Hyperparameters
**n_estimators:** Number of trees in the forest.</br>
**max_depth:** Maximum depth of the tree.</br>
**min_samples_split:** Minimum number of samples required to split an internal node.</br>
**min_samples_leaf:** Minimum number of samples required to be at a leaf node.</br>
**max_features:** The number of features to consider when looking for the best split.</br>

### GridSearchCV

In [18]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],                 # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],                # Maximum depth of each tree
    'min_samples_split': [2, 5, 10],                # Minimum samples required to split a node
    'min_samples_leaf': [1, 2, 4],                  # Minimum samples required at a leaf node
    'max_features': ['auto', 'sqrt', 'log2']        # Number of features to consider for best split
}

In [19]:
grid_search_regressor = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5)

In [21]:
grid_search_regressor.fit(X_train_reg, y_train_reg)

KeyboardInterrupt: 

In [None]:
print("Best Parameters:", grid_search_regressor.best_params_)
print("Best Score:", grid_search_regressor.best_score_)

### RandomizedSearchCV

In [22]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

param_dist = {
    'n_estimators': randint(50, 200),                # Randomly choose number of trees between 50 and 200
    'max_depth': [None] + list(range(10, 31)),       # None or maximum depth between 10 to 30
    'min_samples_split': randint(2, 11),             # Randomly choose minimum samples to split between 2 and 10
    'min_samples_leaf': randint(1, 5),               # Randomly choose minimum samples at leaf node between 1 and 4
    'max_features': ['auto', 'sqrt', 'log2']         # Number of features to consider for best split
}

random_search_regressor = RandomizedSearchCV(
    estimator=rf_regressor, 
    param_distributions=param_dist,
    n_iter=100, cv=5
)

random_search_regressor.fit(X_train_reg, y_train_reg)

print("Best Parameters:", random_search_regressor.best_params_)
print("Best Score:", random_search_regressor.best_score_)

165 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
165 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 1466, in wrapper
    estimator._validate_params()
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "c:\Users\csawa\desktop\AIML\Flask\venv\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, i

Best Parameters: {'max_depth': 28, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 65}
Best Score: 0.8156272729060927
