# In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import pandas as pd
import numpy as np

class RandomForestModel:
    """
    A wrapper class for Random Forest Regression model with enhanced functionality

    Wraps a scikit-learn RandomForestRegressor and adds grid-search
    hyperparameter tuning, a metrics summary and joblib persistence.

    Attributes:
    - model: The underlying RandomForestRegressor (replaced by the best
      estimator once optimize_hyperparameters has run)
    - feature_importances_: Importances copied from the fitted model, or
      None before any fit
    - best_params_: Best parameters found by the grid search, or None
      before optimize_hyperparameters has run
    """
    def __init__(self,
                 n_estimators=100,
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 max_features='log2',
                 random_state=42):
        """
        Initialize the Random Forest model with configurable parameters

        Parameters:
        - n_estimators: Number of trees in the forest
        - max_depth: Maximum depth of the trees
        - min_samples_split: Minimum samples to split an internal node
        - min_samples_leaf: Minimum samples at a leaf node
        - max_features: Number of features to consider for best split
        - random_state: Seed passed to the regressor (defaults to 42)
        """
        self.model = RandomForestRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            max_features=max_features,
            random_state=random_state
        )
        self.feature_importances_ = None
        self.best_params_ = None

    def fit(self, X_train, y_train):
        """
        Train the Random Forest model

        Parameters:
        - X_train: Training features
        - y_train: Training target values

        Returns:
        - This RandomForestModel instance, to allow call chaining
        """
        self.model.fit(X_train, y_train)
        self.feature_importances_ = self.model.feature_importances_
        return self

    def predict(self, X_test):
        """
        Make predictions using the trained model

        Parameters:
        - X_test: Test features

        Returns:
        - Predictions
        """
        return self.model.predict(X_test)

    def optimize_hyperparameters(self, X_train, y_train, param_grid=None, cv=5):
        """
        Optimize hyperparameters using GridSearchCV

        The search scores candidates with negative mean squared error and
        runs on all available cores (n_jobs=-1). On completion the wrapped
        model is replaced by the best estimator found.

        Parameters:
        - X_train: Training features
        - y_train: Training target values
        - param_grid: Dictionary of hyperparameters to search; when None a
          built-in grid over the five constructor parameters is used
        - cv: Number of cross-validation folds

        Returns:
        - Optimized model (this RandomForestModel instance)
        """
        if param_grid is None:
            param_grid = {
                'n_estimators': [100, 200, 300],
                'max_depth': [10, 20, 30],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4],
                'max_features': ['log2', 'sqrt']
            }

        # Reuse the seed this wrapper was configured with so the search is
        # consistent with the model built in __init__ (42 by default).
        seed = getattr(self.model, 'random_state', 42)

        grid_search = GridSearchCV(
            estimator=RandomForestRegressor(random_state=seed),
            param_grid=param_grid,
            cv=cv,
            scoring='neg_mean_squared_error',
            n_jobs=-1
        )
        grid_search.fit(X_train, y_train)

        # Update the model with best parameters
        self.model = grid_search.best_estimator_
        self.best_params_ = grid_search.best_params_
        # The best estimator is a different, already-fitted object, so the
        # cached importances must be refreshed or they would stay stale.
        self.feature_importances_ = self.model.feature_importances_

        return self

    def evaluate(self, X_test, y_test):
        """
        Evaluate model performance

        Parameters:
        - X_test: Test features
        - y_test: True target values

        Returns:
        - Dictionary of performance metrics with keys 'mse', 'rmse',
          'mae' and 'r2'
        """
        y_pred = self.predict(X_test)
        # Compute the MSE once; RMSE is derived from it.
        mse = mean_squared_error(y_test, y_pred)
        return {
            'mse': mse,
            'rmse': np.sqrt(mse),
            'mae': mean_absolute_error(y_test, y_pred),
            'r2': r2_score(y_test, y_pred)
        }

    def save(self, filepath):
        """
        Save the trained model to a file

        The whole wrapper (not just the inner regressor) is serialized
        with joblib, so load() restores the cached attributes as well.

        Parameters:
        - filepath: Path to save the model
        """
        joblib.dump(self, filepath)

    @classmethod
    def load(cls, filepath):
        """
        Load a trained model from a file

        Parameters:
        - filepath: Path to the saved model

        Returns:
        - Loaded model

        Raises:
        - TypeError: If the file does not contain an instance of this class
        """
        # SECURITY: joblib files are pickle-based and can execute arbitrary
        # code when loaded. Only load files from trusted sources.
        loaded = joblib.load(filepath)
        if not isinstance(loaded, cls):
            raise TypeError(
                f"Expected a {cls.__name__} in {filepath!r}, "
                f"got {type(loaded).__name__}"
            )
        return loaded

# Utility functions
def train_random_forest(X_train, y_train, **kwargs):
    """
    Build a RandomForestModel and fit it in a single call

    Parameters:
    - X_train: Training features
    - y_train: Training target values
    - **kwargs: Keyword arguments forwarded unchanged to the
      RandomForestModel constructor

    Returns:
    - The fitted RandomForestModel (the value returned by its fit method)
    """
    return RandomForestModel(**kwargs).fit(X_train, y_train)