# In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import pandas as pd


def train_random_forest(
    X_train,
    y_train,
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='log2',
):
    """
    Fit a RandomForestRegressor on the given data and report its settings.

    The regressor is always constructed with random_state=42; the remaining
    hyperparameters come from the arguments. After fitting, a summary of the
    hyperparameters that were used is printed to stdout.

    Parameters:
    - X_train: Training features, passed unchanged to `fit`.
    - y_train: Training target values, passed unchanged to `fit`.
    - n_estimators: Forwarded to RandomForestRegressor(n_estimators=...).
    - max_depth: Forwarded to RandomForestRegressor(max_depth=...).
    - min_samples_split: Forwarded to RandomForestRegressor(min_samples_split=...).
    - min_samples_leaf: Forwarded to RandomForestRegressor(min_samples_leaf=...).
    - max_features: Forwarded to RandomForestRegressor(max_features=...).

    Returns:
    - The fitted RandomForestRegressor instance.
    """
    # Single source of truth for the tunable settings: the same mapping feeds
    # the constructor and the printed summary, so the two cannot drift apart.
    hyperparams = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf,
        "max_features": max_features,
    }

    forest = RandomForestRegressor(random_state=42, **hyperparams)
    forest.fit(X_train, y_train)

    # Report only after a successful fit; insertion order of the dict keeps
    # the lines in the same order as the signature.
    print("Model trained with parameters:")
    for name, value in hyperparams.items():
        print(f"  - {name}: {value}")

    return forest

def optimize_hyperparameters(X_train, y_train, param_grid, cv=5):
    """
    Run a GridSearchCV over a RandomForestRegressor and return the winner.

    The base estimator is RandomForestRegressor(random_state=42). The search
    is configured with scoring='neg_mean_squared_error' and n_jobs=-1.

    Parameters:
    - X_train: Training features, passed unchanged to the search's `fit`.
    - y_train: Training target values, passed unchanged to the search's `fit`.
    - param_grid: Hyperparameter grid, forwarded to GridSearchCV(param_grid=...).
    - cv: Cross-validation setting, forwarded to GridSearchCV(cv=...).

    Returns:
    - A tuple (best_estimator_, best_params_) taken from the fitted search.
    """
    search = GridSearchCV(
        estimator=RandomForestRegressor(random_state=42),
        param_grid=param_grid,
        cv=cv,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    search.fit(X_train, y_train)

    best_model = search.best_estimator_
    best_params = search.best_params_
    return best_model, best_params

def save_model(model, filepath):
    """
    Persist a model to disk with joblib.

    Parameters:
    - model: Object to serialize (e.g. a trained estimator).
    - filepath: Destination passed to joblib.dump as the target file.

    Returns:
    - None. Whatever joblib.dump returns is intentionally discarded.
    """
    joblib.dump(value=model, filename=filepath)

def load_model(filepath):
    """
    Read back an object previously written with joblib.

    Parameters:
    - filepath: Location of the saved file, passed to joblib.load.

    Returns:
    - The object reconstructed by joblib.load.

    NOTE(review): joblib persistence is pickle-based per the joblib docs, so
    this should only be pointed at files from a trusted source - confirm that
    callers never pass user-supplied paths.
    """
    restored = joblib.load(filepath)
    return restored