In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

def find_best_model_and_estimators(X_train, y_train):
    """Grid-search each candidate regressor and report its cross-validated RMSE.

    Every candidate is wrapped in a ``StandardScaler`` -> estimator pipeline
    and tuned with ``GridSearchCV`` over 5 shuffled 70/30 splits of the
    training data. Candidates are ranked by RMSE on the held-out part of each
    split, and that same cross-validated RMSE is what gets reported, so the
    figure is not inflated by scoring a model on the rows it was fitted on.

    Args:
        X_train: Training features, passed unchanged to ``GridSearchCV.fit``.
        y_train: Training targets, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        A ``(scores, best_estimators)`` tuple. ``scores`` is a DataFrame with
        the columns ``model``, ``RMSE`` and ``best_params`` (one row per
        candidate), where ``RMSE`` is the mean held-out RMSE of the winning
        parameter combination. ``best_estimators`` maps each candidate name
        to its best pipeline, refit by ``GridSearchCV`` on all of ``X_train``.
    """
    model_params = {
        'random_forest': {
            # Seeded like the CV splitter below so repeated runs select the
            # same parameters and report the same score.
            'model': RandomForestRegressor(random_state=0),
            'params': {
                # Keys are prefixed with the step name make_pipeline derives
                # from the estimator class (lower-cased class name).
                'randomforestregressor__n_estimators': [50, 100, 200],
                'randomforestregressor__max_depth': [None, 10, 20, 30, 40],
                'randomforestregressor__min_samples_split': [2, 5, 10],
                'randomforestregressor__min_samples_leaf': [1, 2, 4],
                # NOTE: 'poisson' only fits when the targets are non-negative;
                # with GridSearchCV's default error_score, candidates that
                # fail to fit are scored NaN (with a warning) and never win.
                'randomforestregressor__criterion': [
                    'squared_error',
                    'friedman_mse',
                    'absolute_error',
                    'poisson',
                ],
                'randomforestregressor__bootstrap': [True, False],
            },
        },
    }

    scores = []
    best_estimators = {}

    cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)

    for algo, mp in model_params.items():
        pipe = make_pipeline(StandardScaler(), mp['model'])
        clf = GridSearchCV(
            pipe,
            mp['params'],
            cv=cv,
            # Select on the metric that is reported instead of the default R^2.
            scoring='neg_root_mean_squared_error',
            return_train_score=False,
            n_jobs=-1,
        )
        clf.fit(X_train, y_train)

        # best_score_ is the mean *negated* RMSE over the held-out splits for
        # the winning parameters; flip the sign to get a conventional RMSE.
        rmse = float(-clf.best_score_)

        scores.append({
            'model': algo,
            'RMSE': rmse,
            'best_params': clf.best_params_,
        })

        best_estimators[algo] = clf.best_estimator_

    return pd.DataFrame(scores, columns=['model', 'RMSE', 'best_params']), best_estimators
