In [5]:
# Import necessary libraries for preprocessing and modeling
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, FunctionTransformer
from sklearn.ensemble import RandomForestRegressor

In [9]:
# Load the dataset
Ames = pd.read_csv('Ames_HousePrice.csv', index_col = 0)
Ames.head(10)

Unnamed: 0,PID,GrLivArea,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
1,909176150,856,126000,30,RL,,7890,Pave,,Reg,...,166,0,,,,0,3,2010,WD,Normal
2,905476230,1049,139500,120,RL,42.0,4235,Pave,,Reg,...,0,0,,,,0,2,2009,WD,Normal
3,911128020,1001,124900,30,C (all),60.0,6060,Pave,,Reg,...,0,0,,,,0,11,2007,WD,Normal
4,535377150,1039,114000,70,RL,80.0,8146,Pave,,Reg,...,111,0,,,,0,5,2009,WD,Normal
5,534177230,1665,227000,60,RL,70.0,8400,Pave,,Reg,...,0,0,,,,0,11,2009,WD,Normal
6,908128060,1922,198500,85,RL,64.0,7301,Pave,,Reg,...,0,0,,,,0,7,2009,ConLD,Normal
7,902135020,936,93000,20,RM,60.0,6000,Pave,Pave,Reg,...,0,0,,,,0,2,2009,WD,Normal
8,528228540,1246,187687,20,RL,53.0,3710,Pave,,Reg,...,0,0,,,,0,3,2008,New,Partial
9,923426010,889,137500,20,RL,74.0,12395,Pave,,Reg,...,0,0,,,,0,4,2008,WD,Normal
10,908186050,1072,140000,180,RM,35.0,3675,Pave,,Reg,...,0,0,,,,0,5,2007,WD,Normal


### I. Setup
Load the dataset- remove columns with missing values. Identify numeric and categorical features and target.

In [10]:
# Adjust data types for categorical variables
for col in ['MSSubClass', 'YrSold', 'MoSold']:
    Ames[col] = Ames[col].astype('object')
# Exclude 'PID' and 'SalePrice' from features and specifically handle the 'Electrical' column
numeric_features = Ames.select_dtypes(include=['int64', 'float64']).drop(columns=['PID', 'SalePrice']).columns
categorical_features = Ames.select_dtypes(include=['object']).columns.difference(['Electrical'])
electrical_feature = ['Electrical']

### II. Set up Pipeline:
For pre-processing and regression with 5-fold cross-validation. Pass through numerical data, ordinally encode ordinal categorical variables, one-hot encode all other categorical variables and drop none. Instantiate RandomForest model.

In [11]:
# Manually specify the categories for ordinal encoding according to the data dictionary
ordinal_order = {
    'Electrical': ['Mix', 'FuseP', 'FuseF', 'FuseA', 'SBrkr'],  # Electrical system
    'LotShape': ['IR3', 'IR2', 'IR1', 'Reg'],  # General shape of property
    'Utilities': ['ELO', 'NoSeWa', 'NoSewr', 'AllPub'],  # Type of utilities available
    'LandSlope': ['Sev', 'Mod', 'Gtl'],  # Slope of property
    'ExterQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Evaluates the quality of the material on the exterior
    'ExterCond': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Evaluates the present condition of the material on the exterior
    'BsmtQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Height of the basement
    'BsmtCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # General condition of the basement
    'BsmtExposure': ['None', 'No', 'Mn', 'Av', 'Gd'],  # Walkout or garden level basement walls
    'BsmtFinType1': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of basement finished area
    'BsmtFinType2': ['None', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],  # Quality of second basement finished area
    'HeatingQC': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Heating quality and condition
    'KitchenQual': ['Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Kitchen quality
    'Functional': ['Sal', 'Sev', 'Maj2', 'Maj1', 'Mod', 'Min2', 'Min1', 'Typ'],  # Home functionality
    'FireplaceQu': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Fireplace quality
    'GarageFinish': ['None', 'Unf', 'RFn', 'Fin'],  # Interior finish of the garage
    'GarageQual': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage quality
    'GarageCond': ['None', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],  # Garage condition
    'PavedDrive': ['N', 'P', 'Y'],  # Paved driveway
    'PoolQC': ['None', 'Fa', 'TA', 'Gd', 'Ex'],  # Pool quality
    'Fence': ['None', 'MnWw', 'GdWo', 'MnPrv', 'GdPrv']  # Fence quality
}

In [12]:
# Extract list of ALL ordinal features from dictionary
ordinal_features = list(ordinal_order.keys())
# List of ordinal features except Electrical
ordinal_except_electrical = [feature for feature in ordinal_features if feature != 'Electrical']
# Define transformations for various feature types
electrical_transformer = Pipeline(steps=[
    ('impute_electrical', SimpleImputer(strategy='most_frequent')),
    ('ordinal_electrical', OrdinalEncoder(categories=[ordinal_order['Electrical']]))
])
numeric_transformer = Pipeline(steps=[
    ('impute_mean', SimpleImputer(strategy='mean'))
])
# Updated categorical imputer using SimpleImputer
categorical_imputer = SimpleImputer(strategy='constant', fill_value='None')
ordinal_transformer = Pipeline([
    ('impute_ordinal', categorical_imputer),
    ('ordinal', OrdinalEncoder(categories=[ordinal_order[feature] for feature in ordinal_features if feature in ordinal_except_electrical]))
])
nominal_features = [feature for feature in categorical_features if feature not in ordinal_features]
categorical_transformer = Pipeline([
    ('impute_nominal', categorical_imputer),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [13]:
# Combined preprocessor for numeric, ordinal, nominal, and specific electrical data
preprocessor = ColumnTransformer(
    transformers=[
        ('electrical', electrical_transformer, ['Electrical']),
        ('num', numeric_transformer, numeric_features),
        ('ordinal', ordinal_transformer, ordinal_except_electrical),
        ('nominal', categorical_transformer, nominal_features)
])

In [14]:
# Define model pipelines including Gradient Boosting Regressor
models = {
    'Random Forest (Default of 100 Trees)': RandomForestRegressor(random_state=42),
    'Random Forest (200 Trees)': RandomForestRegressor(n_estimators=200, random_state=42),
}

In [15]:
# Evaluate models using cross-validation and print results
results = {}
for name, model in models.items():
    model_pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    scores = cross_val_score(model_pipeline, Ames.drop(columns='SalePrice'), Ames['SalePrice'], cv=5)
    results[name] = round(scores.mean(), 4)
    print(f"{name}: Mean CV R² = {results[name]}")

Random Forest (Default of 100 Trees): Mean CV R² = 0.8939
Random Forest (200 Trees): Mean CV R² = 0.8952


In [16]:
# Experiment with GridSearchCV
from sklearn.model_selection import GridSearchCV
# Parameter grid for GridSearchCV
param_grid = {
    'regressor__n_estimators': [100, 200, 300, 400, 500],
}
# Setup the GridSearchCV
grid_search = GridSearchCV(model_pipeline, param_grid, cv=5, scoring='r2', verbose=1)
# Fit the GridSearchCV to the data
grid_search.fit(Ames.drop(columns='SalePrice'), Ames['SalePrice'])
# Best parameters and best score from Grid Search
print("Best parameters (Grid Search):", grid_search.best_params_)
print("Best score (Grid Search):", round(grid_search.best_score_, 4))

Fitting 5 folds for each of 5 candidates, totalling 25 fits
Best parameters (Grid Search): {'regressor__n_estimators': 400}
Best score (Grid Search): 0.8955


In [17]:
# Experiment with GridSearchCV
from sklearn.model_selection import GridSearchCV
# Parameter grid for GridSearchCV
param_grid = {
    'regressor__n_estimators': [100, 200, 300, 400, 500],
    'regressor__max_depth': [2, 3, 4, 5]
}
# Setup the GridSearchCV
grid_search = GridSearchCV(model_pipeline, param_grid, cv=5, scoring='r2', verbose=1)
# Fit the GridSearchCV to the data
grid_search.fit(Ames.drop(columns='SalePrice'), Ames['SalePrice'])
# Best parameters and best score from Grid Search
print("Best parameters (Grid Search):", grid_search.best_params_)
print("Best score (Grid Search):", round(grid_search.best_score_, 4))

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters (Grid Search): {'regressor__max_depth': 5, 'regressor__n_estimators': 400}
Best score (Grid Search): 0.8571


In [126]:
# Define the ordinal_categories dictionary
ordinal_categories = {
    'LotShape': ['IR3', 'IR2', 'IR1', 'Reg'],
    'LandSlope': ['Sev', 'Mod', 'Gtl'],
    ('ExterQual', 'ExterCond', 'HeatingQC', 'KitchenQual'): ['Po', 'Fa', 'TA', 'Gd', 'Ex'],
    ('BsmtQual', 'BsmtCond'): ['No Bsmt', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],
    'BsmtExposure': ['No Bsmt', 'No', 'Mn', 'Av', 'Gd'],
    ('BsmtFinType1', 'BsmtFinType2'): ['No Bsmt', 'Unf', 'LwQ', 'Rec', 'BLQ', 'ALQ', 'GLQ'],
    'Electrical': ['FuseP', 'FuseF', 'FuseA', 'SBrkr'],
    'FireplaceQu': ['No Fireplace', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],
    'GarageFinish': ['No Garage', 'Unf', 'RFn', 'Fin'],
    ('GarageQual', 'GarageCond'): ['No Garage', 'Po', 'Fa', 'TA', 'Gd', 'Ex'],
    'PavedDrive': ['N', 'P', 'Y'],
    'PoolQC': ['No Pool', 'Fa', 'TA', 'Gd', 'Ex'],
    'Fence': ['No Fence', 'MnWw', 'GdWo', 'MnPrv', 'GdPrv']
}


# Function to extract categories for each feature
def get_categories_dict(ordinal_categories):
    categories_dict = {}
    for key, value in ordinal_categories.items():
        if isinstance(key, tuple):
            for sub_key in key:
                categories_dict[sub_key] = value
        else:
            categories_dict[key] = value
    return categories_dict

# Extract categories for each feature
categories_dict = get_categories_dict(ordinal_categories)

# Separate feature names and their corresponding categories
feature_names = list(categories_dict.keys())
categories = [categories_dict[feature] for feature in feature_names]

# Define transformers for numerical and categorical features
numerical_features = X.select_dtypes(include=[np.number]).columns.tolist()
ordinal_categorical_features = feature_names

non_ordinal_categorical_features = [feature for feature in categorical_features if feature not in ordinal_categorical_features]

ordinal_encoder = OrdinalEncoder(categories=categories)

categorical_transformer = OneHotEncoder(drop=None, handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, non_ordinal_categorical_features),
        ('ord', ordinal_encoder, ordinal_categorical_features ),
        ('num', 'passthrough', numerical_features)  # Pass through numerical features unchanged
    ]
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=0))
])

### III. Run Pipeline: 
Report the untuned RandomForest R2- basic idea of how the model performs.

In [127]:
import time
start_time = time.time()

# Perform cross-validation and store results in a dictionary
cv_results = {}
scores = cross_val_score(pipeline, X, y)
cv_results = round(scores.mean(), 6)
# Output the mean cross-validation scores
print(cv_results)

#an untuned RandomForestRegressor has an R2 of .9024

print(f"{time.time() - start_time} seconds")

0.90241
27.17897605895996 seconds


### IV. Tuning: Hyperparameters
Find optimal parameters for RandomForest using GridSearchCV.

In [128]:
# Define the parameter grid
start_time = time.time()
param_grid = {
    'regressor__n_estimators': [500, 550, 600, 650, 700, 750, 800], # # of trees in forest
    'regressor__max_depth': [None, 10], #The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.
    'regressor__min_samples_split': [2, 10], #The minimum number of samples required to split an internal node
    'regressor__min_samples_leaf': [1], #default 1
    'regressor__max_features': ['sqrt', 'log2'] # 
}


#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 600} Best R2: 0.9019397731638554


param_grid_1 = {
    'regressor__max_depth': [None, 10], 
    'regressor__max_features': ['sqrt'],
    'regressor__min_samples_leaf': [1, 5],
    'regressor__min_samples_split': [2, 5],
    'regressor__n_estimators': [560, 570, 580, 590, 600, 610, 620, 630, 640]
}


#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 580} Best R² Score: 0.9019955018656252


param_grid_2 = {
    'regressor__max_depth': [None, 5], 
    'regressor__max_features': ['sqrt'],
    'regressor__min_samples_leaf': [1, 3],
    'regressor__min_samples_split': [2, 3],
    'regressor__n_estimators': [560, 570, 580, 590, 600]
}


#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 580} Best R² Score: 0.9019955018656252

param_grid_3 = {
    'regressor__max_depth': [None, 3], 
    'regressor__max_features': ['sqrt'],
    'regressor__min_samples_leaf': [1, 2],
    'regressor__min_samples_split': [2],
    'regressor__n_estimators': [575, 580, 585, 590]
}

#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 580} Best R² Score: 0.9019955018656252


param_grid_4 = {
    'regressor__max_depth': [None, 10, 50], 
    'regressor__max_features': ['sqrt'],
    'regressor__min_samples_leaf': [1, 2],
    'regressor__min_samples_split': [2, 3],
    'regressor__n_estimators': [576, 577, 578, 579, 580, 581, 582, 583, 584]
}


#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 582} Best R² Score: 0.9020390361534398


# Initialize the RandomForestRegressor
random_forest = RandomForestRegressor(random_state=0)

# Set up the pipeline (assuming preprocessor is already defined)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', random_forest)
])

# Set up the GridSearchCV
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, 
                           cv=5, n_jobs=-1, verbose=2, scoring='r2')

# Fit the GridSearchCV to the data
grid_search.fit(X, y)

# Print the best parameters and the best score
print("Best Parameters:", grid_search.best_params_)
print("Best R² Score:", grid_search.best_score_)
print(f"{time.time() - start_time} seconds")

Fitting 5 folds for each of 56 candidates, totalling 280 fits
Best Parameters: {'regressor__max_depth': None, 'regressor__max_features': 'sqrt', 'regressor__min_samples_leaf': 1, 'regressor__min_samples_split': 2, 'regressor__n_estimators': 600}
Best R² Score: 0.9019397731638554
209.81568217277527 seconds
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=500; total time=   4.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=550; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=600; total time=   4.5s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=600; tota

[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=500; total time=   4.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=550; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=600; total time=   4.5s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=650; total time=   4.9s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=650; total time=   5.1s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_sam

[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=500; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=500; total time=   3.8s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=550; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=600; total time=   4.6s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=650; total time=   5.1s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_sam

[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=500; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=550; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=550; total time=   4.2s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=600; total time=   4.6s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_samples_split=2, regressor__n_estimators=650; total time=   5.0s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__min_sam

In [122]:
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=0, max_depth = None, 
                                        max_features = 'sqrt', min_samples_leaf = 1, 
                                        min_samples_split = 2, n_estimators = 582))
])

#{'regressor__max_depth': None, 
#'regressor__max_features': 'sqrt', 
#'regressor__min_samples_leaf': 1, 
#'regressor__min_samples_split': 2, 
#'regressor__n_estimators': 582} Best R² Score: 0.9020390361534398

# Perform cross-validation and store results in a dictionary
cv_results = {}
scores = cross_val_score(pipeline, X, y)
cv_results = round(scores.mean(), 6)
# Output the mean cross-validation scores
print(cv_results)

#Tuned RandomForest Regressor has an R2 of 

0.902039


## V. Latest Tuning Attempt

In [139]:
# Define the parameter grid
start_time = time.time()
param_grid = {
    'regressor__n_estimators': [100, 200, 300, 400, 500, 600], # # of trees in forest
}

#{'regressor__n_estimators': 200}
#0.9031478722700008

param_grid_1 = {
    'regressor__min_samples_leaf': [1, 2, 3, 4],
    'regressor__n_estimators': [125, 150, 175, 200, 225, 250, 275]
}


#Best Parameters: {'regressor__min_samples_leaf': 1, 
#'regressor__n_estimators': 200}
#Best R² Score: 0.9031478722700008

param_grid_2 = {
    'regressor__min_samples_leaf': [1, 2],
    'regressor__n_estimators': [180, 190, 200, 210, 220]
}

#Best Parameters: {'regressor__min_samples_leaf': 1, 
#'regressor__n_estimators': 190}
#Best R² Score: 0.9032154211851081

param_grid_3 = {
    'regressor__min_samples_leaf': [1, 2],
    'regressor__n_estimators': [185, 190, 195],
    'regressor__max_depth': [None, 1, 5, 10],
}

#Best Parameters: {'regressor__max_depth': None, 
#'regressor__min_samples_leaf': 1, 
#'regressor__n_estimators': 190} Best R² Score: 0.9032154211851081

param_grid_4 = {
    'regressor__min_samples_leaf': [1],
    'regressor__n_estimators': [188, 190, 192],
    'regressor__max_depth': [None, 1, 5],
}

#Best Parameters: {'regressor__max_depth': None, 
#'regressor__min_samples_leaf': 1, 
#'regressor__n_estimators': 192}
#Best R² Score: 0.9032616311709502

###############
param_grid_5 = {
    'regressor__min_samples_leaf': [1],
    'regressor__n_estimators': [188, 189, 190, 191, 192, 193, 194, 195],
    'regressor__max_depth': [None, 1, 2],
    'regressor__max_features': ['sqrt', 'log2']
}
##HMMMMMMMMMMMMMMMMMMMMMMMM WHAT??


# Initialize the RandomForestRegressor
random_forest = RandomForestRegressor(random_state=0)

# Set up the pipeline (assuming preprocessor is already defined)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', random_forest)
])

# Set up the GridSearchCV
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid_5, 
                           cv=5, n_jobs=-1, verbose=2, scoring='r2')

# Fit the GridSearchCV to the data
grid_search.fit(X, y)

# Print the best parameters and the best score
print("Best Parameters:", grid_search.best_params_)
print("Best R² Score:", grid_search.best_score_)
print(f"{time.time() - start_time} seconds")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'regressor__max_depth': None, 'regressor__max_features': 'sqrt', 'regressor__min_samples_leaf': 1, 'regressor__n_estimators': 189}
Best R² Score: 0.9007973972353549
44.06819987297058 seconds
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=188; total time=   1.4s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=188; total time=   1.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=189; total time=   1.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=190; total time=   1.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=191; total time=   1.4s
[CV]

[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=188; total time=   1.5s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=189; total time=   1.4s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=189; total time=   1.4s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=190; total time=   1.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=191; total time=   1.3s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=192; total time=   1.4s
[CV] END regressor__max_depth=None, regressor__max_features=sqrt, regressor__min_samples_leaf=1, regressor__n_estimators=193; tota