# Grid Search

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit
import used_cars

Import data.

In [None]:
df = pd.read_feather("../Datasets/used_cars_data_large.feather", 
                     used_cars.Info.columns).fillna(pd.NA)
df = used_cars.remove_null_rows(df, [
    'back_legroom', 'front_legroom', 'fuel_tank_volume', 'height', 'length', 
    'maximum_seating', 'width', 'body_type', 'fuel_type', 'transmission', 
    'wheel_system', 'engine_type', 'power', 'torque'
])
df.insert(df.shape[1]-1, "price", df.pop("price"))

In [None]:
X = pd.DataFrame(df.iloc[:, :-1])
y = pd.DataFrame(df.iloc[:, -1])["price"]

In [None]:
cv = ShuffleSplit(
    n_splits=5, test_size=0.1, random_state=0
)

## Random Forest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline

In [None]:
rfreg_pipe = used_cars.make_used_cars_pipeline(
    model=RandomForestRegressor(random_state=0), random_state=0, 
    final_cols=[
'powerRPM', 'savings_amount', 'torqueRPM', 'pca_fuel_economy_1', 
    'pca_engine_1', 'pca_car_usage_1', 'avg_car_space', 
    'franchise_make_encoded_1'
    ],
)

In [None]:
rfreg_param_grid = {
    "model__n_estimators": [10, 25, 50, 100, 125, 150, 175, 200],
    "model__max_samples": [0.5, 1.0],
}

In [None]:
rfreg_grid_search = GridSearchCV(
    rfreg_pipe, rfreg_param_grid, scoring=[
        "r2", "neg_mean_absolute_error", "neg_root_mean_squared_error", 
        "neg_mean_absolute_percentage_error"
    ], refit="r2", cv=cv, verbose=1, n_jobs=-1
)
rfreg_grid_search.fit(X, y)
rfreg_grid_search.best_params_

Fitting 5 folds for each of 16 candidates, totalling 80 fits




Pickle results.

In [None]:
import joblib

In [None]:
with open("../Output/grid_search_8_large.joblib", "wb") as f:
    joblib.dump(rfreg_grid_search, f)

In [None]:
rfreg_results = pd.DataFrame(rfreg_grid_search.cv_results_)

## XGB Regressor

In [None]:
from xgboost import XGBRegressor

In [None]:
xgbreg_pipe = used_cars.make_used_cars_pipeline(
    model=XGBRegressor(random_state=0), random_state=0, 
    final_cols=[
'powerRPM', 'savings_amount', 'torqueRPM', 'pca_fuel_economy_1', 
    'pca_engine_1', 'pca_car_usage_1', 'avg_car_space', 
    'franchise_make_encoded_1'
    ],
)

In [None]:
xgbreg_param_grid = {
    "model__max_depth": [0, 5, 10, 20],
    "model__learning_rate": [0.1, 0.15, 0.2],
    "model__n_estimators": [10, 25, 50, 100, 125, 150],
}

In [None]:
xgbreg_grid_search = GridSearchCV(
    xgbreg_pipe, xgbreg_param_grid, scoring=[
        "r2", "neg_mean_absolute_error", "neg_root_mean_squared_error", 
        "neg_mean_absolute_percentage_error"
    ], refit="r2", cv=cv, verbose=1, n_jobs=-1
)
xgbreg_grid_search.fit(X, y)
xgbreg_grid_search.best_params_

Fitting 10 folds for each of 72 candidates, totalling 720 fits


180 fits failed out of a total of 720.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Gillbert\anaconda3\envs\sc1015\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Gillbert\anaconda3\envs\sc1015\lib\site-packages\sklearn\pipeline.py", line 405, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "c:\Users\Gillbert\anaconda3\envs\sc1015\lib\site-packages\xgboost\core.py", line 620, in inner_f
    return func(**kwargs)
  File "c:\Users\Gillbert\anaconda3\envs\sc1015\lib\site-packages\xgboost\sklearn.py", line 1025, in fi

{'model__learning_rate': 0.2,
 'model__max_depth': 5,
 'model__n_estimators': 150}

Pickle results.

In [None]:
with open("../Output/grid_search_8_large_xgb.joblib", "wb") as f:
    joblib.dump(xgbreg_grid_search, f)

In [28]:
xgbreg_results = pd.DataFrame(xgbreg_grid_search.cv_results_)