In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
import optuna
from sklearn.metrics import mean_squared_error
import xgboost as xgb

cat_types = ["model", "brand", "ext_col", "int_col", "accident", 
             "clean_title", "body_style",
             'engine','fuel_type']
df = pd.read_csv('cars_train_enriched_acc_noassumption.csv')
df['miles_per_year'] = df['milage']
df['miles_per_year'] = df.apply(lambda x: x['miles_per_year'] / x['age'] if x['age']>0 else 0, axis=1)
df = df.astype({col: "category" for col in cat_types})

In [10]:
drop_cols = ['id', 'price_diff', 'adjusted_price_diff', 'transmission', 'full_name', 'brand_model']
y = df['price']
X = df.drop(['price'], axis=1).drop(drop_cols, axis=1)
feature_importances = []
threshold_opt_cats = ["model", "ext_col", "accident", 
             "clean_title", "body_style",
             'engine','fuel_type', 'int_col', 'brand']
def objective(trial):
    params = {
        'price_threshold': trial.suggest_int('price_threshold', 100000, 3000000, log=True),
        'assume_accident': trial.suggest_int('assume_accident', 0, 1),
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'booster': 'gbtree',
        'n_estimators': trial.suggest_int('n_estimators', 50, 1200),
        'eta':trial.suggest_float('eta', 0.0001, 0.5, log = True),  # learning rate
        'max_depth': trial.suggest_int('max_depth', 4, 8),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-5, 100, log = True),
        'subsample': trial.suggest_float('subsample', 0.1, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.05, 1.0),
        'lambda': trial.suggest_float('lambda', 0.1, 1000, log = True),
        'alpha': trial.suggest_float('alpha', 1e-4, 100, log = True),
        'tree_method': 'hist',  
        'device':'cpu'
    }
    threshold = {cat:trial.suggest_int(f'{cat}_threshold', 1, 2000) for cat in threshold_opt_cats}
    include_col = {col:trial.suggest_int(f'include_{col}', 0, 1) for col in X.columns}
    
    errors = []
    n=5
    kf = KFold(n_splits=n, shuffle = True, random_state = 1219)
    kf.get_n_splits(X)
    for train_index, valid_index in kf.split(X):
        #X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.15)
        X_train = X.loc[train_index, :]
        y_train = y[train_index]
        X_valid = X.loc[valid_index, :]
        y_valid = y[valid_index]
        
        X_train = X_train[y_train<params['price_threshold']]
        y_train = y_train[y_train<params['price_threshold']]

        if params['assume_accident']==1: 
            X_train.loc[X_train['accident'] == 'unknown', 'accident'] = 'atleast1accidentordamagereported'
            X_valid.loc[X_valid['accident'] == 'unknown', 'accident'] = 'atleast1accidentordamagereported'

        for cat in cat_types:
            value_counts = X_train[cat].value_counts().to_dict()
            X_train[cat] = X_train[cat].apply(lambda x: x if value_counts[x] > threshold[cat] else "unknown")
            X_valid[cat] = X_valid[cat].apply(lambda x: x if (x in value_counts) and (value_counts[x] > threshold[cat]) else "unknown")

        X_train = X_train[[col for col in include_col if include_col[col]==1]]
        X_valid = X_valid[[col for col in include_col if include_col[col]==1]]
        
        if len(X_train.columns)==0: return 10000000000   #really large number for the case where we drop all columns

        X_train = X_train.astype({col: "category" for col in cat_types if col in X_train.columns})
        X_valid = X_valid.astype({col: "category" for col in cat_types if col in X_valid.columns})

        dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
        dvalid = xgb.DMatrix(X_valid, label=y_valid, enable_categorical=True)

        # Train the model
        model = xgb.train(params, dtrain, evals=[(dvalid, 'validation')], num_boost_round=params['n_estimators'], early_stopping_rounds=35, verbose_eval=False)
        feature_importances.append(model.get_score(importance_type='gain'))  # get feature importance
        # Predict on the validation set
        y_pred_valid = model.predict(dvalid)

        # Calculate RMSE on the validation set
        rmse = mean_squared_error(y_valid, y_pred_valid, squared=False)
        errors.append(rmse)
    
    return sum(errors)/n



study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=500)


best_params = study.best_params
print(f"Best hyperparameters: {best_params}")

print("Parameter importance:\n", optuna.importance.get_param_importances(study))


[I 2024-09-26 20:48:32,682] A new study created in memory with name: no-name-8d515808-b1de-4950-9a15-85c13374240f
Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:49:00,495] Trial 0 finished with value: 78584.45071661941 and parameters: {'price_threshold': 755639, 'assume_accident': 0, 'n_estimators': 540, 'eta': 0.00010643230411515313, 'max_depth': 7, 'min_child_weight': 6.460082655781379, 'subsample': 0.9392125788202175, 'colsample_bytree': 0.09264974571885709, 'lambda': 4.0134807658441805, 'alpha': 0.08521817451650826, 'model_threshold': 578, 'ext_col_threshold': 1949, 'accident_threshold': 103, 'clean

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:50:10,826] Trial 3 finished with value: 78134.59497093948 and parameters: {'price_threshold': 257644, 'assume_accident': 0, 'n_estimators': 202, 'eta': 0.0010035974297640876, 'max_depth': 5, 'min_child_weight': 1.7581368173507286, 'subsample': 0.3538089224637663, 'colsample_bytree': 0.5755779480363691, 'lambda': 512.2548756276738, 'alpha': 0.00013150855879240052, 'model_threshold': 1197, 'ext_col_threshold': 1125, 'accident_threshold': 1872, 'clean_title_threshold': 1646, 'body_style_threshold': 811, 'engine_threshold': 450, 'fuel_type_threshold': 393, 'int_col_threshold': 338, 'brand_threshold': 939, 'include_brand': 0, 'include_model': 0, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 0, 'include_eng

[I 2024-09-26 20:51:57,953] Trial 6 finished with value: 76792.38489804904 and parameters: {'price_threshold': 1560620, 'assume_accident': 1, 'n_estimators': 687, 'eta': 0.0004640130564654129, 'max_depth': 5, 'min_child_weight': 0.0007411188568038045, 'subsample': 0.25242605002534874, 'colsample_bytree': 0.9218306637049928, 'lambda': 665.0253919143936, 'alpha': 0.49064461224791567, 'model_threshold': 1358, 'ext_col_threshold': 1537, 'accident_threshold': 1340, 'clean_title_threshold': 62, 'body_style_threshold': 159, 'engine_threshold': 272, 'fuel_type_threshold': 1125, 'int_col_threshold': 580, 'brand_threshold': 298, 'include_brand': 0, 'include_model': 0, 'include_model_year': 1, 'include_milage': 1, 'include_fuel_type': 0, 'include_engine': 0, 'include_ext_col': 0, 'include_int_col': 1, 'include_accident': 1, 'include_clean_title': 0, 'include_body_style': 0, 'include_msrp': 1, 'include_age': 0, 'include_reliability': 0, 'include_adjusted_msrp': 1, 'include_miles_per_year': 0}. Bes

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:53:53,584] Trial 10 finished with value: 75478.96093516692 and parameters: {'price_threshold': 103554, 'assume_accident': 1, 'n_estimators': 269, 'eta': 0.007452959046543864, 'max_depth': 6, 'min_child_weight': 0.12948441742372627, 'subsample': 0.664751022930446, 'colsample_bytree': 0.9973962256083198, 'lambda': 92.33634736052608, 'alpha': 21.264906743084314, 'model_threshold': 1907, 'ext_col_threshold': 557, 'accident_threshold': 557, 'clean_title_threshold': 1966, 'body_style_threshold': 59, 'engine_threshold': 1501, 'fuel_type_threshold': 11, 'int_col_th

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:55:19,907] Trial 13 finished with value: 72787.6041076538 and parameters: {'price_threshold': 832095, 'assume_accident': 1, 'n_estimators': 374, 'eta': 0.010970294586603814, 'max_depth': 8, 'min_child_weight': 47.299758918286614, 'subsample': 0.6146359821332805, 'colsample_bytree': 0.7837508461434985, 'lambda': 0.9978850473764659, 'alpha': 0.002738796756130554, 'model_threshold': 1502, 'ext_col_threshold': 769, 'accident_threshold': 1393, 'clean_title_threshold': 539, 'body_style_threshold': 377, 'engine_threshold': 1351, 'fuel_type_threshold': 1977, 'int_col_threshold': 1516, 'brand_threshold': 1977, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:56:38,253] Trial 17 finished with value: 73739.9491737582 and parameters: {'price_threshold': 975418, 'assume_accident': 1, 'n_estimators': 435, 'eta': 0.0028620283520413483, 'max_depth': 8, 'min_child_weight': 0.022756747177853368, 'subsample': 0.531504070080692, 'colsample_bytree': 0.8434015118811599, 'lambda': 2.0478349309521087, 'alpha': 0.0003236342238206071, 'model_threshold': 1188, 'ext_col_threshold': 1375, 'accident_threshold': 1173, 'clean_title_threshold': 288, 'body_style_threshold': 28, 'engine_threshold': 898, 'fuel_type_threshold': 1757, 'int

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:57:19,337] Trial 20 finished with value: 72843.3169028612 and parameters: {'price_threshold': 1223362, 'assume_accident': 1, 'n_estimators': 648, 'eta': 0.018767324497306948, 'max_depth': 6, 'min_child_weight': 0.0362160375926934, 'subsample': 0.6318298589269526, 'colsample_bytree': 0.6305312389617657, 'lambda': 0.11421728663692872, 'alpha': 1.9922945454399346, 'model_threshold': 1397, 'ext_col_threshold': 920, 'accident_threshold': 1762, 'clean_title_threshold': 711, 'body_style_threshold': 1959, 'engine_threshold': 1654, 'fuel_type_threshold': 1527, 'int_col_threshold': 1679, 'brand_threshold': 1985, 'include_brand': 0, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 0, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 0, 'includ

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:59:10,340] Trial 24 finished with value: 72925.96745933546 and parameters: {'price_threshold': 1640485, 'assume_accident': 1, 'n_estimators': 483, 'eta': 0.026933961398150234, 'max_depth': 8, 'min_child_weight': 3.9181795910843236, 'subsample': 0.5888747981311, 'colsample_bytree': 0.6585777341019818, 'lambda': 0.3789849531540697, 'alpha': 0.00411073837685114, 'model_threshold': 1083, 'ext_col_threshold': 590, 'accident_threshold': 1498, 'clean_title_threshold': 271, 'body_style_threshold': 658, 'engine_threshold': 1999, 'fuel_type_threshold': 1715, 'int_col

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 20:59:42,577] Trial 27 finished with value: 72966.34638231224 and parameters: {'price_threshold': 1051743, 'assume_accident': 1, 'n_estimators': 605, 'eta': 0.44243734464901446, 'max_depth': 4, 'min_child_weight': 4.650122353916956, 'subsample': 0.6209114473954267, 'colsample_bytree': 0.2492753371387621, 'lambda': 0.23822357222404336, 'alpha': 0.05407207916113724, 'model_threshold': 1482, 'ext_col_threshold': 219, 'accident_threshold': 1291, 'clean_title_threshold': 1017, 'body_style_threshold': 449, 'engine_threshold': 1613, 'fuel_type_threshold': 1855, 'int_col_threshold': 848, 'brand_threshold': 1160, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:02:02,026] Trial 31 finished with value: 72752.24025584405 and parameters: {'price_threshold': 1369105, 'assume_accident': 1, 'n_estimators': 525, 'eta': 0.01433717721429876, 'max_depth': 8, 'min_child_weight': 9.435244576472122, 'subsample': 0.7052826851387474, 'colsample_bytree': 0.6531188910604594, 'lambda': 4.309931630637898, 'alpha': 0.0012012659596443277, 'model_threshold': 1685, 'ext_col_threshold': 593, 'accident_threshold': 1572, 'clean_title_threshold': 1635, 'body_style_threshold': 349, 'engine_threshold': 1314, 'fuel_type_threshold': 1646, 'int_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:03:24,328] Trial 34 finished with value: 72764.41987290335 and parameters: {'price_threshold': 1405593, 'assume_accident': 1, 'n_estimators': 514, 'eta': 0.03916913446702158, 'max_depth': 8, 'min_child_weight': 2.177303108851413, 'subsample': 0.7273712951691721, 'colsample_bytree': 0.5134933829273898, 'lambda': 15.81207789098311, 'alpha': 0.000802613554308697, 'model_threshold': 1880, 'ext_col_threshold': 491, 'accident_threshold': 1983, 'clean_title_threshold': 1785, 'body_style_threshold': 1353, 'engine_threshold': 1256, 'fuel_type_threshold': 1436, 'int_col_threshold': 1296, 'brand_threshold': 1818, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'includ

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:06:54,335] Trial 38 finished with value: 73620.54251329042 and parameters: {'price_threshold': 2488349, 'assume_accident': 0, 'n_estimators': 1010, 'eta': 0.0221880417511927, 'max_depth': 4, 'min_child_weight': 0.00010603837258055381, 'subsample': 0.9096821532350701, 'colsample_bytree': 0.5974261981177537, 'lambda': 2.1649232635919726, 'alpha': 0.00024407323110197388, 'model_threshold': 1234, 'ext_col_threshold': 633, 'accident_threshold': 1742, 'clean_title_threshold': 1763, 'body_style_threshold': 1555, 'engine_threshold': 896, 'fuel_type_threshold': 1442

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:08:54,138] Trial 41 finished with value: 72795.35320220495 and parameters: {'price_threshold': 1780324, 'assume_accident': 1, 'n_estimators': 735, 'eta': 0.03630904129457844, 'max_depth': 7, 'min_child_weight': 35.018789680070455, 'subsample': 0.7529067037618001, 'colsample_bytree': 0.9093874584304604, 'lambda': 5.9937862300462195, 'alpha': 0.006191186496537762, 'model_threshold': 1835, 'ext_col_threshold': 495, 'accident_threshold': 1658, 'clean_title_threshold': 1208, 'body_style_threshold': 794, 'engine_threshold': 1418, 'fuel_type_threshold': 1316, 'int_col_threshold': 1253, 'brand_threshold': 1787, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'inclu

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:10:45,880] Trial 45 finished with value: 72732.63937144773 and parameters: {'price_threshold': 1570912, 'assume_accident': 1, 'n_estimators': 796, 'eta': 0.01268255069807086, 'max_depth': 6, 'min_child_weight': 2.9954498812010475, 'subsample': 0.7703293716469344, 'colsample_bytree': 0.7935064144256585, 'lambda': 10.076704946615294, 'alpha': 0.0001842158370656401, 'model_threshold': 1013, 'ext_col_threshold': 656, 'accident_threshold': 1737, 'clean_title_threshold': 1061, 'body_style_threshold': 722, 'engine_threshold': 1119, 'fuel_type_threshold': 1215, 'in

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:13:47,094] Trial 48 finished with value: 75534.34687402644 and parameters: {'price_threshold': 445964, 'assume_accident': 0, 'n_estimators': 1163, 'eta': 0.0007463466202623446, 'max_depth': 5, 'min_child_weight': 6.390452517628728, 'subsample': 0.19989716998429502, 'colsample_bytree': 0.7123442760312011, 'lambda': 1.5715084382086584, 'alpha': 0.0018855772587536082, 'model_threshold': 201, 'ext_col_threshold': 842, 'accident_threshold': 1782, 'clean_title_threshold': 1843, 'body_style_threshold': 104, 'engine_threshold': 1030, 'fuel_type_threshold': 1554, 'int_col_threshold': 1308, 'brand_threshold': 1633, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'inc

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:15:58,971] Trial 52 finished with value: 73311.29882194466 and parameters: {'price_threshold': 244144, 'assume_accident': 1, 'n_estimators': 701, 'eta': 0.018119395217047075, 'max_depth': 6, 'min_child_weight': 77.83984231652299, 'subsample': 0.6682054377107467, 'colsample_bytree': 0.7572387662702846, 'lambda': 11.224360500067709, 'alpha': 32.5639803936678, 'model_threshold': 1583, 'ext_col_threshold': 707, 'accident_threshold': 1721, 'clean_title_threshold': 1010, 'body_style_threshold': 850, 'engine_threshold': 1271, 'fuel_type_threshold': 1189, 'int_col_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:19:13,586] Trial 55 finished with value: 72770.53361139022 and parameters: {'price_threshold': 1588868, 'assume_accident': 1, 'n_estimators': 812, 'eta': 0.010721464266402657, 'max_depth': 6, 'min_child_weight': 72.06952452370105, 'subsample': 0.6451527009041694, 'colsample_bytree': 0.6505883892364935, 'lambda': 9.899863999732188, 'alpha': 0.7676274699030013, 'model_threshold': 1285, 'ext_col_threshold': 224, 'accident_threshold': 1346, 'clean_title_threshold': 1115, 'body_style_threshold': 593, 'engine_threshold': 1570, 'fuel_type_threshold': 1816, 'int_col_threshold': 1600, 'brand_threshold': 1725, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:23:55,458] Trial 59 finished with value: 72761.9797091306 and parameters: {'price_threshold': 1155744, 'assume_accident': 1, 'n_estimators': 963, 'eta': 0.01448746334212047, 'max_depth': 6, 'min_child_weight': 6.2423972408645305, 'subsample': 0.5320893952069243, 'colsample_bytree': 0.9595415539150578, 'lambda': 1.1312636433207444, 'alpha': 0.00032076916266052447, 'model_threshold': 1154, 'ext_col_threshold': 934, 'accident_threshold': 1097, 'clean_title_threshold': 1338, 'body_style_threshold': 463, 'engine_threshold': 1881, 'fuel_type_threshold': 1795, 'in

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:27:05,499] Trial 62 finished with value: 72723.20974013399 and parameters: {'price_threshold': 745482, 'assume_accident': 1, 'n_estimators': 876, 'eta': 0.027010244918327937, 'max_depth': 6, 'min_child_weight': 14.15931521509398, 'subsample': 0.4326836323264792, 'colsample_bytree': 0.8829633856024723, 'lambda': 2.6259249797632616, 'alpha': 0.00019457637918552373, 'model_threshold': 1040, 'ext_col_threshold': 688, 'accident_threshold': 1296, 'clean_title_threshold': 1277, 'body_style_threshold': 311, 'engine_threshold': 1125, 'fuel_type_threshold': 1942, 'int_col_threshold': 1662, 'brand_threshold': 219, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'inclu

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:29:14,096] Trial 66 finished with value: 72734.05344457345 and parameters: {'price_threshold': 700959, 'assume_accident': 1, 'n_estimators': 973, 'eta': 0.029663475460770097, 'max_depth': 6, 'min_child_weight': 5.449724010098695, 'subsample': 0.46908629830852894, 'colsample_bytree': 0.9241207930656503, 'lambda': 0.9599087302672384, 'alpha': 0.00013969647067048132, 'model_threshold': 1224, 'ext_col_threshold': 699, 'accident_threshold': 902, 'clean_title_threshold': 1434, 'body_style_threshold': 140, 'engine_threshold': 683, 'fuel_type_threshold': 1995, 'int

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:30:18,614] Trial 69 finished with value: 73004.02851464177 and parameters: {'price_threshold': 348437, 'assume_accident': 1, 'n_estimators': 960, 'eta': 0.07169173455885633, 'max_depth': 5, 'min_child_weight': 6.842321447036639, 'subsample': 0.3663672950443194, 'colsample_bytree': 0.8527134641917895, 'lambda': 2.961622919820834, 'alpha': 0.00010165104285899894, 'model_threshold': 1411, 'ext_col_threshold': 866, 'accident_threshold': 1129, 'clean_title_threshold': 1092, 'body_style_threshold': 91, 'engine_threshold': 586, 'fuel_type_threshold': 1843, 'int_col_threshold': 1816, 'brand_threshold': 188, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_c

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:35:57,480] Trial 73 finished with value: 72722.01282634914 and parameters: {'price_threshold': 588302, 'assume_accident': 1, 'n_estimators': 826, 'eta': 0.005195001705732561, 'max_depth': 6, 'min_child_weight': 1.5834279803349767, 'subsample': 0.4893328758174549, 'colsample_bytree': 0.935774354079778, 'lambda': 1.938181209624101, 'alpha': 0.00014836469897610608, 'model_threshold': 824, 'ext_col_threshold': 717, 'accident_threshold': 704, 'clean_title_threshold': 946, 'body_style_threshold': 145, 'engine_threshold': 140, 'fuel_type_threshold': 1964, 'int_col

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:40:42,763] Trial 76 finished with value: 72847.04017794933 and parameters: {'price_threshold': 582904, 'assume_accident': 1, 'n_estimators': 760, 'eta': 0.004553616348765217, 'max_depth': 6, 'min_child_weight': 1.7581371901664677, 'subsample': 0.5275755906447868, 'colsample_bytree': 0.9254480113526532, 'lambda': 1.6115209160107236, 'alpha': 0.0004661713242522251, 'model_threshold': 770, 'ext_col_threshold': 897, 'accident_threshold': 443, 'clean_title_threshold': 757, 'body_style_threshold': 188, 'engine_threshold': 318, 'fuel_type_threshold': 1879, 'int_col_threshold': 1995, 'brand_threshold': 692, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_c

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:45:51,287] Trial 80 finished with value: 73229.01064306908 and parameters: {'price_threshold': 539838, 'assume_accident': 1, 'n_estimators': 662, 'eta': 0.0027990912339666122, 'max_depth': 6, 'min_child_weight': 0.00020549020408780767, 'subsample': 0.4662140937363807, 'colsample_bytree': 0.9451925478515469, 'lambda': 1.9719965660236214, 'alpha': 0.0001483593833505883, 'model_threshold': 842, 'ext_col_threshold': 786, 'accident_threshold': 928, 'clean_title_threshold': 695, 'body_style_threshold': 230, 'engine_threshold': 379, 'fuel_type_threshold': 1999, 'i

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:49:10,156] Trial 83 finished with value: 72624.33359026596 and parameters: {'price_threshold': 824272, 'assume_accident': 1, 'n_estimators': 903, 'eta': 0.008646362507869942, 'max_depth': 6, 'min_child_weight': 2.3304734013201958, 'subsample': 0.3261221624286028, 'colsample_bytree': 0.9304717793138593, 'lambda': 5.610664886901991, 'alpha': 0.0004160545354869028, 'model_threshold': 527, 'ext_col_threshold': 725, 'accident_threshold': 482, 'clean_title_threshold': 790, 'body_style_threshold': 53, 'engine_threshold': 91, 'fuel_type_threshold': 1811, 'int_col_threshold': 1789, 'brand_threshold': 95, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_clean

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:54:07,311] Trial 87 finished with value: 72735.37707905812 and parameters: {'price_threshold': 804292, 'assume_accident': 1, 'n_estimators': 1004, 'eta': 0.010420698897009433, 'max_depth': 6, 'min_child_weight': 2.5339616533934466, 'subsample': 0.23344378343850333, 'colsample_bytree': 0.9147597503460827, 'lambda': 3.659923141562258, 'alpha': 0.0010485407758149016, 'model_threshold': 607, 'ext_col_threshold': 547, 'accident_threshold': 613, 'clean_title_threshold': 515, 'body_style_threshold': 213, 'engine_threshold': 285, 'fuel_type_threshold': 1864, 'int_c

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 21:57:31,915] Trial 90 finished with value: 72627.66543242708 and parameters: {'price_threshold': 1058973, 'assume_accident': 1, 'n_estimators': 983, 'eta': 0.008385239909124377, 'max_depth': 6, 'min_child_weight': 27.752377143191502, 'subsample': 0.19242701778403024, 'colsample_bytree': 0.9035060941763805, 'lambda': 0.5903451712932413, 'alpha': 0.0003760132689946422, 'model_threshold': 239, 'ext_col_threshold': 643, 'accident_threshold': 226, 'clean_title_threshold': 805, 'body_style_threshold': 311, 'engine_threshold': 53, 'fuel_type_threshold': 1762, 'int_col_threshold': 1813, 'brand_threshold': 144, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:00:40,126] Trial 94 finished with value: 72680.6348502395 and parameters: {'price_threshold': 1058706, 'assume_accident': 1, 'n_estimators': 1123, 'eta': 0.01622664189303158, 'max_depth': 6, 'min_child_weight': 28.163189480843894, 'subsample': 0.17113265947571477, 'colsample_bytree': 0.8562460888657041, 'lambda': 0.28102397862115, 'alpha': 0.000551153750943363, 'model_threshold': 9, 'ext_col_threshold': 1214, 'accident_threshold': 183, 'clean_title_threshold': 828, 'body_style_threshold': 175, 'engine_threshold': 56, 'fuel_type_threshold': 1706, 'int_col_th

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:02:35,517] Trial 97 finished with value: 72843.34085390213 and parameters: {'price_threshold': 645242, 'assume_accident': 1, 'n_estimators': 983, 'eta': 0.015952698179107678, 'max_depth': 6, 'min_child_weight': 64.32143784977012, 'subsample': 0.27243079579986074, 'colsample_bytree': 0.8641403526471126, 'lambda': 0.5338987538333758, 'alpha': 0.00033035409417593375, 'model_threshold': 105, 'ext_col_threshold': 1733, 'accident_threshold': 63, 'clean_title_threshold': 993, 'body_style_threshold': 276, 'engine_threshold': 29, 'fuel_type_threshold': 1641, 'int_col_threshold': 1815, 'brand_threshold': 45, 'include_brand': 1, 'include_model': 1, 'include_model_year': 1, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 0, 'include_ext_col': 0, 'include_int_col': 0, 'include_accident': 1, 'include_cl

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:07:46,485] Trial 101 finished with value: 72644.9490289506 and parameters: {'price_threshold': 951467, 'assume_accident': 1, 'n_estimators': 1148, 'eta': 0.010276276066690362, 'max_depth': 6, 'min_child_weight': 29.797269543259794, 'subsample': 0.22206452421387082, 'colsample_bytree': 0.8986151446061263, 'lambda': 0.26248492233500087, 'alpha': 0.0004216490996952761, 'model_threshold': 272, 'ext_col_threshold': 1456, 'accident_threshold': 224, 'clean_title_threshold': 705, 'body_style_threshold': 6, 'engine_threshold': 114, 'fuel_type_threshold': 1780, 'int_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:10:58,029] Trial 104 finished with value: 72765.64277965079 and parameters: {'price_threshold': 702110, 'assume_accident': 1, 'n_estimators': 1026, 'eta': 0.012538542074615817, 'max_depth': 6, 'min_child_weight': 95.6867153405133, 'subsample': 0.16286649199815992, 'colsample_bytree': 0.9173344180322738, 'lambda': 0.6696936277042903, 'alpha': 0.00024247954825418476, 'model_threshold': 322, 'ext_col_threshold': 1855, 'accident_threshold': 406, 'clean_title_threshold': 1039, 'body_style_threshold': 56, 'engine_threshold': 45, 'fuel_type_threshold': 1860, 'int_col_threshold': 1771, 'brand_threshold': 90, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:15:27,494] Trial 108 finished with value: 72633.83060348425 and parameters: {'price_threshold': 1151380, 'assume_accident': 1, 'n_estimators': 1107, 'eta': 0.01371126686615564, 'max_depth': 6, 'min_child_weight': 17.967737870371504, 'subsample': 0.29038153083659, 'colsample_bytree': 0.9649037802229252, 'lambda': 0.4299463023118584, 'alpha': 0.000301081287013584, 'model_threshold': 121, 'ext_col_threshold': 1830, 'accident_threshold': 95, 'clean_title_threshold': 763, 'body_style_threshold': 97, 'engine_threshold': 47, 'fuel_type_threshold': 1657, 'int_col_t

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:17:56,679] Trial 111 finished with value: 72662.24103857805 and parameters: {'price_threshold': 1148875, 'assume_accident': 1, 'n_estimators': 1129, 'eta': 0.021558839759919412, 'max_depth': 6, 'min_child_weight': 11.355901731681062, 'subsample': 0.2891279506987875, 'colsample_bytree': 0.9850632113745428, 'lambda': 0.161928853212268, 'alpha': 0.000739913842055358, 'model_threshold': 136, 'ext_col_threshold': 1769, 'accident_threshold': 79, 'clean_title_threshold': 808, 'body_style_threshold': 93, 'engine_threshold': 44, 'fuel_type_threshold': 1697, 'int_col_threshold': 1632, 'brand_threshold': 97, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 0, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_cle

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:21:27,732] Trial 115 finished with value: 73026.83837127057 and parameters: {'price_threshold': 1225920, 'assume_accident': 1, 'n_estimators': 1019, 'eta': 0.012027835556635641, 'max_depth': 6, 'min_child_weight': 39.90023971144888, 'subsample': 0.3079607108765598, 'colsample_bytree': 0.8478925770332295, 'lambda': 0.31234415190707, 'alpha': 0.0003659963178094954, 'model_threshold': 101, 'ext_col_threshold': 1669, 'accident_threshold': 94, 'clean_title_threshold': 536, 'body_style_threshold': 58, 'engine_threshold': 1, 'fuel_type_threshold': 1679, 'int_col_t

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:23:57,647] Trial 118 finished with value: 72707.21803331198 and parameters: {'price_threshold': 854223, 'assume_accident': 1, 'n_estimators': 995, 'eta': 0.018999290915782063, 'max_depth': 6, 'min_child_weight': 0.0019294506487292932, 'subsample': 0.2095607974233059, 'colsample_bytree': 0.9243488040877167, 'lambda': 0.6358172217586667, 'alpha': 0.0008672189349646003, 'model_threshold': 310, 'ext_col_threshold': 1935, 'accident_threshold': 125, 'clean_title_threshold': 1061, 'body_style_threshold': 172, 'engine_threshold': 107, 'fuel_type_threshold': 1812, 'int_col_threshold': 751, 'brand_threshold': 78, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 1, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'inclu

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:31:09,059] Trial 122 finished with value: 72637.96280110264 and parameters: {'price_threshold': 1339268, 'assume_accident': 1, 'n_estimators': 1162, 'eta': 0.005589890239006889, 'max_depth': 6, 'min_child_weight': 24.183996046321422, 'subsample': 0.3685460553590302, 'colsample_bytree': 0.7942803247394965, 'lambda': 0.7023331012327779, 'alpha': 0.7294606299740887, 'model_threshold': 177, 'ext_col_threshold': 1684, 'accident_threshold': 53, 'clean_title_threshold': 970, 'body_style_threshold': 332, 'engine_threshold': 137, 'fuel_type_threshold': 1458, 'int_co

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:36:14,272] Trial 125 finished with value: 72704.72194233349 and parameters: {'price_threshold': 1481608, 'assume_accident': 1, 'n_estimators': 1106, 'eta': 0.010062839370226902, 'max_depth': 6, 'min_child_weight': 37.75802270037983, 'subsample': 0.12504688888604465, 'colsample_bytree': 0.7622491829500945, 'lambda': 0.32775707302542784, 'alpha': 0.8165127827204464, 'model_threshold': 221, 'ext_col_threshold': 1805, 'accident_threshold': 215, 'clean_title_threshold': 1011, 'body_style_threshold': 373, 'engine_threshold': 170, 'fuel_type_threshold': 1661, 'int_col_threshold': 1505, 'brand_threshold': 130, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'includ

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:43:40,403] Trial 129 finished with value: 72723.69111344151 and parameters: {'price_threshold': 1183070, 'assume_accident': 1, 'n_estimators': 1138, 'eta': 0.005146154439154401, 'max_depth': 7, 'min_child_weight': 7.733235600273279, 'subsample': 0.2818890662929199, 'colsample_bytree': 0.8894416146713953, 'lambda': 0.7899874165457812, 'alpha': 1.053866047209962, 'model_threshold': 176, 'ext_col_threshold': 1504, 'accident_threshold': 292, 'clean_title_threshold': 673, 'body_style_threshold': 166, 'engine_threshold': 315, 'fuel_type_threshold': 1855, 'int_col

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:46:28,147] Trial 132 finished with value: 72644.37686506011 and parameters: {'price_threshold': 1052245, 'assume_accident': 1, 'n_estimators': 909, 'eta': 0.018306546474438114, 'max_depth': 6, 'min_child_weight': 5.059807911140702, 'subsample': 0.2420894578184172, 'colsample_bytree': 0.8044022585740205, 'lambda': 6.64792432895029, 'alpha': 0.03716161367066725, 'model_threshold': 564, 'ext_col_threshold': 1766, 'accident_threshold': 368, 'clean_title_threshold': 958, 'body_style_threshold': 117, 'engine_threshold': 54, 'fuel_type_threshold': 1900, 'int_col_threshold': 1777, 'brand_threshold': 54, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include_clean

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:50:26,709] Trial 136 finished with value: 72786.20099909617 and parameters: {'price_threshold': 725217, 'assume_accident': 1, 'n_estimators': 1052, 'eta': 0.00923523736454332, 'max_depth': 6, 'min_child_weight': 38.53557578796617, 'subsample': 0.41035944405990715, 'colsample_bytree': 0.8833225461377625, 'lambda': 0.9222483118059668, 'alpha': 0.1433260396899922, 'model_threshold': 90, 'ext_col_threshold': 1565, 'accident_threshold': 471, 'clean_title_threshold': 1004, 'body_style_threshold': 128, 'engine_threshold': 158, 'fuel_type_threshold': 1763, 'int_col

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:54:13,961] Trial 139 finished with value: 72679.66943585439 and parameters: {'price_threshold': 1062825, 'assume_accident': 1, 'n_estimators': 930, 'eta': 0.006322556832580259, 'max_depth': 6, 'min_child_weight': 0.06001857357215395, 'subsample': 0.44623120276145717, 'colsample_bytree': 0.8190866650785288, 'lambda': 0.5668273292343144, 'alpha': 0.23319068287468145, 'model_threshold': 217, 'ext_col_threshold': 1609, 'accident_threshold': 260, 'clean_title_threshold': 1029, 'body_style_threshold': 273, 'engine_threshold': 40, 'fuel_type_threshold': 1761, 'int_col_threshold': 1951, 'brand_threshold': 37, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'include

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 22:58:29,859] Trial 143 finished with value: 72622.53117076735 and parameters: {'price_threshold': 1343704, 'assume_accident': 1, 'n_estimators': 1121, 'eta': 0.007511257696811641, 'max_depth': 6, 'min_child_weight': 43.621881752358554, 'subsample': 0.3203909213462951, 'colsample_bytree': 0.9018160282072963, 'lambda': 0.27428161262890927, 'alpha': 0.0004814565069726787, 'model_threshold': 280, 'ext_col_threshold': 1498, 'accident_threshold': 26, 'clean_title_threshold': 982, 'body_style_threshold': 161, 'engine_threshold': 97, 'fuel_type_threshold': 1838, 'int

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:02:15,046] Trial 146 finished with value: 72628.59554519742 and parameters: {'price_threshold': 1360842, 'assume_accident': 1, 'n_estimators': 1140, 'eta': 0.009688326194188818, 'max_depth': 6, 'min_child_weight': 15.864070251503767, 'subsample': 0.41136045557070067, 'colsample_bytree': 0.9221021521786383, 'lambda': 0.3337321714279632, 'alpha': 0.00019701736609841375, 'model_threshold': 416, 'ext_col_threshold': 1665, 'accident_threshold': 139, 'clean_title_threshold': 728, 'body_style_threshold': 334, 'engine_threshold': 166, 'fuel_type_threshold': 1408, 'int_col_threshold': 1589, 'brand_threshold': 67, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'incl

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:06:28,687] Trial 150 finished with value: 72730.52575984481 and parameters: {'price_threshold': 1361326, 'assume_accident': 1, 'n_estimators': 1123, 'eta': 0.011945348657312794, 'max_depth': 6, 'min_child_weight': 10.329689568181319, 'subsample': 0.37556288118148773, 'colsample_bytree': 0.8906737103291275, 'lambda': 1.2615579657963805, 'alpha': 0.000271518272824217, 'model_threshold': 117, 'ext_col_threshold': 1374, 'accident_threshold': 146, 'clean_title_threshold': 805, 'body_style_threshold': 214, 'engine_threshold': 219, 'fuel_type_threshold': 1700, 'in

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:08:45,206] Trial 153 finished with value: 72607.45035290078 and parameters: {'price_threshold': 887619, 'assume_accident': 1, 'n_estimators': 1081, 'eta': 0.010858615987895753, 'max_depth': 6, 'min_child_weight': 56.2032069686221, 'subsample': 0.34959367443577927, 'colsample_bytree': 0.9458321267487235, 'lambda': 0.36892420446011304, 'alpha': 0.0002195716275979774, 'model_threshold': 420, 'ext_col_threshold': 1656, 'accident_threshold': 126, 'clean_title_threshold': 749, 'body_style_threshold': 315, 'engine_threshold': 107, 'fuel_type_threshold': 1432, 'int_col_threshold': 1652, 'brand_threshold': 186, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'includ

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:14:12,940] Trial 157 finished with value: 72804.75170134843 and parameters: {'price_threshold': 864826, 'assume_accident': 1, 'n_estimators': 1086, 'eta': 0.003822857239395803, 'max_depth': 6, 'min_child_weight': 47.45263118573532, 'subsample': 0.3612069478138199, 'colsample_bytree': 0.42863357730560064, 'lambda': 0.5150813931511187, 'alpha': 0.00012620363294915335, 'model_threshold': 386, 'ext_col_threshold': 1655, 'accident_threshold': 325, 'clean_title_threshold': 741, 'body_style_threshold': 246, 'engine_threshold': 147, 'fuel_type_threshold': 1438, 'in

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:18:23,314] Trial 160 finished with value: 72629.2517617658 and parameters: {'price_threshold': 1231774, 'assume_accident': 1, 'n_estimators': 1075, 'eta': 0.010604432740813307, 'max_depth': 6, 'min_child_weight': 49.59693820646641, 'subsample': 0.31573149517158383, 'colsample_bytree': 0.8452847642550018, 'lambda': 0.7204363351990478, 'alpha': 0.0003551866873547065, 'model_threshold': 446, 'ext_col_threshold': 1637, 'accident_threshold': 142, 'clean_title_threshold': 814, 'body_style_threshold': 355, 'engine_threshold': 177, 'fuel_type_threshold': 1414, 'int_col_threshold': 1711, 'brand_threshold': 142, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'includ

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:21:57,442] Trial 164 finished with value: 72701.13431847068 and parameters: {'price_threshold': 1697979, 'assume_accident': 1, 'n_estimators': 1109, 'eta': 0.011698614949479162, 'max_depth': 6, 'min_child_weight': 94.9908475493447, 'subsample': 0.2688694823433132, 'colsample_bytree': 0.778251369730872, 'lambda': 0.6142178108039075, 'alpha': 0.0005279146538004709, 'model_threshold': 402, 'ext_col_threshold': 1591, 'accident_threshold': 38, 'clean_title_threshold': 165, 'body_style_threshold': 320, 'engine_threshold': 215, 'fuel_type_threshold': 1263, 'int_co

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:24:25,893] Trial 167 finished with value: 72704.63699345951 and parameters: {'price_threshold': 1465021, 'assume_accident': 1, 'n_estimators': 1139, 'eta': 0.0127057877498051, 'max_depth': 6, 'min_child_weight': 24.138672198667084, 'subsample': 0.36310062197209436, 'colsample_bytree': 0.7314617692124463, 'lambda': 0.29533754505001397, 'alpha': 0.00023131744303261768, 'model_threshold': 308, 'ext_col_threshold': 1408, 'accident_threshold': 119, 'clean_title_threshold': 709, 'body_style_threshold': 279, 'engine_threshold': 247, 'fuel_type_threshold': 1365, 'int_col_threshold': 1677, 'brand_threshold': 37, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 0, 'include_accident': 1, 'inclu

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:28:16,211] Trial 171 finished with value: 77770.86898673783 and parameters: {'price_threshold': 1412073, 'assume_accident': 1, 'n_estimators': 1068, 'eta': 0.000105018508341548, 'max_depth': 6, 'min_child_weight': 49.79120115154766, 'subsample': 0.31814657884985015, 'colsample_bytree': 0.8060296874582009, 'lambda': 0.6370327107359984, 'alpha': 0.00037175902991589733, 'model_threshold': 438, 'ext_col_threshold': 1642, 'accident_threshold': 160, 'clean_title_threshold': 813, 'body_style_threshold': 356, 'engine_threshold': 184, 'fuel_type_threshold': 1398, 'i

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:31:34,428] Trial 174 finished with value: 72603.88367928723 and parameters: {'price_threshold': 1376758, 'assume_accident': 1, 'n_estimators': 1082, 'eta': 0.013800747807405638, 'max_depth': 6, 'min_child_weight': 44.30397790828226, 'subsample': 0.2611848524463739, 'colsample_bytree': 0.835289624231608, 'lambda': 0.3649668513265254, 'alpha': 0.00026356247160416437, 'model_threshold': 646, 'ext_col_threshold': 1663, 'accident_threshold': 72, 'clean_title_threshold': 920, 'body_style_threshold': 568, 'engine_threshold': 83, 'fuel_type_threshold': 1498, 'int_col_threshold': 1713, 'brand_threshold': 119, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 1, 'include_accident': 1, 'include_

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:33:44,051] Trial 178 finished with value: 72643.34747937175 and parameters: {'price_threshold': 1608615, 'assume_accident': 1, 'n_estimators': 979, 'eta': 0.02289115759167658, 'max_depth': 6, 'min_child_weight': 19.08691637222707, 'subsample': 0.24865416298456305, 'colsample_bytree': 0.7582135948220134, 'lambda': 0.38155614997372583, 'alpha': 0.000104632586057101, 'model_threshold': 614, 'ext_col_threshold': 1716, 'accident_threshold': 77, 'clean_title_threshold': 932, 'body_style_threshold': 620, 'engine_threshold': 78, 'fuel_type_threshold': 1473, 'int_co

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:35:20,502] Trial 181 finished with value: 72626.42284599374 and parameters: {'price_threshold': 1309652, 'assume_accident': 1, 'n_estimators': 1041, 'eta': 0.019257030863538062, 'max_depth': 6, 'min_child_weight': 25.929846382814862, 'subsample': 0.22999107585051326, 'colsample_bytree': 0.8984066064270727, 'lambda': 0.5048709722494589, 'alpha': 0.00022126218172332677, 'model_threshold': 597, 'ext_col_threshold': 1662, 'accident_threshold': 78, 'clean_title_threshold': 938, 'body_style_threshold': 485, 'engine_threshold': 117, 'fuel_type_threshold': 1498, 'int_col_threshold': 1781, 'brand_threshold': 113, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 1, 'include_accident': 1, 'incl

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:37:46,994] Trial 185 finished with value: 72691.53117507817 and parameters: {'price_threshold': 1302721, 'assume_accident': 0, 'n_estimators': 994, 'eta': 0.017598953363138273, 'max_depth': 6, 'min_child_weight': 77.13290391948391, 'subsample': 0.2301226943695398, 'colsample_bytree': 0.7753870019481628, 'lambda': 0.48537958257073166, 'alpha': 0.00028349966578572884, 'model_threshold': 729, 'ext_col_threshold': 1789, 'accident_threshold': 46, 'clean_title_threshold': 1044, 'body_style_threshold': 683, 'engine_threshold': 74, 'fuel_type_threshold': 1459, 'int

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:39:32,544] Trial 188 finished with value: 72632.69317187327 and parameters: {'price_threshold': 1665724, 'assume_accident': 0, 'n_estimators': 1012, 'eta': 0.025694533643516464, 'max_depth': 6, 'min_child_weight': 66.95553472516313, 'subsample': 0.2821772953033018, 'colsample_bytree': 0.7514071833795254, 'lambda': 0.1688823108282674, 'alpha': 0.0005479157836499205, 'model_threshold': 578, 'ext_col_threshold': 1767, 'accident_threshold': 37, 'clean_title_threshold': 962, 'body_style_threshold': 595, 'engine_threshold': 103, 'fuel_type_threshold': 1575, 'int_col_threshold': 1773, 'brand_threshold': 5, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 1, 'include_accident': 1, 'include_c

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:41:45,313] Trial 192 finished with value: 72632.53771787128 and parameters: {'price_threshold': 1217173, 'assume_accident': 0, 'n_estimators': 943, 'eta': 0.03774031405117178, 'max_depth': 6, 'min_child_weight': 51.81507593200599, 'subsample': 0.2333459269478545, 'colsample_bytree': 0.7974510846319841, 'lambda': 0.3159317516550893, 'alpha': 0.00029043383335027793, 'model_threshold': 623, 'ext_col_threshold': 1672, 'accident_threshold': 56, 'clean_title_threshold': 918, 'body_style_threshold': 629, 'engine_threshold': 35, 'fuel_type_threshold': 1502, 'int_co

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

Parameters: { "assume_accident", "n_estimators", "price_threshold" } are not used.

[I 2024-09-26 23:44:00,312] Trial 195 finished with value: 72606.71924569672 and parameters: {'price_threshold': 1556722, 'assume_accident': 0, 'n_estimators': 951, 'eta': 0.01804493860540013, 'max_depth': 6, 'min_child_weight': 22.608696547808545, 'subsample': 0.2752733709809438, 'colsample_bytree': 0.7915325216561243, 'lambda': 0.19223044856169258, 'alpha': 0.0005221704979269993, 'model_threshold': 661, 'ext_col_threshold': 1789, 'accident_threshold': 72, 'clean_title_threshold': 945, 'body_style_threshold': 461, 'engine_threshold': 67, 'fuel_type_threshold': 1554, 'int_col_threshold': 1770, 'brand_threshold': 117, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 1, 'include_accident': 1, 'include_

KeyboardInterrupt: 

In [11]:
best_params = {'price_threshold': 1194471, 'assume_accident': 0, 'n_estimators': 951, 'eta': 0.013253672942340268, 'max_depth': 6, 'min_child_weight': 38.26739593627571, 'subsample': 0.20646752099818685, 'colsample_bytree': 0.787577422310926, 'lambda': 0.30415017474541467, 'alpha': 0.00030546530869858226, 'model_threshold': 627, 'ext_col_threshold': 1679, 'accident_threshold': 62, 'clean_title_threshold': 917, 'body_style_threshold': 542, 'engine_threshold': 49, 'fuel_type_threshold': 1492, 'int_col_threshold': 1832, 'brand_threshold': 113, 'include_brand': 1, 'include_model': 1, 'include_model_year': 0, 'include_milage': 0, 'include_fuel_type': 1, 'include_engine': 1, 'include_ext_col': 1, 'include_int_col': 1, 'include_accident': 1, 'include_clean_title': 0, 'include_body_style': 1, 'include_msrp': 1, 'include_age': 1, 'include_reliability': 0, 'include_adjusted_msrp': 0, 'include_miles_per_year': 1}
best_params['objective'] = 'reg:squarederror'
best_params['eval_metric'] = 'rmse'
best_params['device'] = 'cpu'
not_included = [col for col in X.columns if best_params[f'include_{col}']==0]

drop_cols_train = ['id', 'price_diff', 'adjusted_price_diff', 'transmission', 'full_name', 'brand_model']
y = df['price']
X = df.drop(['price'], axis=1).drop(drop_cols_train, axis=1)

X = X[y<best_params['price_threshold']]
y = y[y<best_params['price_threshold']]

drop_cols_test = ['transmission', 'full_name', 'brand_model']
dt = pd.read_csv('cars_test_enriched_acc_noassumption.csv')
dt['miles_per_year'] = dt['milage']
dt['miles_per_year'] = dt.apply(lambda x: x['miles_per_year'] / x['age'] if x['age']>0 else 0, axis=1)

if best_params['assume_accident']==1: 
    X.loc[X['accident'] == 'unknown', 'accident'] = 'atleast1accidentordamagereported'
    dt.loc[dt['accident'] == 'unknown', 'accident'] = 'atleast1accidentordamagereported'




for cat in cat_types:
        value_counts = X[cat].value_counts().to_dict()
        X[cat] = X[cat].apply(lambda x: x if value_counts[x] > best_params[f'{cat}_threshold'] else "unknown")
        dt[cat] = dt[cat].apply(lambda x: x if (x in value_counts) and (value_counts[x] > best_params[f'{cat}_threshold']) else "unknown")

X = X.astype({col: "category" for col in cat_types})
dt = dt.astype({col: "category" for col in cat_types})

X.drop(not_included, axis = 1, inplace= True)
dt.drop(drop_cols_test, axis=1, inplace=True)
dt.drop(not_included, axis=1, inplace=True)



dtrain = xgb.DMatrix(X, label=y, enable_categorical=True)
model = xgb.train(best_params, dtrain, num_boost_round=best_params['n_estimators'])

Parameters: { "accident_threshold", "assume_accident", "body_style_threshold", "brand_threshold", "clean_title_threshold", "engine_threshold", "ext_col_threshold", "fuel_type_threshold", "include_accident", "include_adjusted_msrp", "include_age", "include_body_style", "include_brand", "include_clean_title", "include_engine", "include_ext_col", "include_fuel_type", "include_int_col", "include_milage", "include_miles_per_year", "include_model", "include_model_year", "include_msrp", "include_reliability", "int_col_threshold", "model_threshold", "n_estimators", "price_threshold" } are not used.



In [12]:
ids = dt.pop('id')
dtest = xgb.DMatrix(dt, enable_categorical=True)  
pred = model.predict(dtest)



# Create a submission DataFrame
submission_df = pd.DataFrame({
    'id': ids,
    'price': pred
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('submission15.csv', index=False)