In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
import optuna

In [2]:
df = pd.read_csv(r'C:\Users\Professional\Desktop\Kaggle competitions\30-days-of-ml\data\train_folds.csv')
df_test = pd.read_csv(r'C:\Users\Professional\Desktop\Kaggle competitions\30-days-of-ml\data\test.csv')
sample_submission = pd.read_csv(r'C:\Users\Professional\Desktop\Kaggle competitions\30-days-of-ml\data\sample_submission.csv')

useful_features = [c for c in df.columns if c not in ("id", "target", "kfold")]
object_cols = [col for col in useful_features if col.startswith("cat")]
df_test = df_test[useful_features]

for col in object_cols:
    temp_df = []
    temp_test_feat = None
    for fold in range(5):
        xtrain =  df[df.kfold != fold].reset_index(drop=True)
        xvalid = df[df.kfold == fold].reset_index(drop=True)
        feat = xtrain.groupby(col)["target"].agg("mean")
        feat = feat.to_dict()
        xvalid.loc[:, f"tar_enc_{col}"] = xvalid[col].map(feat)
        temp_df.append(xvalid)
        if temp_test_feat is None:
            temp_test_feat = df_test[col].map(feat)
        else:
            temp_test_feat += df_test[col].map(feat)
    
    temp_test_feat /= 5
    df_test.loc[:, f"tar_enc_{col}"] = temp_test_feat
    df = pd.concat(temp_df)
    

useful_features = [c for c in df.columns if c not in ("id", "target", "kfold")]
object_cols = [col for col in useful_features if col.startswith("cat")]
df_test = df_test[useful_features]

In [26]:
def run(trial):
    fold = 0
    learning_rate = trial.suggest_float("learning_rate", 1e-3, 0.4, log=True)
    reg_lambda = trial.suggest_loguniform("reg_lambda", 1e-8, 100.0)
    subsample = trial.suggest_float("subsample", 0.1, 1.0)
    colsample_bylevel = trial.suggest_float("colsample_bylevel", 0.1, 1.0)
    max_depth = trial.suggest_int("max_depth", 1, 8)

    xtrain = df[df.kfold != fold].reset_index(drop=True)
    xvalid = df[df.kfold == fold].reset_index(drop=True)

    ytrain = xtrain.target
    yvalid = xvalid.target

    xtrain = xtrain[useful_features]
    xvalid = xvalid[useful_features]

    ordinal_encoder = preprocessing.OrdinalEncoder()
    xtrain[object_cols] = ordinal_encoder.fit_transform(xtrain[object_cols])
    xvalid[object_cols] = ordinal_encoder.transform(xvalid[object_cols])

    model = CatBoostRegressor(
        random_state=42,
        task_type = "GPU",
        n_estimators=8000,
        bootstrap_type="Bernoulli",
        learning_rate=learning_rate,
        subsample=subsample,
        reg_lambda=reg_lambda,
        max_depth=max_depth,
    )
    model.fit(xtrain, ytrain, early_stopping_rounds=800, eval_set=[(xvalid, yvalid)], verbose=1000)
    preds_valid = model.predict(xvalid)
    rmse = mean_squared_error(yvalid, preds_valid, squared=False)
    return rmse

In [27]:
study = optuna.create_study(direction="minimize")
study.optimize(run, n_trials=15)

[32m[I 2021-08-24 07:38:24,381][0m A new study created in memory with name: no-name-e4d5f4ab-d4a5-4a12-a9ef-bd8a85121178[0m


0:	learn: 0.7463312	test: 0.7452627	best: 0.7452627 (0)	total: 7.09ms	remaining: 56.7s
1000:	learn: 0.7178207	test: 0.7230532	best: 0.7230527 (999)	total: 6.41s	remaining: 44.8s
2000:	learn: 0.7094455	test: 0.7213875	best: 0.7213875 (2000)	total: 12.9s	remaining: 38.7s
3000:	learn: 0.7020173	test: 0.7208884	best: 0.7208817 (2988)	total: 19.4s	remaining: 32.2s
4000:	learn: 0.6951390	test: 0.7207361	best: 0.7207326 (3994)	total: 25.7s	remaining: 25.7s
bestTest = 0.7207211196
bestIteration = 4083
Shrink model to first 4084 iterations.


[32m[I 2021-08-24 07:38:58,469][0m Trial 0 finished with value: 0.7207209795491829 and parameters: {'learning_rate': 0.033283368472661196, 'reg_lambda': 6.180730194339443, 'subsample': 0.2093815583335964, 'colsample_bylevel': 0.5198618986889302, 'max_depth': 6}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7467278	test: 0.7456590	best: 0.7456590 (0)	total: 5.75ms	remaining: 46s
1000:	learn: 0.7355175	test: 0.7349490	best: 0.7349490 (1000)	total: 5.12s	remaining: 35.8s
2000:	learn: 0.7319088	test: 0.7317241	best: 0.7317241 (2000)	total: 10.3s	remaining: 30.7s
3000:	learn: 0.7297008	test: 0.7298690	best: 0.7298690 (3000)	total: 15.4s	remaining: 25.7s
4000:	learn: 0.7279163	test: 0.7284232	best: 0.7284232 (4000)	total: 20.5s	remaining: 20.5s
5000:	learn: 0.7265135	test: 0.7273825	best: 0.7273825 (5000)	total: 25.6s	remaining: 15.3s
6000:	learn: 0.7252472	test: 0.7264730	best: 0.7264730 (6000)	total: 30.7s	remaining: 10.2s
7000:	learn: 0.7241604	test: 0.7257569	best: 0.7257569 (7000)	total: 35.7s	remaining: 5.1s
7999:	learn: 0.7232349	test: 0.7251442	best: 0.7251442 (7999)	total: 40.8s	remaining: 0us
bestTest = 0.7251441577
bestIteration = 7999


[32m[I 2021-08-24 07:39:42,161][0m Trial 1 finished with value: 0.7251442280314992 and parameters: {'learning_rate': 0.003738944269756206, 'reg_lambda': 0.2302525692496485, 'subsample': 0.7783662964379253, 'colsample_bylevel': 0.9617592349898778, 'max_depth': 4}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7467034	test: 0.7456362	best: 0.7456362 (0)	total: 4.14ms	remaining: 33.2s
1000:	learn: 0.7350281	test: 0.7343480	best: 0.7343480 (1000)	total: 3.39s	remaining: 23.7s
2000:	learn: 0.7316099	test: 0.7311765	best: 0.7311765 (2000)	total: 6.8s	remaining: 20.4s
3000:	learn: 0.7297049	test: 0.7294558	best: 0.7294558 (3000)	total: 10.4s	remaining: 17.3s
4000:	learn: 0.7283259	test: 0.7282612	best: 0.7282612 (4000)	total: 13.7s	remaining: 13.7s
5000:	learn: 0.7271746	test: 0.7273027	best: 0.7273027 (5000)	total: 17.1s	remaining: 10.3s
6000:	learn: 0.7261515	test: 0.7264793	best: 0.7264793 (6000)	total: 20.4s	remaining: 6.8s
7000:	learn: 0.7252375	test: 0.7257427	best: 0.7257427 (7000)	total: 23.7s	remaining: 3.39s
7999:	learn: 0.7244801	test: 0.7251707	best: 0.7251707 (7999)	total: 27.1s	remaining: 0us
bestTest = 0.7251707324
bestIteration = 7999


[32m[I 2021-08-24 07:40:12,013][0m Trial 2 finished with value: 0.7251708340055415 and parameters: {'learning_rate': 0.009527279444135655, 'reg_lambda': 1.0729732863163667e-07, 'subsample': 0.15754856796489564, 'colsample_bylevel': 0.8793885060047034, 'max_depth': 2}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7467410	test: 0.7456728	best: 0.7456728 (0)	total: 7.93ms	remaining: 1m 3s
1000:	learn: 0.7363883	test: 0.7359290	best: 0.7359290 (1000)	total: 6.29s	remaining: 44s
2000:	learn: 0.7326186	test: 0.7327388	best: 0.7327388 (2000)	total: 12.5s	remaining: 37.6s
3000:	learn: 0.7302030	test: 0.7308237	best: 0.7308237 (3000)	total: 18.7s	remaining: 31.2s
4000:	learn: 0.7284201	test: 0.7295163	best: 0.7295163 (4000)	total: 24.9s	remaining: 24.9s
5000:	learn: 0.7269695	test: 0.7285117	best: 0.7285117 (5000)	total: 31.2s	remaining: 18.7s
6000:	learn: 0.7257091	test: 0.7276740	best: 0.7276740 (6000)	total: 37.4s	remaining: 12.4s
7000:	learn: 0.7245150	test: 0.7268810	best: 0.7268810 (7000)	total: 43.5s	remaining: 6.21s
7999:	learn: 0.7234237	test: 0.7261756	best: 0.7261756 (7999)	total: 49.8s	remaining: 0us
bestTest = 0.726175569
bestIteration = 7999


[32m[I 2021-08-24 07:41:04,680][0m Trial 3 finished with value: 0.726175339102174 and parameters: {'learning_rate': 0.001942793821755954, 'reg_lambda': 0.0002875596906790846, 'subsample': 0.2653290703196742, 'colsample_bylevel': 0.8755571075277606, 'max_depth': 6}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7467507	test: 0.7456822	best: 0.7456822 (0)	total: 5.42ms	remaining: 43.4s
1000:	learn: 0.7394259	test: 0.7385155	best: 0.7385155 (1000)	total: 4.86s	remaining: 34s
2000:	learn: 0.7363703	test: 0.7357344	best: 0.7357344 (2000)	total: 9.63s	remaining: 28.9s
3000:	learn: 0.7343458	test: 0.7338858	best: 0.7338858 (3000)	total: 14.5s	remaining: 24.1s
4000:	learn: 0.7328708	test: 0.7325659	best: 0.7325659 (4000)	total: 19.4s	remaining: 19.3s
5000:	learn: 0.7316895	test: 0.7315371	best: 0.7315371 (5000)	total: 24.2s	remaining: 14.5s
6000:	learn: 0.7307032	test: 0.7307097	best: 0.7307097 (6000)	total: 29s	remaining: 9.66s
7000:	learn: 0.7298498	test: 0.7300026	best: 0.7300026 (7000)	total: 33.8s	remaining: 4.83s
7999:	learn: 0.7290453	test: 0.7293419	best: 0.7293419 (7999)	total: 38.7s	remaining: 0us
bestTest = 0.729341906
bestIteration = 7999


[32m[I 2021-08-24 07:41:46,106][0m Trial 4 finished with value: 0.7293418091359418 and parameters: {'learning_rate': 0.0015696029468877278, 'reg_lambda': 1.5825805063016402e-08, 'subsample': 0.35174479284421056, 'colsample_bylevel': 0.6522467776330793, 'max_depth': 4}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7466426	test: 0.7455721	best: 0.7455721 (0)	total: 5.31ms	remaining: 42.5s
1000:	learn: 0.7295057	test: 0.7296513	best: 0.7296513 (1000)	total: 4.89s	remaining: 34.2s
2000:	learn: 0.7250491	test: 0.7261682	best: 0.7261682 (2000)	total: 9.68s	remaining: 29s
3000:	learn: 0.7222836	test: 0.7243424	best: 0.7243424 (3000)	total: 14.6s	remaining: 24.3s
4000:	learn: 0.7203139	test: 0.7231160	best: 0.7231160 (4000)	total: 19.6s	remaining: 19.6s
5000:	learn: 0.7188023	test: 0.7223328	best: 0.7223327 (4999)	total: 24.5s	remaining: 14.7s
6000:	learn: 0.7174430	test: 0.7217313	best: 0.7217313 (6000)	total: 29.3s	remaining: 9.75s
7000:	learn: 0.7161764	test: 0.7212988	best: 0.7212988 (7000)	total: 34.1s	remaining: 4.86s
7999:	learn: 0.7150295	test: 0.7210170	best: 0.7210170 (7999)	total: 38.9s	remaining: 0us
bestTest = 0.7210170091
bestIteration = 7999


[32m[I 2021-08-24 07:42:27,807][0m Trial 5 finished with value: 0.7210168295932609 and parameters: {'learning_rate': 0.011897888679502922, 'reg_lambda': 22.274831284826373, 'subsample': 0.9825431984962631, 'colsample_bylevel': 0.2632568699999095, 'max_depth': 4}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7450886	test: 0.7439769	best: 0.7439769 (0)	total: 6.64ms	remaining: 53.1s
1000:	learn: 0.6987056	test: 0.7210936	best: 0.7210009 (694)	total: 5.52s	remaining: 38.6s
bestTest = 0.7210009364
bestIteration = 694
Shrink model to first 695 iterations.


[32m[I 2021-08-24 07:42:38,441][0m Trial 6 finished with value: 0.7210009822364954 and parameters: {'learning_rate': 0.14860877636420555, 'reg_lambda': 0.04384434735234172, 'subsample': 0.40262099101168913, 'colsample_bylevel': 0.7908975547694621, 'max_depth': 5}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7466964	test: 0.7456280	best: 0.7456280 (0)	total: 4.81ms	remaining: 38.5s
1000:	learn: 0.7333790	test: 0.7329165	best: 0.7329165 (1000)	total: 4.14s	remaining: 29s
2000:	learn: 0.7297071	test: 0.7297297	best: 0.7297297 (2000)	total: 8.4s	remaining: 25.2s
3000:	learn: 0.7273493	test: 0.7278876	best: 0.7278876 (3000)	total: 12.7s	remaining: 21.2s
4000:	learn: 0.7254425	test: 0.7264077	best: 0.7264077 (4000)	total: 17s	remaining: 17s
5000:	learn: 0.7239292	test: 0.7252530	best: 0.7252530 (5000)	total: 21.3s	remaining: 12.8s
6000:	learn: 0.7227191	test: 0.7244148	best: 0.7244148 (6000)	total: 25.6s	remaining: 8.53s
7000:	learn: 0.7216844	test: 0.7237578	best: 0.7237578 (7000)	total: 29.9s	remaining: 4.26s
7999:	learn: 0.7207558	test: 0.7231760	best: 0.7231760 (7999)	total: 34.2s	remaining: 0us
bestTest = 0.7231760237
bestIteration = 7999


[32m[I 2021-08-24 07:43:15,549][0m Trial 7 finished with value: 0.7231756889931532 and parameters: {'learning_rate': 0.00796798684214976, 'reg_lambda': 2.140607850922924e-06, 'subsample': 0.45482650256528, 'colsample_bylevel': 0.8747127875194655, 'max_depth': 3}. Best is trial 0 with value: 0.7207209795491829.[0m


0:	learn: 0.7444151	test: 0.7433356	best: 0.7433356 (0)	total: 5.43ms	remaining: 43.4s
1000:	learn: 0.7066299	test: 0.7211690	best: 0.7206881 (704)	total: 4.05s	remaining: 28.3s
bestTest = 0.7206881025
bestIteration = 704
Shrink model to first 705 iterations.


[32m[I 2021-08-24 07:43:24,143][0m Trial 8 finished with value: 0.7206880949738793 and parameters: {'learning_rate': 0.3116814103668553, 'reg_lambda': 0.020696232528184935, 'subsample': 0.3049405676201832, 'colsample_bylevel': 0.16983181332459224, 'max_depth': 3}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7460423	test: 0.7449754	best: 0.7449754 (0)	total: 9.97ms	remaining: 1m 19s
1000:	learn: 0.7095525	test: 0.7213576	best: 0.7213576 (1000)	total: 6.26s	remaining: 43.8s
2000:	learn: 0.6943539	test: 0.7207562	best: 0.7207278 (1729)	total: 12.5s	remaining: 37.5s
bestTest = 0.7207278268
bestIteration = 1729
Shrink model to first 1730 iterations.


[32m[I 2021-08-24 07:43:42,385][0m Trial 9 finished with value: 0.7207278475781743 and parameters: {'learning_rate': 0.055065532938118444, 'reg_lambda': 2.6369277955373323e-08, 'subsample': 0.60315551363809, 'colsample_bylevel': 0.4949922286748527, 'max_depth': 6}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7424682	test: 0.7416065	best: 0.7416065 (0)	total: 9.89ms	remaining: 1m 19s
bestTest = 0.7245621208
bestIteration = 102
Shrink model to first 103 iterations.


[32m[I 2021-08-24 07:43:51,964][0m Trial 10 finished with value: 0.7245620929899433 and parameters: {'learning_rate': 0.3100260589454329, 'reg_lambda': 0.00027812968726000824, 'subsample': 0.6069363833201424, 'colsample_bylevel': 0.13164567471170968, 'max_depth': 8}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7466013	test: 0.7455348	best: 0.7455348 (0)	total: 4.17ms	remaining: 33.3s
1000:	learn: 0.7329881	test: 0.7323841	best: 0.7323841 (1000)	total: 2.69s	remaining: 18.8s
2000:	learn: 0.7304357	test: 0.7300043	best: 0.7300043 (2000)	total: 5.47s	remaining: 16.4s
3000:	learn: 0.7291938	test: 0.7288858	best: 0.7288858 (3000)	total: 8.3s	remaining: 13.8s
4000:	learn: 0.7283271	test: 0.7281018	best: 0.7281017 (3999)	total: 11s	remaining: 11s
5000:	learn: 0.7276405	test: 0.7275013	best: 0.7275013 (5000)	total: 13.8s	remaining: 8.27s
6000:	learn: 0.7270692	test: 0.7270102	best: 0.7270102 (6000)	total: 16.5s	remaining: 5.51s
7000:	learn: 0.7265774	test: 0.7265895	best: 0.7265890 (6999)	total: 19.4s	remaining: 2.77s
7999:	learn: 0.7261673	test: 0.7262340	best: 0.7262340 (7999)	total: 22.4s	remaining: 0us
bestTest = 0.7262340431
bestIteration = 7999


[32m[I 2021-08-24 07:44:17,168][0m Trial 11 finished with value: 0.726233809395939 and parameters: {'learning_rate': 0.03635421280994362, 'reg_lambda': 86.59270330445635, 'subsample': 0.12039028926534498, 'colsample_bylevel': 0.3528166675939638, 'max_depth': 1}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7453294	test: 0.7443234	best: 0.7443234 (0)	total: 9.91ms	remaining: 1m 19s
1000:	learn: 0.6679592	test: 0.7228487	best: 0.7222602 (736)	total: 7.96s	remaining: 55.7s
bestTest = 0.7222602462
bestIteration = 736
Shrink model to first 737 iterations.


[32m[I 2021-08-24 07:44:31,935][0m Trial 12 finished with value: 0.7222602809068469 and parameters: {'learning_rate': 0.09214008867048275, 'reg_lambda': 0.28119876152359363, 'subsample': 0.25709022568392054, 'colsample_bylevel': 0.4970640998972675, 'max_depth': 8}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7435116	test: 0.7424494	best: 0.7424494 (0)	total: 6.88ms	remaining: 55s
1000:	learn: 0.6648464	test: 0.7269283	best: 0.7223076 (231)	total: 6.33s	remaining: 44.2s
bestTest = 0.722307613
bestIteration = 231
Shrink model to first 232 iterations.


[32m[I 2021-08-24 07:44:40,794][0m Trial 13 finished with value: 0.7223076033353495 and parameters: {'learning_rate': 0.2792867688001318, 'reg_lambda': 2.6459690383287824, 'subsample': 0.2542996145989843, 'colsample_bylevel': 0.11804148543646666, 'max_depth': 6}. Best is trial 8 with value: 0.7206880949738793.[0m


0:	learn: 0.7465828	test: 0.7455174	best: 0.7455174 (0)	total: 4.46ms	remaining: 35.7s


KeyboardInterrupt: 

In [5]:
study.best_params

{'learning_rate': 0.019620560209436984,
 'reg_lambda': 0.01011649689512418,
 'subsample': 0.5976074901918561,
 'colsample_bylevel': 0.3858760189885154,
 'max_depth': 6}