In [1]:
!pip install optuna



In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import roc_auc_score

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

from catboost import CatBoostClassifier

import os
print(os.listdir("../input"))

import optuna

['df_test.csv', '__notebook__.ipynb', 'lgb_oof.csv', '__output__.json', 'lgbm_importances.png', 'lgb_all_predictions.csv', 'df_train.csv', '__results___files', 'custom.css', '__results__.html', 'lgb_submission.csv']


In [3]:
# Load the training and test tables from the ../input directory.
# Later cells read the 'target' and 'ID_code' columns of df_train and treat
# every other column as a model feature.
df_train = pd.read_csv('../input/df_train.csv')
df_test = pd.read_csv('../input/df_test.csv')

In [4]:
# Seed used for the StratifiedKFold shuffle in run_catboost (read there as a
# module-level name); it is also applied to NumPy's global RNG.
# CatBoost itself is seeded separately via "random_seed" in the objective's
# parameter dict.
random_state = 416
np.random.seed(random_state)

In [5]:
def run_catboost(df_train, df_test, cat_params, features):
    """Cross-validate a CatBoost classifier and return its out-of-fold ROC AUC.

    The training frame is split into 5 stratified, shuffled folds (seeded with
    the module-level ``random_state``). For each fold a fresh
    ``CatBoostClassifier(**cat_params)`` is fitted on the training part, with
    the held-out part passed as ``eval_set`` and ``use_best_model=True``. The
    positive-class probabilities predicted for the held-out rows are gathered
    into a single out-of-fold vector, which is scored once over all rows.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training data; must contain a ``'target'`` column and every column
        named in ``features``. Any index is accepted.
    df_test : pandas.DataFrame
        Currently unused; kept so existing callers keep working.
    cat_params : dict
        Keyword arguments forwarded to ``CatBoostClassifier``.
    features : list of str
        Names of the columns used as model inputs.

    Returns
    -------
    float
        ROC AUC of the out-of-fold predictions against ``df_train['target']``.

    Notes
    -----
    The held-out fold is also the ``eval_set`` that drives best-iteration
    selection, so the returned score is likely slightly optimistic.
    """
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    target = df_train['target']

    # Out-of-fold predictions live in a plain float array addressed by row
    # POSITION. The splitter yields positional indices, so writing them into a
    # NumPy buffer is correct for any DataFrame index and avoids chained
    # assignment on a DataFrame slice (which pandas may apply to a copy).
    oof_predict = np.zeros(len(df_train), dtype=float)

    for fold, (trn_idx, val_idx) in enumerate(skf.split(df_train, target)):
        X_train, y_train = df_train.iloc[trn_idx][features], target.iloc[trn_idx]
        X_valid, y_valid = df_train.iloc[val_idx][features], target.iloc[val_idx]

        cat_model = CatBoostClassifier(**cat_params)
        cat_model.fit(X_train, y_train, eval_set=(X_valid, y_valid),
                      use_best_model=True)

        # Column 1 of predict_proba is the probability of the positive class.
        oof_predict[val_idx] = cat_model.predict_proba(X_valid)[:, 1]

    return roc_auc_score(target, oof_predict)

In [6]:
def objective(trial):
    """Optuna objective: ``1 - AUC`` of the cross-validated CatBoost model.

    Builds a CatBoost parameter dict in which only ``bagging_temperature`` is
    sampled from ``trial`` (log-uniformly in [1.8, 1.9]); every other setting
    is fixed. The model is then cross-validated on the module-level
    ``df_train`` via ``run_catboost``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameter.

    Returns
    -------
    float
        ``1 - score`` where ``score`` is the value returned by
        ``run_catboost``; lower is better.
    """
    # Adjust the fixed values below as needed; "objective" and "eval_metric"
    # should be changed to suit the task.
    cat_params = {
        "objective": "Logloss",
        "bootstrap_type": 'Poisson',
        "iterations": 100000,
        "learning_rate": 0.03,
        "max_depth": 5,
        "eval_metric": 'AUC',
        "random_seed": 1225,
        "subsample": 0.9992666140793471,
        # suggest_float(..., log=True) is the current spelling of the
        # deprecated suggest_loguniform.
        # NOTE(review): CatBoost documents bagging_temperature as a setting of
        # the Bayesian bootstrap, while bootstrap_type here is 'Poisson' --
        # confirm that this parameter actually influences training.
        "bagging_temperature": trial.suggest_float('bagging_temperature', 1.8, 1.9, log=True),
        "random_strength": 1,
        "l2_leaf_reg": 91,
        "od_type": 'Iter',
        "metric_period": 1000,
        "task_type": "GPU",
        "od_wait": 1000,
        "border_count": 32,
        "max_ctr_complexity": 5,
    }

    # Every column except the label and the row identifier is a model input.
    features = [col for col in df_train.columns if col not in ['target', 'ID_code']]
    score = run_catboost(df_train, df_test, cat_params, features)

    # Optuna minimizes the objective by default, so return 1 - AUC in order
    # to maximize AUC.
    return 1 - score

In [7]:
if __name__ == '__main__':
    # Run a 10-trial Optuna study over `objective` and report the best trial.
    # The sampler is seeded with the notebook-wide `random_state` so that the
    # sequence of suggested values is repeatable across runs; TPE is Optuna's
    # default sampler, so only the seeding differs from create_study().
    sampler = optuna.samplers.TPESampler(seed=random_state)
    # `objective` returns 1 - AUC, hence the explicit 'minimize' direction.
    study = optuna.create_study(direction='minimize', sampler=sampler)
    study.optimize(objective, n_trials=10)

    print('Number of finished trials: {}'.format(len(study.trials)))

    print('Best trial:')
    trial = study.best_trial

    print('  Value: {}'.format(trial.value))

    print('  Params: ')
    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 15.3ms	remaining: 25m 25s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 8.36s	remaining: 13m 46s
2000:	learn: 0.9245827	test: 0.9006059	best: 0.9006059 (2000)	total: 17.6s	remaining: 14m 23s
3000:	learn: 0.9360136	test: 0.9055066	best: 0.9055079 (2998)	total: 27s	remaining: 14m 32s
4000:	learn: 0.9430414	test: 0.9073381	best: 0.9073420 (3998)	total: 36.4s	remaining: 14m 32s
5000:	learn: 0.9478400	test: 0.9080983	best: 0.9080983 (5000)	total: 45.4s	remaining: 14m 22s
6000:	learn: 0.9516715	test: 0.9086114	best: 0.9086167 (5981)	total: 54.7s	remaining: 14m 17s
7000:	learn: 0.9549268	test: 0.9088968	best: 0.9089023 (6967)	total: 1m 4s	remaining: 14m 11s
8000:	learn: 0.9577814	test: 0.9092031	best: 0.9092095 (7991)	total: 1m 13s	remaining: 14m 6s
9000:	learn: 0.9603890	test: 0.9092759	best: 0.9092761 (8999)	total: 1m 23s	remaining: 14m 4s
10000:	learn: 0.9627478	test: 0.9096062	best: 0.9096093 (9996)	tota

[I 2019-04-09 08:16:13,563] Finished trial#0 resulted in value: 0.08743489681740324. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 12.5ms	remaining: 20m 48s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 9.32s	remaining: 15m 21s
2000:	learn: 0.9245825	test: 0.9006061	best: 0.9006061 (2000)	total: 18.3s	remaining: 14m 57s
3000:	learn: 0.9359559	test: 0.9054418	best: 0.9054442 (2998)	total: 27.3s	remaining: 14m 42s
4000:	learn: 0.9432339	test: 0.9072279	best: 0.9072280 (3999)	total: 36.1s	remaining: 14m 25s
5000:	learn: 0.9479851	test: 0.9080756	best: 0.9080756 (5000)	total: 45s	remaining: 14m 14s
6000:	learn: 0.9518053	test: 0.9086762	best: 0.9086788 (5998)	total: 53.7s	remaining: 14m
7000:	learn: 0.9550500	test: 0.9088437	best: 0.9088683 (6955)	total: 1m 2s	remaining: 13m 50s
8000:	learn: 0.9578717	test: 0.9090790	best: 0.9090848 (7984)	total: 1m 11s	remaining: 13m 38s
9000:	learn: 0.9605459	test: 0.9091780	best: 0.9092080 (8922)	total: 1m 19s	remaining: 13m 27s
10000:	learn: 0.9628794	test: 0.9094971	best: 0.9095144 (9933)	total:

[I 2019-04-09 08:27:17,442] Finished trial#1 resulted in value: 0.08759734718916556. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 13.1ms	remaining: 21m 53s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 9.35s	remaining: 15m 24s
2000:	learn: 0.9245827	test: 0.9006060	best: 0.9006060 (2000)	total: 18.5s	remaining: 15m 7s
3000:	learn: 0.9360135	test: 0.9055066	best: 0.9055079 (2998)	total: 27.5s	remaining: 14m 47s
4000:	learn: 0.9430415	test: 0.9073380	best: 0.9073421 (3998)	total: 37s	remaining: 14m 46s
5000:	learn: 0.9478399	test: 0.9080983	best: 0.9080983 (5000)	total: 46.4s	remaining: 14m 40s
6000:	learn: 0.9516715	test: 0.9086112	best: 0.9086165 (5981)	total: 55.7s	remaining: 14m 33s
7000:	learn: 0.9549268	test: 0.9088960	best: 0.9089010 (6967)	total: 1m 4s	remaining: 14m 19s
8000:	learn: 0.9577847	test: 0.9092084	best: 0.9092146 (7991)	total: 1m 13s	remaining: 14m 4s
9000:	learn: 0.9603791	test: 0.9092805	best: 0.9092810 (8999)	total: 1m 22s	remaining: 13m 51s
10000:	learn: 0.9627329	test: 0.9095974	best: 0.9095974 (10000)	tot

[I 2019-04-09 08:38:22,449] Finished trial#2 resulted in value: 0.08754510099985358. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 13.8ms	remaining: 22m 56s
1000:	learn: 0.9023330	test: 0.8854338	best: 0.8854338 (1000)	total: 9.18s	remaining: 15m 8s
2000:	learn: 0.9245826	test: 0.9006060	best: 0.9006060 (2000)	total: 18.3s	remaining: 14m 55s
3000:	learn: 0.9359559	test: 0.9054419	best: 0.9054442 (2998)	total: 27.7s	remaining: 14m 56s
4000:	learn: 0.9432340	test: 0.9072278	best: 0.9072282 (3999)	total: 37.1s	remaining: 14m 50s
5000:	learn: 0.9479851	test: 0.9080755	best: 0.9080755 (5000)	total: 47s	remaining: 14m 51s
6000:	learn: 0.9518053	test: 0.9086761	best: 0.9086787 (5998)	total: 55.9s	remaining: 14m 35s
7000:	learn: 0.9550527	test: 0.9088442	best: 0.9088659 (6955)	total: 1m 4s	remaining: 14m 21s
8000:	learn: 0.9579322	test: 0.9090521	best: 0.9090591 (7985)	total: 1m 13s	remaining: 14m 1s
9000:	learn: 0.9606090	test: 0.9091780	best: 0.9092072 (8921)	total: 1m 21s	remaining: 13m 45s
10000:	learn: 0.9629239	test: 0.9094320	best: 0.9094427 (9953)	tota

[I 2019-04-09 08:49:55,612] Finished trial#3 resulted in value: 0.08762931913000616. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 12ms	remaining: 19m 59s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 8.4s	remaining: 13m 51s
2000:	learn: 0.9245827	test: 0.9006059	best: 0.9006059 (2000)	total: 17s	remaining: 13m 53s
3000:	learn: 0.9360135	test: 0.9055064	best: 0.9055079 (2998)	total: 25.8s	remaining: 13m 55s
4000:	learn: 0.9430415	test: 0.9073381	best: 0.9073422 (3998)	total: 34.6s	remaining: 13m 50s
5000:	learn: 0.9478399	test: 0.9080984	best: 0.9080984 (5000)	total: 43.3s	remaining: 13m 42s
6000:	learn: 0.9516715	test: 0.9086111	best: 0.9086166 (5981)	total: 52.6s	remaining: 13m 43s
7000:	learn: 0.9549262	test: 0.9088956	best: 0.9089018 (6967)	total: 1m 1s	remaining: 13m 42s
8000:	learn: 0.9577885	test: 0.9091927	best: 0.9092016 (7991)	total: 1m 11s	remaining: 13m 39s
9000:	learn: 0.9604065	test: 0.9092600	best: 0.9092608 (8999)	total: 1m 20s	remaining: 13m 29s
10000:	learn: 0.9626940	test: 0.9094825	best: 0.9094963 (9954)	total

[I 2019-04-09 09:00:59,745] Finished trial#4 resulted in value: 0.08763553982977601. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 13.5ms	remaining: 22m 33s
1000:	learn: 0.9023330	test: 0.8854338	best: 0.8854338 (1000)	total: 8.84s	remaining: 14m 34s
2000:	learn: 0.9245825	test: 0.9006062	best: 0.9006062 (2000)	total: 18.3s	remaining: 14m 54s
3000:	learn: 0.9360135	test: 0.9055067	best: 0.9055079 (2998)	total: 27.8s	remaining: 14m 58s
4000:	learn: 0.9430415	test: 0.9073380	best: 0.9073418 (3998)	total: 37.1s	remaining: 14m 49s
5000:	learn: 0.9478399	test: 0.9080983	best: 0.9080983 (5000)	total: 46.6s	remaining: 14m 44s
6000:	learn: 0.9516715	test: 0.9086112	best: 0.9086167 (5981)	total: 55.9s	remaining: 14m 35s
7000:	learn: 0.9549267	test: 0.9088959	best: 0.9089010 (6967)	total: 1m 5s	remaining: 14m 29s
8000:	learn: 0.9577847	test: 0.9092083	best: 0.9092148 (7991)	total: 1m 14s	remaining: 14m 20s
9000:	learn: 0.9604000	test: 0.9093004	best: 0.9093006 (8999)	total: 1m 24s	remaining: 14m 12s
10000:	learn: 0.9627832	test: 0.9095673	best: 0.9095697 (9984)	

[I 2019-04-09 09:12:23,396] Finished trial#5 resulted in value: 0.08748319823342654. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 11.9ms	remaining: 19m 46s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 8.68s	remaining: 14m 18s
2000:	learn: 0.9245826	test: 0.9006060	best: 0.9006060 (2000)	total: 17.7s	remaining: 14m 27s
3000:	learn: 0.9359558	test: 0.9054418	best: 0.9054443 (2998)	total: 26.2s	remaining: 14m 5s
4000:	learn: 0.9432340	test: 0.9072280	best: 0.9072281 (3999)	total: 34.6s	remaining: 13m 49s
5000:	learn: 0.9479850	test: 0.9080755	best: 0.9080755 (5000)	total: 42.9s	remaining: 13m 35s
6000:	learn: 0.9518053	test: 0.9086762	best: 0.9086787 (5998)	total: 51.3s	remaining: 13m 24s
7000:	learn: 0.9550527	test: 0.9088442	best: 0.9088660 (6955)	total: 59.7s	remaining: 13m 12s
8000:	learn: 0.9579196	test: 0.9090608	best: 0.9090650 (7984)	total: 1m 8s	remaining: 13m 2s
9000:	learn: 0.9606317	test: 0.9091628	best: 0.9091797 (8922)	total: 1m 16s	remaining: 12m 52s
10000:	learn: 0.9629750	test: 0.9094172	best: 0.9094445 (9933)	tot

[I 2019-04-09 09:24:46,584] Finished trial#6 resulted in value: 0.0874690647548726. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 12.2ms	remaining: 20m 22s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 9.46s	remaining: 15m 35s
2000:	learn: 0.9245826	test: 0.9006060	best: 0.9006060 (2000)	total: 19s	remaining: 15m 29s
3000:	learn: 0.9360135	test: 0.9055064	best: 0.9055079 (2998)	total: 28.6s	remaining: 15m 24s
4000:	learn: 0.9430415	test: 0.9073380	best: 0.9073420 (3998)	total: 38.1s	remaining: 15m 13s
5000:	learn: 0.9478399	test: 0.9080983	best: 0.9080983 (5000)	total: 47.5s	remaining: 15m 2s
6000:	learn: 0.9516715	test: 0.9086111	best: 0.9086167 (5981)	total: 56.7s	remaining: 14m 48s
7000:	learn: 0.9549267	test: 0.9088967	best: 0.9089024 (6967)	total: 1m 5s	remaining: 14m 33s
8000:	learn: 0.9577814	test: 0.9092029	best: 0.9092095 (7991)	total: 1m 14s	remaining: 14m 21s
9000:	learn: 0.9603890	test: 0.9092759	best: 0.9092759 (9000)	total: 1m 23s	remaining: 14m 6s
10000:	learn: 0.9627478	test: 0.9096062	best: 0.9096093 (9996)	tota

[I 2019-04-09 09:35:58,460] Finished trial#7 resulted in value: 0.08753055846852698. Current best value is 0.08743489681740324 with parameters: {'bagging_temperature': 1.8516867103545565}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 12.3ms	remaining: 20m 32s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 8.46s	remaining: 13m 56s
2000:	learn: 0.9245827	test: 0.9006059	best: 0.9006059 (2000)	total: 16.9s	remaining: 13m 49s
3000:	learn: 0.9360136	test: 0.9055064	best: 0.9055079 (2998)	total: 25.6s	remaining: 13m 47s
4000:	learn: 0.9430415	test: 0.9073380	best: 0.9073420 (3998)	total: 34.1s	remaining: 13m 37s
5000:	learn: 0.9478400	test: 0.9080984	best: 0.9080984 (5000)	total: 42.9s	remaining: 13m 35s
6000:	learn: 0.9516714	test: 0.9086109	best: 0.9086165 (5981)	total: 51.8s	remaining: 13m 32s
7000:	learn: 0.9549381	test: 0.9089089	best: 0.9089089 (7000)	total: 1m	remaining: 13m 27s
8000:	learn: 0.9578491	test: 0.9091311	best: 0.9091381 (7984)	total: 1m 9s	remaining: 13m 19s
9000:	learn: 0.9604451	test: 0.9091948	best: 0.9091997 (8924)	total: 1m 18s	remaining: 13m 14s
10000:	learn: 0.9627767	test: 0.9093990	best: 0.9094120 (9910)	tota

[I 2019-04-09 09:49:01,890] Finished trial#8 resulted in value: 0.08742990908400272. Current best value is 0.08742990908400272 with parameters: {'bagging_temperature': 1.8800584587984723}.


0:	learn: 0.5973849	test: 0.6022441	best: 0.6022441 (0)	total: 12.3ms	remaining: 20m 29s
1000:	learn: 0.9023330	test: 0.8854339	best: 0.8854339 (1000)	total: 9.34s	remaining: 15m 23s
2000:	learn: 0.9245826	test: 0.9006060	best: 0.9006060 (2000)	total: 18.9s	remaining: 15m 25s
3000:	learn: 0.9360136	test: 0.9055066	best: 0.9055079 (2998)	total: 28.4s	remaining: 15m 19s
4000:	learn: 0.9430415	test: 0.9073381	best: 0.9073419 (3998)	total: 37.3s	remaining: 14m 54s
5000:	learn: 0.9478400	test: 0.9080984	best: 0.9080984 (5000)	total: 46.7s	remaining: 14m 47s
6000:	learn: 0.9516715	test: 0.9086113	best: 0.9086166 (5981)	total: 56.1s	remaining: 14m 37s
7000:	learn: 0.9549268	test: 0.9088967	best: 0.9089023 (6967)	total: 1m 6s	remaining: 14m 38s
8000:	learn: 0.9577814	test: 0.9092029	best: 0.9092095 (7991)	total: 1m 15s	remaining: 14m 23s
9000:	learn: 0.9603966	test: 0.9092775	best: 0.9092775 (9000)	total: 1m 23s	remaining: 14m 6s
10000:	learn: 0.9627726	test: 0.9095916	best: 0.9095948 (9996)	t

[I 2019-04-09 10:00:58,006] Finished trial#9 resulted in value: 0.08749994727118915. Current best value is 0.08742990908400272 with parameters: {'bagging_temperature': 1.8800584587984723}.


Number of finished trials: 10
Best trial:
  Value: 0.08742990908400272
  Params: 
    bagging_temperature: 1.8800584587984723
