In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('.'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [8]:
TRAIN_PATH = "train/train.csv"
TEST_PATH = "test/test.csv"

In [4]:
def load_csv(file_path):
    df = pd.read_csv(file_path)
    return df

In [6]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import log_loss
import time


def calculate_score_and_time(model,X,y):
    cv = RepeatedStratifiedKFold(n_splits=5,n_repeats=2,random_state=42)
    start = time.time()
    scores = cross_val_score(model,X,y,cv=cv,n_jobs=-1)
    end = time.time()
    mean_score = np.mean(scores)
    return end-start,mean_score    

In [9]:
train = pd.read_csv(TRAIN_PATH)
train["target"] = train["target"].map({"Class_1":1,"Class_2":2,"Class_3":3,"Class_4":4})
X = train.drop(columns=["id","target"])
y = train.target

In [10]:
model_names = ["hgbc","xgbc","cbc","lgbm"]
models = [HistGradientBoostingClassifier(),XGBClassifier(),CatBoostClassifier(verbose=False),LGBMClassifier()]
scores = {}
            
for model_name,model in zip(model_names,models):
    print("Running for {}".format(model_name))
    delta_time,mean_score = calculate_score_and_time(model,X,y)
    print("Time:{} | Mean score: {}".format(delta_time,mean_score))
    scores[model_name] = delta_time,mean_score

Running for hgbc
Time:34.34063124656677 | Mean score: 0.5779050000000001
Running for xgbc
Time:114.5469286441803 | Mean score: 0.576735
Running for cbc
Time:37.40938949584961 | Mean score: nan
Running for lgbm
Time:15.663515567779541 | Mean score: 0.57895


In [15]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,stratify=y,random_state=42)

In [16]:
def return_log_loss_catboost(params,x_train,x_test,y_train,y_test):
    model = CatBoostClassifier(**params)
    model.fit(x_train,y_train,eval_set=[(x_test,y_test)])
    y_preds = model.predict_proba(x_test)
    return log_loss(y_test,y_preds)

In [17]:
import optuna

In [26]:
def objective(trial):
    params = {
        "learning_rate": trial.suggest_uniform("learning_rate",0.03,0.3),
        "iterations": trial.suggest_int("iterations",500,1200),
        "depth": trial.suggest_int("depth",3,10),
        "verbose": False,
        "random_seed": 42,
        #"subsample": trial.suggest_uniform("subsample",0.2,1.0),
        #"l2_leaf_reg": trial.suggest_loguniform('reg_lambda' , 1e-8 , 30),
        #"bootstrap_type": "Poisson"
        
    }
    params = {}
    loss = return_log_loss_catboost(params,x_train,x_test,y_train,y_test)
    return loss

In [28]:
params = {
        "learning_rate": trial.suggest_uniform("learning_rate",0.03,0.3),
        "iterations": trial.suggest_int("iterations",500,1200),
        "depth": trial.suggest_int("depth",3,10),
        "verbose": False,
        "random_seed": 42,
        #"subsample": trial.suggest_uniform("subsample",0.2,1.0),
        #"l2_leaf_reg": trial.suggest_loguniform('reg_lambda' , 1e-8 , 30),
        #"bootstrap_type": "Poisson"
        
    }
print(return_log_loss_catboost(params,x_train,x_test,y_train,y_test))

1.0902079496864874


In [30]:
study = optuna.create_study(direction="minimize")
study.optimize(objective,n_trials=3)
best = study.best_trial
for k,v in best.items():
    print(k,v)

[32m[I 2021-06-01 13:56:58,504][0m A new study created in memory with name: no-name-e8dce86f-f159-4035-9c9c-c35f765ad345[0m


Learning rate set to 0.117866
0:	learn: 1.3197231	test: 1.3196908	best: 1.3196908 (0)	total: 74.9ms	remaining: 1m 14s
1:	learn: 1.2724676	test: 1.2724309	best: 1.2724309 (1)	total: 121ms	remaining: 1m
2:	learn: 1.2375771	test: 1.2375668	best: 1.2375668 (2)	total: 157ms	remaining: 52.1s
3:	learn: 1.2111726	test: 1.2111969	best: 1.2111969 (3)	total: 197ms	remaining: 49s
4:	learn: 1.1906878	test: 1.1908131	best: 1.1908131 (4)	total: 246ms	remaining: 48.9s
5:	learn: 1.1750093	test: 1.1751549	best: 1.1751549 (5)	total: 284ms	remaining: 47s
6:	learn: 1.1618915	test: 1.1620728	best: 1.1620728 (6)	total: 322ms	remaining: 45.7s
7:	learn: 1.1518939	test: 1.1521310	best: 1.1521310 (7)	total: 358ms	remaining: 44.4s
8:	learn: 1.1440697	test: 1.1443095	best: 1.1443095 (8)	total: 394ms	remaining: 43.3s
9:	learn: 1.1377689	test: 1.1380900	best: 1.1380900 (9)	total: 438ms	remaining: 43.4s
10:	learn: 1.1324154	test: 1.1329302	best: 1.1329302 (10)	total: 478ms	remaining: 43s
11:	learn: 1.1280486	test: 1.

[32m[I 2021-06-01 13:57:28,104][0m Trial 0 finished with value: 1.0905681297724796 and parameters: {'learning_rate': 0.293395985639725, 'iterations': 894, 'depth': 10}. Best is trial 0 with value: 1.0905681297724796.[0m


Learning rate set to 0.117866
0:	learn: 1.3197231	test: 1.3196908	best: 1.3196908 (0)	total: 56.2ms	remaining: 56.2s
1:	learn: 1.2724676	test: 1.2724309	best: 1.2724309 (1)	total: 94.6ms	remaining: 47.2s
2:	learn: 1.2375771	test: 1.2375668	best: 1.2375668 (2)	total: 130ms	remaining: 43.3s
3:	learn: 1.2111726	test: 1.2111969	best: 1.2111969 (3)	total: 163ms	remaining: 40.6s
4:	learn: 1.1906878	test: 1.1908131	best: 1.1908131 (4)	total: 202ms	remaining: 40.2s
5:	learn: 1.1750093	test: 1.1751549	best: 1.1751549 (5)	total: 236ms	remaining: 39.1s
6:	learn: 1.1618915	test: 1.1620728	best: 1.1620728 (6)	total: 273ms	remaining: 38.7s
7:	learn: 1.1518939	test: 1.1521310	best: 1.1521310 (7)	total: 307ms	remaining: 38.1s
8:	learn: 1.1440697	test: 1.1443095	best: 1.1443095 (8)	total: 342ms	remaining: 37.6s
9:	learn: 1.1377689	test: 1.1380900	best: 1.1380900 (9)	total: 381ms	remaining: 37.7s
10:	learn: 1.1324154	test: 1.1329302	best: 1.1329302 (10)	total: 421ms	remaining: 37.9s
11:	learn: 1.1280486

[32m[I 2021-06-01 13:57:56,493][0m Trial 1 finished with value: 1.0905681297724796 and parameters: {'learning_rate': 0.23792145310154847, 'iterations': 1163, 'depth': 3}. Best is trial 0 with value: 1.0905681297724796.[0m


Learning rate set to 0.117866
0:	learn: 1.3197231	test: 1.3196908	best: 1.3196908 (0)	total: 41.8ms	remaining: 41.8s
1:	learn: 1.2724676	test: 1.2724309	best: 1.2724309 (1)	total: 73.1ms	remaining: 36.5s
2:	learn: 1.2375771	test: 1.2375668	best: 1.2375668 (2)	total: 106ms	remaining: 35.3s
3:	learn: 1.2111726	test: 1.2111969	best: 1.2111969 (3)	total: 138ms	remaining: 34.3s
4:	learn: 1.1906878	test: 1.1908131	best: 1.1908131 (4)	total: 172ms	remaining: 34.3s
5:	learn: 1.1750093	test: 1.1751549	best: 1.1751549 (5)	total: 207ms	remaining: 34.3s
6:	learn: 1.1618915	test: 1.1620728	best: 1.1620728 (6)	total: 245ms	remaining: 34.7s
7:	learn: 1.1518939	test: 1.1521310	best: 1.1521310 (7)	total: 268ms	remaining: 33.2s
8:	learn: 1.1440697	test: 1.1443095	best: 1.1443095 (8)	total: 287ms	remaining: 31.6s
9:	learn: 1.1377689	test: 1.1380900	best: 1.1380900 (9)	total: 313ms	remaining: 31s
10:	learn: 1.1324154	test: 1.1329302	best: 1.1329302 (10)	total: 337ms	remaining: 30.3s
11:	learn: 1.1280486	t

[32m[I 2021-06-01 13:58:23,967][0m Trial 2 finished with value: 1.0905681297724796 and parameters: {'learning_rate': 0.19411381973114483, 'iterations': 1055, 'depth': 10}. Best is trial 0 with value: 1.0905681297724796.[0m


AttributeError: 'FrozenTrial' object has no attribute 'items'

In [31]:
best.params

{'learning_rate': 0.293395985639725, 'iterations': 894, 'depth': 10}

train
[4 5 6 7]
test
[0 1 2 3]
**********
train
[0 1 2 3]
test
[4 5 6 7]
**********


train
x_train: [[1 2]
 [3 4]
 [5 6]
 [8 7]
 [5 4]
 [6 9]]
y_train: [0 1 0 0 1 1]
test
x_test: [[2 3]
 [9 7]]
y_test: [1 0]
train
x_train: [[2 3]
 [3 4]
 [5 6]
 [8 7]
 [9 7]
 [5 4]]
y_train: [1 1 0 0 0 1]
test
x_test: [[1 2]
 [6 9]]
y_test: [0 1]
train
x_train: [[1 2]
 [2 3]
 [3 4]
 [8 7]
 [9 7]
 [6 9]]
y_train: [0 1 1 0 0 1]
test
x_test: [[5 6]
 [5 4]]
y_test: [0 1]
train
x_train: [[1 2]
 [2 3]
 [5 6]
 [9 7]
 [5 4]
 [6 9]]
y_train: [0 1 0 0 1 1]
test
x_test: [[3 4]
 [8 7]]
y_test: [1 0]


train
x_train: [[1 2]
 [3 4]
 [5 6]
 [8 7]
 [5 4]
 [6 9]]
y_train: [0 1 0 0 1 1]
test
x_test: [[2 3]
 [9 7]]
y_test: [1 0]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [[2 3]
 [5 6]
 [8 7]
 [9 7]
 [5 4]
 [6 9]]
y_train: [1 0 0 0 1 1]
test
x_test: [[1 2]
 [3 4]]
y_test: [0 1]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [[1 2]
 [2 3]
 [3 4]
 [8 7]
 [9 7]
 [5 4]]
y_train: [0 1 1 0 0 1]
test
x_test: [[5 6]
 [6 9]]
y_test: [0 1]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [[1 2]
 [2 3]
 [3 4]
 [5 6]
 [9 7]
 [6 9]]
y_train: [0 1 1 0 0 1]
test
x_test: [[8 7]
 [5 4]]
y_test: [0 1]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [[1 2]
 [2 3]
 [3 4]
 [8 7]
 [9 7]
 [5 4]]
y_train: [0 1 1 0 0 1]
test
x_test: [[5 6]
 [6 9]]
y_test: [0 1]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [[1 2]
 [3 4]
 [5 6]
 [8 7]
 [5 4]
 [6 9]]
y_train: [0 1 0 0 1 1]
test
x_test: [[2 3]
 [9 7]]
y_test: [1 0]
*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
train
x_train: [

In [76]:
round(0.5781950000000001,5)

0.5782

In [77]:
round(0.578225,5)

0.57822