In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
import lightgbm as lgb
import catboost as cb
from sklearn.metrics import accuracy_score

## 1. binary classification

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

x, y = load_breast_cancer(return_X_y=True, as_frame=True)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.8, random_state=42)

In [3]:
class_weight = {0: 1, 1: 2}
sample_weight = ytrain.map(class_weight).values

### 1.1. DecisionTreeClassifier

In [4]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = DecisionTreeClassifier(class_weight=class_weight, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = DecisionTreeClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.8881578947368421
score: 0.9013157894736842
score: 0.9013157894736842
True


### 1.2. RandomForestClassifier

In [5]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = RandomForestClassifier(class_weight=class_weight, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = RandomForestClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9473684210526315
score: 0.9451754385964912
score: 0.9451754385964912
True


### 1.3. XGBClassifier

In [6]:
from xgboost import XGBClassifier

model = XGBClassifier(scale_pos_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = XGBClassifier(scale_pos_weight=class_weight[1], random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = XGBClassifier(scale_pos_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9495614035087719
score: 0.9627192982456141
score: 0.9627192982456141
True


In [7]:
model = xgb.train(
    {
        'objective': 'binary:logistic',
        'scale_pos_weight': None,
    },
    dtrain=xgb.DMatrix(xtrain, label=ytrain, weight=None),
)
ypred = model.predict(xgb.DMatrix(xtest))
print('score:', accuracy_score(ytest, ypred > 0.5))

model = xgb.train(
    {
        'objective': 'binary:logistic',
        'scale_pos_weight': class_weight[1],
    },
    dtrain=xgb.DMatrix(xtrain, label=ytrain, weight=None),
)
ypred1 = model.predict(xgb.DMatrix(xtest))
print('score:', accuracy_score(ytest, ypred1 > 0.5))

model = xgb.train(
    {
        'objective': 'binary:logistic',
        'scale_pos_weight': None,
    },
    dtrain=xgb.DMatrix(xtrain, label=ytrain, weight=sample_weight),
)
ypred2 = model.predict(xgb.DMatrix(xtest))
print('score:', accuracy_score(ytest, ypred2 > 0.5))

print((ypred1 == ypred2).all())

score: 0.9495614035087719
score: 0.956140350877193
score: 0.956140350877193
True


### 1.4. LGBMClassifier

In [8]:
from lightgbm import LGBMClassifier

model = LGBMClassifier(scale_pos_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = LGBMClassifier(scale_pos_weight=class_weight[1], random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = LGBMClassifier(scale_pos_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())  # !!!

score: 0.9495614035087719
score: 0.9517543859649122
score: 0.9517543859649122
False


In [9]:
from lightgbm import LGBMClassifier

model = LGBMClassifier(class_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = LGBMClassifier(class_weight=class_weight, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = LGBMClassifier(class_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9495614035087719
score: 0.9517543859649122
score: 0.9517543859649122
True


In [10]:
model = lgb.train(
    {
        'objective': 'binary',
        'scale_pos_weight': None,
        'verbosity': -1,
    },
    train_set=lgb.Dataset(xtrain, label=ytrain, weight=None),
)
ypred = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred > 0.5))

model = lgb.train(
    {
        'objective': 'binary',
        'scale_pos_weight': class_weight[1],
        'verbosity': -1,
    },
    train_set=lgb.Dataset(xtrain, label=ytrain, weight=None),
)
ypred1 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred1 > 0.5))

model = lgb.train(
    {
        'objective': 'binary',
        'scale_pos_weight': None,
        'verbosity': -1,
    },
    train_set=lgb.Dataset(xtrain, label=ytrain, weight=sample_weight),
)
ypred2 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred2 > 0.5))

print((ypred1 == ypred2).all())  # !!!

score: 0.9495614035087719
score: 0.9517543859649122
score: 0.9517543859649122
False


### 1.5. CatBoostClassifier

In [11]:
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=100, scale_pos_weight=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = CatBoostClassifier(iterations=100, scale_pos_weight=class_weight[1], random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = CatBoostClassifier(iterations=100, scale_pos_weight=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())  # !!!

score: 0.9627192982456141
score: 0.9605263157894737
score: 0.9605263157894737
True


In [12]:
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=100, class_weights=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = CatBoostClassifier(iterations=100, class_weights=class_weight, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = CatBoostClassifier(iterations=100, class_weights=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9627192982456141
score: 0.9605263157894737
score: 0.9605263157894737
True


In [13]:
model = cb.train(
    params={
        'loss_function': 'Logloss',
        'scale_pos_weight': 1.0,
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred > 0.5))

model = cb.train(
    params={
        'loss_function': 'Logloss',
        'scale_pos_weight': class_weight[1],
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred1 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred1 > 0.5))

model = cb.train(
    params={
        'loss_function': 'Logloss',
        'scale_pos_weight': 1.0,
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=sample_weight),
    iterations=100,
)
ypred2 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred2 > 0.5))

print((ypred1 == ypred2).all())

score: 0.9495614035087719
score: 0.9583333333333334
score: 0.9583333333333334
True


In [14]:
model = cb.train(
    params={
        'loss_function': 'Logloss',
        'class_weights': [1, 1],
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred > 0.5))

model = cb.train(
    params={
        'loss_function': 'Logloss',
        'class_weights': class_weight,
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred1 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred1 > 0.5))

model = cb.train(
    params={
        'loss_function': 'Logloss',
        'class_weights': [1, 1],
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=sample_weight),
    iterations=100,
)
ypred2 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred2 > 0.5))

print((ypred1 == ypred2).all())

score: 0.9495614035087719
score: 0.9583333333333334
score: 0.9583333333333334
True


### 1.6. Hold-Out Method

In [15]:
from dm_utils.hom import HOM

model_params = [None]*6
model_params[-1] = {'natural_gradient': False}
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, model_params=model_params)



[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.9565217391304348, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.069 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's auc: 0.940476	valid_0's binary_logloss: 0.396679	valid_0's binary_error: 0.0434783
Did not meet early stopping. Best iteration is:
[1]	valid_0's auc: 0.944444	valid_0's binary_logloss: 0.665593	valid_0's binary_error: 0.391304
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 0.785 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.6086956521739131, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	test: 0.9722222	best: 0.9722222 (0)	total: 1.1ms	remaining: 109ms
99:	test: 0.9444444	best: 0.9722222 (0)	total: 91.8ms	remaining: 0us

bestTest = 0.9722222222
bestIteration = 0

Shrink model to first 1 iterations.
[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.143 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.8695652173913043, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.6802 val_loss=0.6457 scale=512.0000 norm=249.4059
[32m[INFO] Model 

Unnamed: 0,acc,model
model0,0.956522,DecisionTreeClassifier
model1,0.913043,RandomForestClassifier
model2,0.956522,XGBoost
model3,0.608696,LightGBM
model4,0.869565,CatBoost
model5,0.956522,NGBClassifier
all,0.956522,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [16]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, class_weight=class_weight, model_params=model_params)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.083 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.9565217391304348, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's auc: 0.968254	valid_0's binary_logloss: 0.311995	valid_0's binary_error: 0.027027
Did not meet early stopping. Best iteration is:
[94]	valid_0's auc: 0.972222	valid_0's binary_logloss: 0.31978	valid_0's binary_error: 0.0540541
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 2.933 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	test: 0.9563492	best: 0.9563492 (0)	total: 1.04ms	remaining: 103ms
99:	test: 0.9365079	best: 0.9920635 (1)	total: 85.8ms	remaining: 0us

bestTest = 0.9920634921
bestIteration = 1

Shrink model to first 2 iterations.
[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.105 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.6515 val_loss=0.3858 scale=512.0000 norm=252.8395
[32m[INFO] Model

Unnamed: 0,acc,model
model0,0.913043,DecisionTreeClassifier
model1,0.956522,RandomForestClassifier
model2,0.608696,XGBoost
model3,0.913043,LightGBM
model4,0.913043,CatBoost
model5,0.913043,NGBClassifier
all,0.913043,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [17]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.068 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.9565217391304348, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's auc: 0.968254	valid_0's binary_logloss: 0.311995	valid_0's binary_error: 0.027027
Did not meet early stopping. Best iteration is:
[94]	valid_0's auc: 0.972222	valid_0's binary_logloss: 0.31978	valid_0's binary_error: 0.0540541
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 1.303 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	test: 0.9563492	best: 0.9563492 (0)	total: 991us	remaining: 98.1ms
99:	test: 0.9365079	best: 0.9920635 (1)	total: 89ms	remaining: 0us

bestTest = 0.9920634921
bestIteration = 1

Shrink model to first 2 iterations.
[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.145 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.5832 val_loss=0.6593 scale=16.0000 norm=32.0000
[32m[INFO] Model NGB

Unnamed: 0,acc,model
model0,0.913043,DecisionTreeClassifier
model1,0.956522,RandomForestClassifier
model2,0.608696,XGBoost
model3,0.913043,LightGBM
model4,0.913043,CatBoost
model5,0.913043,NGBClassifier
all,0.913043,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [18]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, class_weight=class_weight, weight_train=sample_weight, model_params=model_params)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.069 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.9565217391304348, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's auc: 0.944444	valid_0's binary_logloss: 0.215296	valid_0's binary_error: 0.138462
Did not meet early stopping. Best iteration is:
[69]	valid_0's auc: 0.968254	valid_0's binary_logloss: 0.249411	valid_0's binary_error: 0.138462
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 1.335 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.6086956521739131, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	test: 0.9285714	best: 0.9285714 (0)	total: 1.68ms	remaining: 166ms
99:	test: 0.9285714	best: 0.9365079 (40)	total: 86.4ms	remaining: 0us

bestTest = 0.9365079365
bestIteration = 40

Shrink model to first 41 iterations.
[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.107 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.9565217391304348, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.5782 val_loss=0.3718 scale=128.0000 norm=61.8052
[32m[INFO] Mod

Unnamed: 0,acc,model
model0,0.913043,DecisionTreeClassifier
model1,0.956522,RandomForestClassifier
model2,0.608696,XGBoost
model3,0.608696,LightGBM
model4,0.956522,CatBoost
model5,0.869565,NGBClassifier
all,0.956522,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


### 1.7. Out-of-Fold Method

In [19]:
from dm_utils import OOF

oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.9565217391304348, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.78462
[99]	valid-auc:0.95769
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 1.297 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 0.9130434782608695, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m
Training until validation scores don't improve for 200 rounds




[100]	valid_0's auc: 0.992063	valid_0's binary_logloss: 0.405306	valid_0's binary_error: 0.0434783
Did not meet early stopping. Best iteration is:
[18]	valid_0's auc: 0.996032	valid_0's binary_logloss: 0.597975	valid_0's binary_error: 0.391304
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 0.387 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.6086956521739131, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	test: 0.8616071	best: 0.8616071 (0)	total: 1.03ms	remaining: 102ms
99:	test: 0.9821429	best: 1.0000000 (8)	total: 86.2ms	remaining: 0us

bestTest = 1
bestIteration = 8

Shrink model to first 9 iterations.
[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.142 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 0.8636363636363636, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=0.6710 val_loss=0.5939 scale=16.0000 norm=32.0000
[32m[I

Unnamed: 0,acc,model
fold0,0.956522,DecisionTreeClassifier
fold1,0.913043,XGBoost
fold2,0.608696,LightGBM
fold3,0.863636,CatBoost
fold4,1.0,NGBClassifier
all,0.867257,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [20]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, class_weight=class_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.78462
[99]	valid-auc:0.95769
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 2.679 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 0.5652173913043478, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m
Training until validation scores don't improve for 200 rounds
[100]	valid_0's auc: 0.996032	valid_0's binary_logloss: 0.323744	vali



[32m[INFO] Model NGBClassifier, Fold 5 / 5 training finish, cost time 0.143 s.[0m
[36m[SUCEESS] 5 / 5 fold validation scores: {'acc': 0.9545454545454546, 'model': 'NGBClassifier'}[0m
[32m[INFO] 5-fold training finish, cost time 2.951 s.[0m
[36m[SUCEESS] total 5-fold validation scores:{'acc': 0.7964601769911505, 'model': 'CatBoost,DecisionTreeClassifier,LightGBM,NGBClassifier,XGBoost'}[0m


Unnamed: 0,acc,model
fold0,0.913043,DecisionTreeClassifier
fold1,0.565217,XGBoost
fold2,0.608696,LightGBM
fold3,0.954545,CatBoost
fold4,0.954545,NGBClassifier
all,0.79646,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [21]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.78462
[99]	valid-auc:0.95769
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 0.202 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 0.5652173913043478, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m
Training until validation scores don't improve for 200 rounds




[100]	valid_0's auc: 0.996032	valid_0's binary_logloss: 0.323744	valid_0's binary_error: 0.0540541
Did not meet early stopping. Best iteration is:
[18]	valid_0's auc: 0.996032	valid_0's binary_logloss: 0.48881	valid_0's binary_error: 0.243243
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 2.658 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.6086956521739131, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	test: 0.9196429	best: 0.9196429 (0)	total: 1.29ms	remaining: 128ms
99:	test: 0.9821429	best: 1.0000000 (6)	total: 86.2ms	remaining: 0us

bestTest = 1
bestIteration = 6

Shrink model to first 7 iterations.
[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.136 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 0.9545454545454546, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=0.6075 val_loss=0.4957 scale=32.0000 norm=64.0000
[32m[IN

Unnamed: 0,acc,model
fold0,0.913043,DecisionTreeClassifier
fold1,0.565217,XGBoost
fold2,0.608696,LightGBM
fold3,0.954545,CatBoost
fold4,1.0,NGBClassifier
all,0.80531,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [22]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, class_weight=class_weight, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.003 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.9130434782608695, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.91154
[99]	valid-auc:0.96154
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 0.389 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 0.5652173913043478, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m
Training until validation scores don't improve for 200 rounds




[100]	valid_0's auc: 0.992063	valid_0's binary_logloss: 0.225064	valid_0's binary_error: 0.138462
Did not meet early stopping. Best iteration is:
[3]	valid_0's auc: 0.992063	valid_0's binary_logloss: 0.392953	valid_0's binary_error: 0.138462
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 1.536 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.6086956521739131, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	test: 0.9642857	best: 0.9642857 (0)	total: 978us	remaining: 96.9ms
99:	test: 0.9821429	best: 0.9910714 (2)	total: 81.3ms	remaining: 0us

bestTest = 0.9910714286
bestIteration = 2

Shrink model to first 3 iterations.
[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.101 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 0.9090909090909091, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=0.5466 val_loss=0.5032 scale=16.0000 norm=32.000

Unnamed: 0,acc,model
fold0,0.913043,DecisionTreeClassifier
fold1,0.565217,XGBoost
fold2,0.608696,LightGBM
fold3,0.909091,CatBoost
fold4,0.954545,NGBClassifier
all,0.787611,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


## 2. multi-class classification

In [23]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

x, y = load_iris(return_X_y=True, as_frame=True)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.5, random_state=42)

In [24]:
class_weight = {0: 1, 1: 2, 2: 4}
sample_weight = ytrain.map(class_weight).values

### 2.1. DecisionTreeClassifier

In [25]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = DecisionTreeClassifier(class_weight=class_weight, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = DecisionTreeClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9066666666666666
score: 0.9733333333333334
score: 0.9733333333333334
True


### 2.2. RandomForestClassifier

In [26]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = RandomForestClassifier(class_weight=class_weight, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = RandomForestClassifier(class_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9866666666666667
score: 0.9866666666666667
score: 0.9866666666666667
True


### 2.3. XGBClassifier

In [27]:
from xgboost import XGBClassifier

# X class_weight
model = XGBClassifier(scale_pos_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = XGBClassifier(scale_pos_weight=None, random_state=42)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.96
score: 0.9733333333333334
False


### 2.4. LGBMClassifier

In [28]:
from lightgbm import LGBMClassifier

# X class_weight
model = LGBMClassifier(scale_pos_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = LGBMClassifier(scale_pos_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())  # !!!

score: 0.9733333333333334
score: 0.9733333333333334
False


In [29]:
from lightgbm import LGBMClassifier

model = LGBMClassifier(class_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
ypred = model.predict_proba(xtest)
print('score:', accuracy_score(ytest, ypred.argmax(axis=1)))

# class_weight
model = LGBMClassifier(class_weight=class_weight, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = LGBMClassifier(class_weight=None, random_state=42, verbosity=-1)
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred == ypred1).all())
print((ypred1 == ypred2).all())

score: 0.9733333333333334
score: 0.9733333333333334
score: 0.9733333333333334
False
True


### 2.5. CatBoostClassifier

In [30]:
from catboost import CatBoostClassifier

# X class_weight
model = CatBoostClassifier(iterations=100, scale_pos_weight=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = CatBoostClassifier(iterations=100, scale_pos_weight=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())  # !!!

score: 0.9733333333333334
score: 0.9866666666666667
False


In [31]:
from catboost import CatBoostClassifier

model = CatBoostClassifier(iterations=100, class_weights=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))

# class_weight
model = CatBoostClassifier(iterations=100, class_weights=class_weight, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=None)
print('score:', model.score(xtest, ytest))
ypred1 = model.predict_proba(xtest)

# sample_weight
model = CatBoostClassifier(iterations=100, class_weights=None, random_state=42, logging_level='Silent')
model.fit(xtrain, ytrain, sample_weight=sample_weight)
print('score:', model.score(xtest, ytest))
ypred2 = model.predict_proba(xtest)

print((ypred1 == ypred2).all())

score: 0.9733333333333334
score: 0.9866666666666667


score: 0.9866666666666667
True


In [32]:
model = cb.train(
    params={
        'loss_function': 'MultiClass',
        'class_weights': [1, 1, 1],
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred.argmax(axis=1)))

model = cb.train(
    params={
        'loss_function': 'MultiClass',
        'class_weights': class_weight,
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=None),
    iterations=100,
)
ypred1 = model.predict(xtest)
print('score:', accuracy_score(ytest, ypred1.argmax(axis=1)))

model = cb.train(
    params={
        'loss_function': 'MultiClass',
        'class_weights': [1, 1, 1],
        'logging_level': 'Silent',
    },
    dtrain=cb.Pool(xtrain, label=ytrain, weight=sample_weight),
    iterations=100,
)
ypred2 = model.predict(xtest)
print('score:', accuracy_score(ytest, (ypred2).argmax(axis=1)))

print((ypred1 == ypred2).all())

score: 0.9733333333333334
score: 0.9866666666666667
score: 0.9866666666666667
True


### 2.6. Hold-Out Method

In [33]:
from dm_utils.hom import HOM

hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.066 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'RandomForestClassifier'}[0m
[3



Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.759871	valid_0's multi_error: 0.133333
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.759871	valid_0's multi_error: 0.133333
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 0.344 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	learn: 1.0951472	test: 1.0962260	best: 1.0962260 (0)	total: 314us	remaining: 31.2ms
99:	learn: 0.7795466	test: 0.8347248	best: 0.8347248 (99)	total: 15.2ms	remaining: 0us

bestTest = 0.8347247632
bestIteration = 99

[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.03 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=1.0912 val_loss=0.7080 scale=32.0000 norm=109.2548
[32m[IN

Unnamed: 0,acc,model
model0,0.866667,DecisionTreeClassifier
model1,0.866667,RandomForestClassifier
model2,0.866667,XGBoost
model3,0.866667,LightGBM
model4,0.866667,CatBoost
model5,0.866667,NGBClassifier
all,0.866667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [34]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, class_weight=class_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.065 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's multi_logloss: 0.682746	valid_0's multi_error: 0.235294
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.682746	valid_0's multi_error: 0.235294
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 14.43 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.6, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	learn: 1.0950060	test: 1.0965407	best: 1.0965407 (0)	total: 383us	remaining: 38ms
99:	learn: 0.7853090	test: 0.8705478	best: 0.8705478 (99)	total: 16.9ms	remaining: 0us

bestTest = 0.8705477866
bestIteration = 99

[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.032 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.9961 val_loss=0.9808 scale=32.0000 norm=109.2548
[32m[INFO] Model NGBClassifier 6 / 6 training finish, cost time 0.183 s.[0m
[36m[SU

Unnamed: 0,acc,model
model0,0.866667,DecisionTreeClassifier
model1,0.866667,RandomForestClassifier
model2,0.866667,XGBoost
model3,0.6,LightGBM
model4,0.866667,CatBoost
model5,0.8,NGBClassifier
all,0.866667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [35]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.064 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'RandomForestClassifier'}[0m
[3



[100]	valid_0's multi_logloss: 0.682746	valid_0's multi_error: 0.235294
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.682746	valid_0's multi_error: 0.235294
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 1.887 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.6, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	learn: 1.0950060	test: 1.0965407	best: 1.0965407 (0)	total: 258us	remaining: 25.6ms
99:	learn: 0.7853090	test: 0.8705478	best: 0.8705478 (99)	total: 14.3ms	remaining: 0us

bestTest = 0.8705477866
bestIteration = 99

[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.029 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=1.0404 val_loss=0.7699 scale=16.0000 norm=54.6274
[32m[INFO] Model NGBClassifier 6 / 6 training finish, cost time 0.182 s.[0m
[36m[S

Unnamed: 0,acc,model
model0,0.866667,DecisionTreeClassifier
model1,0.866667,RandomForestClassifier
model2,0.866667,XGBoost
model3,0.6,LightGBM
model4,0.866667,CatBoost
model5,0.866667,NGBClassifier
all,0.866667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [36]:
hom = HOM(task='cls', model=['dt', 'rf', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
hom.fit(xtrain, ytrain, record_time=True, class_weight=class_weight, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'rf' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] not provided X_valid and y_valid, auto split train set into train and valid.[0m
[32m[INFO] hold-out method training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier 1 / 6 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 6 model validation scores: {'acc': 0.6666666666666666, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training begin.[0m
[32m[INFO] Model RandomForestClassifier 2 / 6 training finish, cost time 0.065 s.[0m
[36m[SUCEESS] 2 / 6 model validation scores: {'acc': 0.8666666666666667, 'model': 'RandomForestClassifier'}[0m
[3



Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.491243	valid_0's multi_error: 0.208333
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.491243	valid_0's multi_error: 0.208333
[32m[INFO] Model LightGBM 4 / 6 training finish, cost time 2.886 s.[0m
[36m[SUCEESS] 4 / 6 model validation scores: {'acc': 0.4666666666666667, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost 5 / 6 training begin.[0m
0:	learn: 1.0944103	test: 1.0959907	best: 1.0959907 (0)	total: 308us	remaining: 30.5ms
99:	learn: 0.7746309	test: 0.8558903	best: 0.8558903 (99)	total: 17.3ms	remaining: 0us

bestTest = 0.8558902591
bestIteration = 99

[32m[INFO] Model CatBoost 5 / 6 training finish, cost time 0.068 s.[0m
[36m[SUCEESS] 5 / 6 model validation scores: {'acc': 0.6666666666666666, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier 6 / 6 training begin.[0m
[iter 0] loss=0.9515 val_loss=0.7833 scale=16.0000 norm=54.6274
[32m[IN

Unnamed: 0,acc,model
model0,0.666667,DecisionTreeClassifier
model1,0.866667,RandomForestClassifier
model2,0.8,XGBoost
model3,0.466667,LightGBM
model4,0.666667,CatBoost
model5,0.866667,NGBClassifier
all,0.8,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


### 2.7. Out-of-Fold Method

In [37]:
from dm_utils import OOF

oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.97306
[99]	valid-auc:1.00000
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 3.106 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 0.9333333333333333, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m




Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.793885	valid_0's multi_error: 0.2
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.793885	valid_0's multi_error: 0.2
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 0.411 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.8, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	learn: 1.0948377	test: 1.0950641	best: 1.0950641 (0)	total: 239us	remaining: 23.7ms
99:	learn: 0.7861882	test: 0.8183327	best: 0.8183327 (99)	total: 14.8ms	remaining: 0us

bestTest = 0.8183326855
bestIteration = 99

[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.031 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 0.9333333333333333, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=1.0976 val_loss=0.0000 scale=512.0000 norm=1748.0773
[32m[IN

Unnamed: 0,acc,model
fold0,0.866667,DecisionTreeClassifier
fold1,0.933333,XGBoost
fold2,0.8,LightGBM
fold3,0.933333,CatBoost
fold4,0.8,NGBClassifier
all,0.866667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [38]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, class_weight=class_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:1.00000
[99]	valid-auc:1.00000
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 3.111 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 1.0, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m




Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.697607	valid_0's multi_error: 0.235294
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.697607	valid_0's multi_error: 0.235294
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 2.683 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.5333333333333333, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	learn: 1.0951741	test: 1.0947838	best: 1.0947838 (0)	total: 292us	remaining: 29ms
99:	learn: 0.8045640	test: 0.8304102	best: 0.8304102 (99)	total: 17.9ms	remaining: 0us

bestTest = 0.8304101857
bestIteration = 99

[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.035 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 1.0, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=1.0305 val_loss=0.0000 scale=512.0000 norm=1748.0773


Unnamed: 0,acc,model
fold0,0.866667,DecisionTreeClassifier
fold1,1.0,XGBoost
fold2,0.533333,LightGBM
fold3,1.0,CatBoost
fold4,0.8,NGBClassifier
all,0.84,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [39]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.8666666666666667, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:1.00000
[99]	valid-auc:1.00000
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 2.396 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 1.0, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m
Training until validation scores don't improve for 200 rounds




[100]	valid_0's multi_logloss: 0.697607	valid_0's multi_error: 0.235294
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.697607	valid_0's multi_error: 0.235294
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 0.631 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.5333333333333333, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	learn: 1.0951741	test: 1.0947838	best: 1.0947838 (0)	total: 302us	remaining: 29.9ms
99:	learn: 0.8045640	test: 0.8304102	best: 0.8304102 (99)	total: 19.1ms	remaining: 0us

bestTest = 0.8304101857
bestIteration = 99

[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.039 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 1.0, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=1.0743 val_loss=2.0992 scale=512.0000 norm=1748.0773
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training finish,

Unnamed: 0,acc,model
fold0,0.866667,DecisionTreeClassifier
fold1,1.0,XGBoost
fold2,0.533333,LightGBM
fold3,1.0,CatBoost
fold4,0.733333,NGBClassifier
all,0.826667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."


In [40]:
oof = OOF(task='cls', model=['dt', 'xgb', 'lgb', 'cb', 'ngb'], epochs=100)
oof.fit(xtrain, ytrain, record_time=True, class_weight=class_weight, weight_train=sample_weight)

[33m[CONFLICT] model_str 'dt' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[33m[CONFLICT] model_str 'ngb' is not supported when sklearn_api=False, automatically use sklearn api.[0m
[32m[INFO] 5-fold training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training begin.[0m
[32m[INFO] Model DecisionTreeClassifier, Fold 1 / 5 training finish, cost time 0.002 s.[0m
[36m[SUCEESS] 1 / 5 fold validation scores: {'acc': 0.6666666666666666, 'model': 'DecisionTreeClassifier'}[0m
[32m[INFO] Model XGBoost, Fold 2 / 5 training begin.[0m
[0]	valid-auc:0.96854
[99]	valid-auc:1.00000
[32m[INFO] Model XGBoost, Fold 2 / 5 training finish, cost time 0.365 s.[0m
[36m[SUCEESS] 2 / 5 fold validation scores: {'acc': 1.0, 'model': 'XGBoost'}[0m
[32m[INFO] Model LightGBM, Fold 3 / 5 training begin.[0m




Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 0.418338	valid_0's multi_error: 0.0980392
Did not meet early stopping. Best iteration is:
[100]	valid_0's multi_logloss: 0.418338	valid_0's multi_error: 0.0980392
[32m[INFO] Model LightGBM, Fold 3 / 5 training finish, cost time 2.975 s.[0m
[36m[SUCEESS] 3 / 5 fold validation scores: {'acc': 0.5333333333333333, 'model': 'LightGBM'}[0m
[32m[INFO] Model CatBoost, Fold 4 / 5 training begin.[0m
0:	learn: 1.0948456	test: 1.0943479	best: 1.0943479 (0)	total: 269us	remaining: 26.7ms
99:	learn: 0.7993970	test: 0.8136178	best: 0.8136178 (99)	total: 15.7ms	remaining: 0us

bestTest = 0.8136177785
bestIteration = 99

[32m[INFO] Model CatBoost, Fold 4 / 5 training finish, cost time 0.065 s.[0m
[36m[SUCEESS] 4 / 5 fold validation scores: {'acc': 0.9333333333333333, 'model': 'CatBoost'}[0m
[32m[INFO] Model NGBClassifier, Fold 5 / 5 training begin.[0m
[iter 0] loss=1.0053 val_loss=0.7912 scale=16.00

Unnamed: 0,acc,model
fold0,0.666667,DecisionTreeClassifier
fold1,1.0,XGBoost
fold2,0.533333,LightGBM
fold3,0.933333,CatBoost
fold4,0.8,NGBClassifier
all,0.786667,"CatBoost,DecisionTreeClassifier,LightGBM,NGBCl..."
