In [1]:
import pandas as pd
import numpy as np
import os
import sys
import polars as pl
import json
from joblib import Parallel, delayed
import deepchem
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import StratifiedGroupKFold, GroupKFold, KFold, GroupShuffleSplit, ShuffleSplit
import gc
import random
from functools import partial
import catboost
from catboost import FeaturesData, Pool, CatBoost, CatBoostClassifier, CatBoostRegressor, CatBoostRanker
import time
import math
from sklearn.metrics import average_precision_score
import sys
sys.path.append('../')
from src.utils.metrics import compute_metrics
from src.utils.misc import dict_as_table

# Polars display settings: table row limit 20, table column limit 50.
# Each setter hands back the Config class, so the calls can be chained.
cfg = pl.Config()
cfg.set_tbl_rows(20).set_tbl_cols(50)

No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch_geometric'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. cannot import name 'DMPNN' from 'deepchem.models.torch_models' (/opt/conda/lib/python3.10/site-packages/deepchem/models/torch_models/__init__.py)
Skipped loading some Jax models, missing a dependency. No module named 'jax'
Skipped loading some PyTorch models, missing a dependency. No module named 'tensorflow'


polars.config.Config

In [2]:
# Root directory of the competition data; other data paths are derived from it.
DATA_DIR = '/home/dangnh36/datasets/competitions/leash_belka/'
# Feature arrays live under <DATA_DIR>/processed/features/.
# The empty last component makes the joined path end with a separator.
FEATURE_DIR = os.path.join(DATA_DIR, 'processed', 'features', '')
# Name of the sub-directory of <DATA_DIR>/processed/cv/ that holds the split files.
CV_STRATEGY = '16_19_0_2'
CV_FOLD_IDX = 0
TRAIN_SUBSET = 'train_7.8M'

# Target names; prediction/target column names are derived from them so the
# three lists always share the same order.
PROTEINS = ['BRD4', 'HSA', 'sEH']
PRED_COLS = [f'pred_{col}' for col in PROTEINS]
TARGET_COLS = [f'target_{col}' for col in PROTEINS]

# Utils

In [3]:
# Timestamp of the previous log() call; None until the first call starts the clock.
_CUR_TIME = None


def log(*args, **kwargs):
    """Print a message prefixed with the seconds elapsed since the previous call.

    The first call only starts the clock: it prints 'Start logging time..' and
    ignores its arguments. Every later call resets the clock and, if positional
    arguments were given, prints ``[<elapsed> s] <args...>`` (``kwargs`` are
    forwarded to ``print``). A later call without positional arguments resets
    the clock silently.
    """
    global _CUR_TIME
    if _CUR_TIME is None:
        print('Start logging time..')
        _CUR_TIME = time.time()
        return

    now = time.time()
    elapsed = round(now - _CUR_TIME, 2)
    _CUR_TIME = now
    if args:
        print(f'[{elapsed} s]', *args, **kwargs)

            
def evaluate(val_df, preds):
    """Score predictions against a validation frame.

    Works on a copy of ``val_df`` so the caller's frame is not modified, writes
    ``preds`` into the ``PRED_COLS`` columns of that copy and returns the result
    of ``compute_metrics`` on it.
    """
    scored_df = val_df.copy()
    scored_df[PRED_COLS] = preds
    return compute_metrics(scored_df)

## Load train-test split

In [4]:
def load_cv_split(data_dir, cv_strategy, cv_fold_idx, stage = 'train'):
    """Load the row indices of one side of a cross-validation split.

    Split files are read from ``<data_dir>/processed/cv/<cv_strategy>/``.

    * If ``'kf'`` occurs in ``cv_strategy``, ``cv.csv`` is filtered on its
      ``fold_idx`` column: rows with ``fold_idx != cv_fold_idx`` for
      ``stage='train'``, rows with ``fold_idx == cv_fold_idx`` for
      ``stage='val'``. The returned frame has an ``index`` column and a constant
      ``subset`` column equal to 3.
    * Otherwise ``train.csv`` (``stage='train'``) or ``val.csv``
      (``stage='val'``) is read. Validation rows with ``subset == 1`` are
      dropped (NOTE(review): the meaning of the subset codes is not visible
      here - confirm against the code that writes ``val.csv``).

    Args:
        data_dir: root data directory.
        cv_strategy: name of the split directory.
        cv_fold_idx: fold to select (only used by the ``'kf'`` layout, plus the
            reserved value 611 below).
        stage: ``'train'`` or ``'val'``.

    Returns:
        ``(df, idxs)`` where ``idxs`` is a 1-D numpy array of row indices and
        ``df`` is a pandas frame with ``index`` and ``subset`` columns. ``df`` is
        ``None`` for the non-``'kf'`` training split.

    Raises:
        AssertionError: ``stage`` is neither ``'train'`` nor ``'val'``.
        NotImplementedError: non-``'kf'`` training split with
            ``cv_fold_idx == 611`` (presumably reserved for "train on all
            rows"; never implemented).
    """
    # Validate once, before any branch, so both layouts fail the same way.
    if stage not in ('train', 'val'):
        raise AssertionError(f"stage must be 'train' or 'val', got {stage!r}")

    cv_dir = os.path.join(data_dir, 'processed', 'cv', cv_strategy)

    if 'kf' in cv_strategy:
        if stage == 'train':
            filter_cond = pl.col('fold_idx') != cv_fold_idx
        else:
            filter_cond = pl.col('fold_idx') == cv_fold_idx
        split_df = (
            pl.scan_csv(os.path.join(cv_dir, 'cv.csv'))
            .filter(filter_cond)
            .select(pl.col('index'), subset=pl.lit(3).cast(pl.UInt8))
            .collect()
            .to_pandas()
        )
        return split_df, split_df['index'].to_numpy()

    if stage == 'train':
        if cv_fold_idx == 611:
            raise NotImplementedError
        idxs = (
            pl.scan_csv(os.path.join(cv_dir, 'train.csv'))
            .select(pl.col('index'))
            .collect()
            .to_numpy()
            .flatten()
        )
        return None, idxs

    val_df = (
        pl.scan_csv(os.path.join(cv_dir, 'val.csv'))
        .filter(pl.col('subset') != 1)
        .select(pl.col('index'), pl.col('subset').cast(pl.UInt8))
        .collect()
        .to_pandas()
    )
    return val_df, val_df['index'].to_numpy()

In [5]:
# Validation rows come from the configured CV split.
val_df, val_idxs = load_cv_split(DATA_DIR, CV_STRATEGY, CV_FOLD_IDX, 'val')

# Training rows come from a separate index file under the same data root
# (built from DATA_DIR instead of repeating the absolute path).
train_idxs = (
    pl.scan_csv(os.path.join(DATA_DIR, 'processed', 'cv', 'train_5_7.8M_idxs.csv'))
    .collect()['index']
    .to_numpy()
)

val_df.shape, val_idxs.shape, train_idxs.shape

((205900, 2), (205900,), (7812348,))

# Load data

In [6]:
# Labels for every row of train_v2.csv as a uint8 array with one column per
# entry of PROTEINS, so the column order always matches TARGET_COLS / PRED_COLS.
all_targets = (
    pl.scan_csv(os.path.join(DATA_DIR, 'processed', 'train_v2.csv'))
    .select(pl.col(PROTEINS).cast(pl.UInt8))
    .collect()
    .to_numpy()
)
all_targets.shape

(98415610, 3)

In [7]:
# Column indices (out of 2048) that are excluded from the ECFP6 features.
ECFP6_IGNORE_COLS = [39, 378, 397, 450, 650, 807, 1152, 1182, 1184, 1201, 1350, 1380, 1582, 1621, 1855, 1917, 1967, 2007]
# Everything else, in ascending order.
ECFP6_KEEP_COLS = sorted(set(range(2048)) - set(ECFP6_IGNORE_COLS))

# Same scheme for the 210 RDKit columns.
RDKIT_IGNORE_COLS = [10, 69, 82, 125, 157, 158, 160, 162, 163, 164, 168, 170, 174, 175, 178, 186, 192, 193, 197, 199, 206]
RDKIT_KEEP_COLS = sorted(set(range(210)) - set(RDKIT_IGNORE_COLS))

len(ECFP6_KEEP_COLS), len(RDKIT_KEEP_COLS)

(2030, 189)

### Train pool

In [8]:
# Bit-packed ECFP6 array for the full training table, opened as a read-only
# memory map; it is indexed again when the validation features are built.
all_ecfp6s = np.load(os.path.join(FEATURE_DIR, 'ecfp6', 'train.npy'), mmap_mode='r')

# Select the training rows, unpack every byte into 8 bit columns along the
# last axis, then keep only the columns listed in ECFP6_KEEP_COLS.
train_ecfp6s = np.unpackbits(all_ecfp6s[train_idxs], axis=-1)[:, ECFP6_KEEP_COLS]

# Only the ECFP6 bits are used as features; they are cast to float32 for the Pool.
train_features = train_ecfp6s.astype(np.float32)
print('Train features:', train_features.shape, train_features.dtype)

train_targets = all_targets[train_idxs]
print('Train targets:', train_targets.shape, train_targets.dtype)

# All other Pool options are left at their defaults.
train_pool = Pool(data=train_features, label=train_targets, thread_count=-1)
train_pool.num_row(), train_pool.num_col()

Train features: (7812348, 2030) float32
Train targets: (7812348, 3) uint8


(7812348, 2030)

### Val pool

In [9]:
# Same pipeline as for the training rows: select rows from the packed array,
# unpack the bits along the last axis, keep the ECFP6_KEEP_COLS columns.
val_ecfp6s = np.unpackbits(all_ecfp6s[val_idxs], axis=-1)[:, ECFP6_KEEP_COLS]

# Only the ECFP6 bits are used as features; they are cast to float32 for the Pool.
val_features = val_ecfp6s.astype(np.float32)
print('Val features:', val_features.shape, val_features.dtype)

val_targets = all_targets[val_idxs]
# Label fixed: these are the validation targets, not the training ones.
print('Val targets:', val_targets.shape, val_targets.dtype)

val_pool = Pool(
    data=val_features,
    label=val_targets,
    cat_features=None,
    weight=None,
    baseline=None,
    feature_names=None,
    thread_count=-1,
)
val_pool.num_row(), val_pool.num_col()

Val features: (205900, 2030) float32
Train targets: (205900, 3) uint8


(205900, 2030)

In [10]:
# Attach the validation labels to val_df, one column per entry of TARGET_COLS;
# the evaluation code reads them back from these columns.
val_df[TARGET_COLS] = val_targets

In [11]:
# Drop the notebook-level references to the intermediate feature arrays (the
# Pools were already built from them) and run a garbage-collection pass.
del train_ecfp6s, train_features
del val_ecfp6s, val_features

gc.collect()

314

## Model

In [12]:
# Baseline keyword arguments for the CatBoost model constructor. build_model()
# copies this dict and overrides individual entries.
# Most entries are None; judging by the printed model.get_params() output, None
# entries are not reported as explicit parameters (presumably CatBoost falls
# back to its own default for them - confirm in the CatBoost docs).
# NOTE(review): some keys look like CatBoost aliases of one another (e.g.
# l2_leaf_reg / reg_lambda, learning_rate / eta, custom_metric / custom_loss,
# and max_depth vs. the `depth` key that build_model passes) - keep at most one
# of each pair non-None.
CBM_DEFAULT_PARAMS = dict(
    # Common settings
    loss_function='MultiLogloss',
    custom_metric=None,
    eval_metric=None,
    iterations=None,
    learning_rate=None,
    random_state=42,
    l2_leaf_reg=None,
    bootstrap_type=None,
    bagging_temperature=None,
    subsample=None,
    sampling_frequency=None,  # PerTreeLevel | PerTree
    sampling_unit=None,  # Object | Group
    mvs_reg = None,
    random_strength=None,
    use_best_model = True,
    best_model_min_trees = None,
    max_depth = None,   # default = 6 | 16 if Lossguide
    grow_policy = 'SymmetricTree',
    min_data_in_leaf = None,
    max_leaves = None,  # default = 31, Lossguide only
    ignored_features = None,  # a list
    one_hot_max_size = None,  # usually defaults to 2
    has_time = False,
    rsm = None, # default = 1
    nan_mode = 'Forbidden',
    input_borders = None,   # should be set
    output_borders = None,  # should be set
    fold_permutation_block = None,  # default 1
    leaf_estimation_method = None,
    leaf_estimation_iterations = None,
    leaf_estimation_backtracking = None, # AnyImprovement
    fold_len_multiplier = None, # default 2
    approx_on_full_history = None,  # default False -> faster but may be less accurate
    class_weights = None,   # list | dict | OrderedDict
    classes_count = None,
    class_names = None, # list of str
    auto_class_weights = None, # None | Balanced | SqrtBalanced
    scale_pos_weight = None,
    boosting_type = None, # Ordered | Plain
    boost_from_average = None,
    langevin = None, # default False
    diffusion_temperature = None,
    posterior_sampling = None, # default False
    allow_const_label = False,
    score_function = None, # Cosine | L2 | NewtonCosine | NewtonL2
    monotone_constraints = None, # prior knowledge
    feature_weights = None, # prior knowledge or obtained from feature importance
    first_feature_use_penalties = None, # TODO: purpose not reviewed yet
    fixed_binary_splits = None, # TODO: purpose not reviewed yet
    penalties_coefficient = None, # default 1
    per_object_feature_penalties = None, # TODO: purpose not reviewed yet
    model_shrink_rate = None, # TODO: purpose not reviewed yet
    model_shrink_mode = None, # TODO: purpose not reviewed yet
    # CTR Settings
    simple_ctr = None,
    combinations_ctr = None,
    per_feature_ctr = None,
    ctr_target_border_count = None,
    counter_calc_method = None, # SkipTest | Full
    max_ctr_complexity = None,  # default 4, 1 if multiclass
    ctr_leaf_count_limit = None, # defaults to no limit
    store_all_simple_ctr = None,
    final_ctr_computation_mode = None,
    # Output Settings
    logging_level = 'Verbose',
    metric_period = 100,
    train_dir = 'runs/catboost/',
    model_size_reg = None,  # defaults to 0.5, could try turning it off
    allow_writing_files = True,
    save_snapshot = True, # not supported in cv, should be disabled for grid search
    snapshot_file = None, # dangerous!
    snapshot_interval = 600,
    # roc_file = None,
    border_count=None,
    feature_border_type=None,

    # Remaining constructor arguments that have not been reviewed or grouped
    # yet; all left as None.

    per_float_feature_quantization=None,
    od_pval=None,
    od_wait=None,
    od_type=None,
    thread_count=None,
    name=None,
    custom_loss=None,
    used_ram_limit=None,
    gpu_ram_part=None,
    task_type=None,
    device_config=None,
    devices=None,
    dev_score_calc_obj_block_size=None,
    reg_lambda=None,
    eta=None,
    gpu_cat_features_storage=None,
    data_partition=None,
    metadata=None,
    early_stopping_rounds=None,
    cat_features=None,
    ctr_history_unit=None,
    text_features=None,
    tokenizers=None,
    dictionaries=None,
    feature_calcers=None,
    text_processing=None,
)

# CBM_DEFAULT_PARAMS = dict()

In [13]:
!rm -r runs/catboost/chunk=None
def build_model(chunk_idx = None):
    """Create an unfitted CatBoostClassifier configured for this experiment.

    The run directory ``runs/catboost/chunk=<chunk_idx>/`` is created if needed
    and used as CatBoost's ``train_dir``. Parameters are ``CBM_DEFAULT_PARAMS``
    with the experiment-specific overrides below applied on top. The model's
    explicit parameters are printed before it is returned.

    Args:
        chunk_idx: value interpolated into the run directory name.

    Returns:
        The configured ``CatBoostClassifier``.
    """
    save_dir = f'runs/catboost/chunk={chunk_idx}/'
    os.makedirs(save_dir, exist_ok=True)

    # Experiment-specific settings; anything not listed keeps its value from
    # CBM_DEFAULT_PARAMS.
    overrides = {
        'task_type': 'GPU',
        'devices': '1:3',
        'loss_function': 'MultiLogloss',
        'iterations': 15000,
        'learning_rate': 0.2,
        'random_state': 42,
        'train_dir': save_dir,
        'depth': 6,
        'min_data_in_leaf': 2,
        'bootstrap_type': 'No',
        'gpu_ram_part': 0.75,
        'early_stopping_rounds': 1000,
    }

    params = {**CBM_DEFAULT_PARAMS, **overrides}
    model = CatBoostClassifier(**params)

    print(model.get_params())

    return model

In [14]:
!rm -r runs/catboost/chunk=None

rm: cannot remove 'runs/catboost/chunk=None': No such file or directory


In [15]:
# build_model() already prints the explicit parameters, so they are not
# printed a second time here.
model = build_model()

# Train on the training pool and evaluate on the validation pool, logging
# every 100 iterations and showing the live metric plot.
model.fit(
    train_pool,
    eval_set=val_pool,
    verbose=100,
    plot=True,
)
print('ALL PARAMS:\n', model.get_all_params())

{'iterations': 15000, 'learning_rate': 0.2, 'depth': 6, 'loss_function': 'MultiLogloss', 'nan_mode': 'Forbidden', 'use_best_model': True, 'logging_level': 'Verbose', 'metric_period': 100, 'has_time': False, 'allow_const_label': False, 'train_dir': 'runs/catboost/chunk=None/', 'save_snapshot': True, 'snapshot_interval': 600, 'gpu_ram_part': 0.75, 'allow_writing_files': True, 'task_type': 'GPU', 'devices': '1:3', 'bootstrap_type': 'No', 'random_state': 42, 'early_stopping_rounds': 1000, 'grow_policy': 'SymmetricTree', 'min_data_in_leaf': 2}
{'iterations': 15000, 'learning_rate': 0.2, 'depth': 6, 'loss_function': 'MultiLogloss', 'nan_mode': 'Forbidden', 'use_best_model': True, 'logging_level': 'Verbose', 'metric_period': 100, 'has_time': False, 'allow_const_label': False, 'train_dir': 'runs/catboost/chunk=None/', 'save_snapshot': True, 'snapshot_interval': 600, 'gpu_ram_part': 0.75, 'allow_writing_files': True, 'task_type': 'GPU', 'devices': '1:3', 'bootstrap_type': 'No', 'random_state': 

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 0.4300400	test: 0.4161575	best: 0.4161575 (0)	total: 533ms	remaining: 2h 13m 19s
100:	learn: 0.0932320	test: 0.0386495	best: 0.0386495 (100)	total: 15.2s	remaining: 37m 17s
200:	learn: 0.0831780	test: 0.0351689	best: 0.0351626 (198)	total: 30.1s	remaining: 36m 59s
300:	learn: 0.0783270	test: 0.0331490	best: 0.0331490 (300)	total: 45.3s	remaining: 36m 49s
400:	learn: 0.0751468	test: 0.0319114	best: 0.0319114 (400)	total: 59s	remaining: 35m 46s
500:	learn: 0.0729417	test: 0.0309673	best: 0.0309673 (500)	total: 1m 13s	remaining: 35m 16s
600:	learn: 0.0713193	test: 0.0303599	best: 0.0303599 (600)	total: 1m 27s	remaining: 34m 59s
700:	learn: 0.0700204	test: 0.0298160	best: 0.0298160 (700)	total: 1m 42s	remaining: 34m 42s
800:	learn: 0.0689616	test: 0.0293742	best: 0.0293742 (800)	total: 1m 56s	remaining: 34m 22s
900:	learn: 0.0680646	test: 0.0289856	best: 0.0289856 (900)	total: 2m 10s	remaining: 34m
1000:	learn: 0.0672615	test: 0.0286451	best: 0.0286451 (1000)	total: 2m 24s	remain

8700:	learn: 0.0531706	test: 0.0249685	best: 0.0249676 (8671)	total: 22m 40s	remaining: 16m 24s
8800:	learn: 0.0530840	test: 0.0249540	best: 0.0249535 (8797)	total: 22m 53s	remaining: 16m 7s
8900:	learn: 0.0529980	test: 0.0249419	best: 0.0249404 (8886)	total: 23m 7s	remaining: 15m 50s
9000:	learn: 0.0529132	test: 0.0249327	best: 0.0249324 (8999)	total: 23m 21s	remaining: 15m 33s
9100:	learn: 0.0528308	test: 0.0249342	best: 0.0249320 (9002)	total: 23m 36s	remaining: 15m 18s
9200:	learn: 0.0527481	test: 0.0249333	best: 0.0249317 (9191)	total: 23m 52s	remaining: 15m 3s
9300:	learn: 0.0526629	test: 0.0249290	best: 0.0249233 (9232)	total: 24m 11s	remaining: 14m 49s
9400:	learn: 0.0525783	test: 0.0249244	best: 0.0249233 (9232)	total: 24m 29s	remaining: 14m 35s
9500:	learn: 0.0524940	test: 0.0249187	best: 0.0249182 (9495)	total: 24m 48s	remaining: 14m 21s
9600:	learn: 0.0524106	test: 0.0249057	best: 0.0249057 (9600)	total: 25m 3s	remaining: 14m 5s
9700:	learn: 0.0523316	test: 0.0249006	best: 

In [16]:
# Which subset codes are present in the validation frame.
val_df['subset'].unique()

array([0, 2, 3], dtype=uint8)

In [17]:
##### EVALUATION #####

# Metrics reported per checkpoint, in column order of the final table.
metric_names = ['nonshare_AP', 'share_AP', 'AP',
                'share_BRD4_AP', 'share_HSA_AP', 'share_sEH_AP',
                'nonshare_BRD4_AP', 'nonshare_HSA_AP', 'nonshare_sEH_AP', 'micro_AP', 'nonshare_micro_AP', 'share_micro_AP'
                ]

# Ground truth and the subset == 0 ("nonshare") mask do not depend on the number
# of trees, so they are built once. Both are flattened row-major over
# (row, target column): each row's mask value is repeated once per target.
gts = val_df[TARGET_COLS].to_numpy().flatten()
nonshare_mask = np.repeat((val_df['subset'] == 0).to_numpy(), len(TARGET_COLS))

# Evaluate every 2000 trees, plus the best iteration itself.
best_iteration = model.get_best_iteration()
checkpoints = list(range(0, best_iteration, 2000)) + [best_iteration]

all_scores = []
for ntree_end in checkpoints:
    print(f'\n\n------------{ntree_end}-----------')
    # ntree_end is passed as ntree_end + 1 so that tree number `ntree_end` is included.
    preds = model.predict_proba(val_pool,
                                ntree_start = 0,
                                ntree_end = ntree_end + 1,
                                verbose = 100)
    metrics = evaluate(val_df, preds)

    # Micro-averaged AP over all (row, target) pairs, overall and per subset group.
    flat_preds = preds.flatten()
    metrics['micro_AP'] = average_precision_score(gts, flat_preds)
    metrics['nonshare_micro_AP'] = average_precision_score(gts[nonshare_mask], flat_preds[nonshare_mask])
    metrics['share_micro_AP'] = average_precision_score(gts[~nonshare_mask], flat_preds[~nonshare_mask])

    # First column is the tree count in thousands; metrics are shown as percentages.
    _scores = [str(round(ntree_end / 1000, 1))] + [str(round(metrics[k] * 100, 4)) for k in metric_names]
    all_scores.append(_scores)
    print('\t'.join(_scores))


print('------------FINAL RESULTS---------------\n')
print('\t'.join(['iterations'] + metric_names))
for scores in all_scores:
    print('\t'.join(scores))



------------0-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


0.0	0.8282	16.8932	8.8607	5.5917	3.1633	41.9245	0.8435	0.7968	0.8442	22.9515	0.7951	24.7268


------------2000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


2.0	3.3158	53.4626	28.3892	52.2987	28.9028	79.1863	2.6862	5.0556	2.2055	56.6093	2.5588	60.4537


------------4000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


4.0	3.3006	55.8261	29.5634	54.8239	30.534	82.1204	2.7269	4.6508	2.5242	58.9836	2.5018	63.0208


------------6000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


6.0	2.7499	56.4597	29.6048	55.6892	30.6473	83.0425	2.951	2.9048	2.394	59.6859	2.0471	63.8125


------------8000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


8.0	2.7818	56.693	29.7374	55.9556	30.7058	83.4177	2.9992	3.0392	2.3071	59.9672	2.1167	64.1184


------------10000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


10.0	2.8286	56.9408	29.8847	56.4299	30.7623	83.6301	2.9758	3.0376	2.4724	60.1874	2.103	64.3584


------------12000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


12.0	2.8798	57.0285	29.9541	56.5772	30.7693	83.7388	3.1508	2.6257	2.8628	60.255	2.0076	64.4443


------------14000-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


14.0	2.9582	57.0218	29.99	56.6104	30.6471	83.808	3.1121	2.8627	2.8997	60.3072	2.0963	64.5009


------------14531-----------


[METRICS] Skip subset=share1 protein=BRD4 and set metric value to None..
[METRICS] Skip subset=share1 protein=HSA and set metric value to None..
[METRICS] Skip subset=share1 protein=sEH and set metric value to None..


14.5	2.9693	57.0991	30.0342	56.724	30.7174	83.8561	3.1707	2.742	2.9952	60.3631	2.0696	64.563
------------FINAL RESULTS---------------

iterations	nonshare_AP	share_AP	AP	share_BRD4_AP	share_HSA_AP	share_sEH_AP	nonshare_BRD4_AP	nonshare_HSA_AP	nonshare_sEH_AP	micro_AP	nonshare_micro_AP	share_micro_AP
0.0	0.8282	16.8932	8.8607	5.5917	3.1633	41.9245	0.8435	0.7968	0.8442	22.9515	0.7951	24.7268
2.0	3.3158	53.4626	28.3892	52.2987	28.9028	79.1863	2.6862	5.0556	2.2055	56.6093	2.5588	60.4537
4.0	3.3006	55.8261	29.5634	54.8239	30.534	82.1204	2.7269	4.6508	2.5242	58.9836	2.5018	63.0208
6.0	2.7499	56.4597	29.6048	55.6892	30.6473	83.0425	2.951	2.9048	2.394	59.6859	2.0471	63.8125
8.0	2.7818	56.693	29.7374	55.9556	30.7058	83.4177	2.9992	3.0392	2.3071	59.9672	2.1167	64.1184
10.0	2.8286	56.9408	29.8847	56.4299	30.7623	83.6301	2.9758	3.0376	2.4724	60.1874	2.103	64.3584
12.0	2.8798	57.0285	29.9541	56.5772	30.7693	83.7388	3.1508	2.6257	2.8628	60.255	2.0076	64.4443
14.0	2.9582	57.0218	29.99	56.6104	30.6471

In [18]:
# Display the trained classifier object; the cell output is its repr.
model

<catboost.core.CatBoostClassifier at 0x7f325996bd30>