In [None]:
import math
from catboost import Pool, CatBoostClassifier

class LoglossObjective(object):
    """User-defined Logloss objective to pass as `loss_function`.

    The object exposes a single method, `calc_ders_range`, which returns
    the first and second derivatives of the Bernoulli log-likelihood with
    respect to the raw prediction (approx) for each object.
    """

    def calc_ders_range(self, approxes, targets, weights):
        """Compute per-object derivatives of the log-likelihood.

        Args:
            approxes: indexed container of floats (only `__len__` and
                `__getitem__` are guaranteed) with raw predictions.
            targets: indexed container of floats of the same length; a
                value greater than 0.0 is treated as the positive class,
                anything else as the negative class.
            weights: indexed container of per-object weights of the same
                length, or None when objects are unweighted.

        Returns:
            A list of `(der1, der2)` pairs, one per object, where
            `der1 = target - p`, `der2 = -p * (1 - p)` and
            `p = sigmoid(approx)`; both are multiplied by the object's
            weight when `weights` is given.

        Raises:
            AssertionError: if the container lengths disagree.
        """
        assert len(approxes) == len(targets)
        if weights is not None:
            assert len(weights) == len(approxes)

        result = []
        # Index-based access on purpose: the containers are only promised to
        # support __len__ and __getitem__.  `range` (rather than `xrange`)
        # keeps the code runnable on Python 3 as well as Python 2.
        for index in range(len(targets)):
            approx = approxes[index]

            # Numerically stable sigmoid: never exponentiate a positive
            # number, so large |approx| cannot raise OverflowError.
            if approx >= 0:
                p = 1.0 / (1.0 + math.exp(-approx))
            else:
                exponent = math.exp(approx)
                p = exponent / (1.0 + exponent)

            der1 = (1 - p) if targets[index] > 0.0 else -p
            der2 = -p * (1 - p)

            if weights is not None:
                der1 *= weights[index]
                der2 *= weights[index]

            result.append((der1, der2))

        return result

# Paths to the training set, the test set and the column-description file
# (relative to the current working directory).
TRAIN_FILE = '../data/adult/train_small'
TEST_FILE = '../data/adult/test_small'
CD_FILE = '../data/adult/train.cd'
# Load data from files to Pool; both pools share the same column description.
train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
test_pool = Pool(TEST_FILE, column_description=CD_FILE)
# Initialize CatBoostClassifier with custom `loss_function`
# (an instance of the user-defined LoglossObjective) and the built-in
# "Logloss" as the evaluation metric.
# NOTE(review): `partition_random_seed` is the seed argument documented for
# catboost.cv -- confirm the installed CatBoostClassifier constructor
# accepts it (the usual constructor argument is `random_seed`).
model = CatBoostClassifier(partition_random_seed=0, loss_function=LoglossObjective(), eval_metric="Logloss")
# Fit model
model.fit(train_pool)
# Only prediction_type='RawFormulaVal' allowed with custom `loss_function`
preds_raw = model.predict(test_pool, prediction_type='RawFormulaVal') 