In [3]:
import math

import holoviews as hv
import numpy as np
import pandas as pd

import catboost as cb
import lightgbm as lgb
import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [4]:
hv.extension('matplotlib')

In [5]:
def create_synthetic_data(N, K, random_seed=1):
    np.random.seed(random_seed)
    X = np.random.random(size=(N, K))
    y = (
        + 5 * X[:, 0] + np.sin(5 * 2 * math.pi * X[:, 0])
        - 5 * X[:, 1] - np.cos(5 * 2 * math.pi * X[:, 1])
        + np.random.normal(loc=0.0, scale=0.01, size=N)
    )
    return train_test_split(X, y)

In [6]:
X_train, X_test, y_train, y_test = create_synthetic_data(N=1000, K=2)

In [7]:
def get_model_name(regressor):
    constarint_param_name = 'monotone_constraints'
    if constarint_param_name in regressor.get_params():
        model_name = "{} with monotone constraints {}".format(regressor.__class__.__name__,
                                                              regressor.get_params()[constarint_param_name])
    else:
        model_name = "{} without monotone constraints".format(regressor.__class__.__name__)
    return model_name

In [8]:
def print_loss_value(regressor, X_test, y_test):
    y_predicted = regressor.predict(X_test)
    mse_value = mean_squared_error(y_test, y_predicted)
    print("Model {}. MSE value: {}".format(get_model_name(regressor), mse_value))

In [11]:
def get_regressor_params(model_name):
    params = dict(
        max_depth=4,
        learning_rate=0.01,
        n_estimators=300
    )
    model_specific_params = {
        'catboost_w_constraint': dict(
            monotone_constraints='(1,-1)',
            leaf_estimation_method='Gradient',
            verbose=False
        ),
        'catboost_one_constraint': dict(
            verbose=False
        ),
        'xgboost' : dict(
            monotone_constraints='(1,-1)'
        ),
        'lgbm': dict(
            monotone_constraints='1,-1'
        )
    }[model_name]
    params.update(model_specific_params)
    return params

In [12]:
cb_model = cb.CatBoostRegressor(**get_regressor_params('catboost_w_constraint'))
cb_model.fit(X_train, y_train)

print_loss_value(cb_model, X_test, y_test)

Model CatBoostRegressor with monotone constraints (1,-1). MSE value: 0.754339648179418


In [14]:
xgb_regressor = xgb.XGBRegressor(**get_regressor_params('xgboost'))
xgb_regressor.fit(X_train, y_train)

print_loss_value(xgb_regressor, X_test, y_test)

Model XGBRegressor with monotone constraints (1,-1). MSE value: 0.6275213554111196


In [15]:
lgb_regressor = lgb.LGBMRegressor(**get_regressor_params('lgbm'))
lgb_regressor.fit(X_train, y_train)

print_loss_value(lgb_regressor, X_test, y_test)

Model LGBMRegressor with monotone constraints 1,-1. MSE value: 0.6369057087168528


In [16]:
def plot_level_contours(regressor):
    step_count = 100
    X_synthetic = (
        np.mgrid[0:1:1/step_count, 0:1:1/step_count]
        .reshape(2, -1).T
    )
    y = regressor.predict(X_synthetic)
    data = pd.Series(data=y, index=pd.MultiIndex.from_arrays(X_synthetic.T))
    image_data = (
        data
        .unstack()
        .values
        [::-1,:]
    )
    
    def plot_line(df):
        return hv.Curve(
            (df['level_1'].values, df['predict'].values),
            kdims='$x_2$',
            vdims='predict value'
        )
    
    holomap = hv.HoloMap({
        v: plot_line(
            data
            .to_frame('predict')
            .reset_index()
            .loc[lambda df: df['level_0'] == v]
        )
        for v in np.linspace(0, 1, step_count // 4, endpoint=False)
    })
    
    image = (
        hv.Image(
            image_data,
            bounds=(0, 0, 1, 1),
#             label=get_model_name(regressor),
            kdims=['$x_2$', '$x_1$']
        )
        .opts(colorbar=True)
    )
    
    contour_levels = 70
    contours = (
        hv.operation.contours(image, levels=contour_levels)
        .opts(show_legend=False, cmap='fire_r', linewidth=0.8)
    )
    return (
        image * contours
        + holomap.overlay().opts(show_legend=False)
    ).relabel(label=get_model_name(regressor))

In [17]:
%%output fig='svg'
%%opts Image [fig_size=200] 
%%opts Curve (color=hv.Palette('copper_r'))

plot_level_contours(cb_model)

In [18]:
%%output fig='svg'
%%opts Image [fig_size=200] 
%%opts Curve (color=hv.Palette('copper_r'))

plot_level_contours(xgb_regressor)

In [19]:
%%output fig='svg'
%%opts Image [fig_size=200] 
%%opts Curve (color=hv.Palette('copper_r'))

plot_level_contours(lgb_regressor)

## Geo

In [28]:
def read_geo_data():
    data = pd.read_csv('geo_data/geo_data_example.tsv', sep='\t', header=None).drop_duplicates()
    y = data[0].values
    X = data[[1, 2, 3, 4]].values
    return train_test_split(X, y)

In [29]:
X_geo_train, X_geo_test, y_geo_train, y_geo_test = read_geo_data()

In [30]:
def get_regressor_geo_params(model_name):
    params = dict(
        max_depth=4,
        learning_rate=0.01,
        n_estimators=300
    )
    model_specific_params = {
        'catboost_w_constraint': dict(
            monotone_constraints='(1,1,1,1)',
            leaf_estimation_method='Gradient',
            verbose=False
        ),
        'catboost': dict(
            verbose=False
        ),
        'xgboost' : dict(
            monotone_constraints='(1,1,1,1)'
        ),
        'lgbm': dict(
            monotone_constraints='1,1,1,1'
        )
    }[model_name]
    params.update(model_specific_params)
    return params

In [31]:
cb_model = cb.CatBoostRegressor(**get_regressor_geo_params('catboost_w_constraint'))
cb_model.fit(X_geo_train, y_geo_train)

print_loss_value(cb_model, X_geo_test, y_geo_test)

Model CatBoostRegressor with monotone constraints (1,1,1,1). MSE value: 0.1996338299465789


In [32]:
cb_model = cb.CatBoostRegressor(**get_regressor_geo_params('catboost'))
cb_model.fit(X_geo_train, y_geo_train)

print_loss_value(cb_model, X_geo_test, y_geo_test)

Model CatBoostRegressor without monotone constraints. MSE value: 0.1850291707412642


In [33]:
xgb_model = xgb.XGBRegressor(**get_regressor_geo_params('xgboost'))
xgb_model.fit(X_geo_train, y_geo_train)

print_loss_value(xgb_model, X_geo_test, y_geo_test)

Model XGBRegressor with monotone constraints (1,1,1,1). MSE value: 0.20023321716531917


In [34]:
lgbm_model = lgb.LGBMRegressor(**get_regressor_geo_params('lgbm'))
lgbm_model.fit(X_geo_train, y_geo_train)

print_loss_value(lgbm_model, X_geo_test, y_geo_test)

Model LGBMRegressor with monotone constraints 1,1,1,1. MSE value: 0.20027754669445297
