In [1]:
import pandas as pd
import numpy as np
from itertools import combinations
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score
import warnings
# Suppress every warning category for the rest of the session so that
# library warnings do not clutter the cell outputs.
warnings.simplefilter("ignore")
# Seed NumPy's global random number generator.
np.random.seed(seed=42)

In [2]:
# Read the training table and keep the label column as a separate Series.
df = pd.read_csv('train.csv')
y = df.target

# Keep only the predictor columns (drop the row identifier and the label)
# and replace every missing value with the sentinel -9999.
df = df.drop(columns=['ID', 'target']).fillna(-9999)

# Positional indices of all columns that have fewer than 30000 distinct
# values; this list is what gets passed to CatBoost as `cat_features`.
cat_features_ids = np.flatnonzero((df.nunique() < 30000).values).tolist()

In [3]:
# Hold out 10% of the rows for evaluation. `random_state` is pinned explicitly
# so the partition does not depend on how much of NumPy's global random stream
# was consumed before this cell ran; re-executing the cell on its own now
# reproduces the same split.
train, test, y_train, y_test = train_test_split(df, y, test_size=0.1, random_state=42)

In [4]:
def _train_and_report(label, **extra_params):
    """Fit a CatBoost classifier on the training split and print its hold-out ROC-AUC.

    Args:
        label: Text printed in front of the score.
        **extra_params: Extra keyword arguments forwarded to
            ``CatBoostClassifier`` in addition to the shared settings
            (learning rate 0.1, 100 iterations, seed 42, AUC eval metric,
            silent logging).

    Returns:
        tuple: ``(model, proba)`` - the fitted classifier and the result of
        ``predict_proba`` on ``test``.
    """
    model = CatBoostClassifier(learning_rate=0.1, iterations=100, random_seed=42,
                               eval_metric='AUC', logging_level='Silent',
                               **extra_params)
    model.fit(train, y_train, cat_features=cat_features_ids)
    proba = model.predict_proba(test)
    # Column 1 of the probability matrix is used as the score for ROC-AUC.
    print(label, roc_auc_score(y_test, proba[:, 1]))
    return model, proba


# Baseline run: no regularization arguments are passed explicitly.
# NOTE(review): CatBoost documents a default l2_leaf_reg of 3.0, so this run is
# presumably not free of L2 regularization and the two runs would differ only
# in model_size_reg - confirm against the installed CatBoost version.
clf, prediction = _train_and_report('Roc-auc score with Catboost without regularization:')

# Second run: explicit L2 leaf regularization and model-size regularization.
# `clf` and `prediction` end up bound to this run's model and probabilities.
clf, prediction = _train_and_report('Roc-auc score with Catboost with regularization:',
                                    l2_leaf_reg=3, model_size_reg=3)

Roc-auc score with Catboost without regularization: 0.7939610054617733
Roc-auc score with Catboost with regularization: 0.7961023589633582
