In [1]:
import pandas as pd
import numpy as np
import tscv

In [4]:
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=2000,
    n_features=300,
    n_informative=50,
    n_redundant=100,
    n_repeated=3,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=0.8,
    random_state=0,
)

In [21]:
X

array([[-3.25881317e+01,  1.70874235e+01, -1.29391499e+01, ...,
        -8.51258792e-01, -1.53178446e+00, -1.50698396e-01],
       [-1.03656424e+01,  4.33730298e+00,  2.42950813e+00, ...,
        -1.15984265e+00, -2.33711722e+00, -2.14649610e+00],
       [ 1.08179340e+01,  1.63839904e+00, -1.08745900e+01, ...,
         6.16342485e-01,  4.74399400e-01, -2.22337845e-01],
       ...,
       [ 3.84771904e+00, -1.14674294e+00,  2.73913170e+01, ...,
         2.75171720e-01,  1.30260629e+00, -2.19881855e-01],
       [ 8.76114760e+00, -7.45586087e+00,  2.30390429e+01, ...,
        -1.25665604e-01,  1.68971976e-02, -1.68873449e+00],
       [ 2.43241546e+00, -1.84056652e+01, -4.06367385e+01, ...,
        -4.14220196e-01, -3.28572357e-01, -8.74997555e-01]])

In [7]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit
from catboost import CatBoostClassifier

min_features_to_select = 100
clf = CatBoostClassifier(verbose=0)
cv = tscv.GapKFold(n_splits=5, gap_before=200, gap_after=200)

rfecv = RFECV(
    estimator=clf,
    step=0.25,
    cv=cv,
    scoring="neg_log_loss",
    verbose=100,
    min_features_to_select=min_features_to_select
)

In [17]:
weight_train = np.random.random(len(y))
weight_train

fit_params = {"sample_weight": weight_train}

In [20]:
rfecv.fit_transform(X, y, **fit_params)

TypeError: RFECV.fit() got an unexpected keyword argument 'sample_weight'

In [23]:
rfecv.get_support()

array([ True, False,  True, False, False,  True, False, False, False,
        True,  True,  True,  True, False,  True,  True, False, False,
       False, False, False,  True,  True, False, False, False, False,
       False, False, False, False, False,  True,  True,  True, False,
        True, False, False, False, False,  True, False, False, False,
        True, False, False, False,  True, False, False, False, False,
       False, False, False,  True,  True,  True,  True, False,  True,
        True, False, False, False, False, False,  True,  True, False,
        True,  True, False, False, False, False, False,  True, False,
        True, False,  True, False, False, False, False, False, False,
        True, False, False,  True,  True,  True, False, False,  True,
       False, False,  True, False,  True, False, False,  True, False,
        True, False,  True,  True,  True, False, False, False,  True,
       False, False,  True, False,  True, False, False, False, False,
       False,  True,

In [14]:
rfecv.get_feature_names_out()

array(['x0', 'x2', 'x5', 'x9', 'x10', 'x11', 'x12', 'x14', 'x15', 'x21',
       'x22', 'x32', 'x33', 'x34', 'x36', 'x41', 'x45', 'x49', 'x57',
       'x58', 'x59', 'x60', 'x62', 'x63', 'x69', 'x70', 'x72', 'x73',
       'x79', 'x81', 'x83', 'x90', 'x93', 'x94', 'x95', 'x98', 'x101',
       'x103', 'x106', 'x108', 'x110', 'x111', 'x112', 'x116', 'x119',
       'x121', 'x127', 'x128', 'x133', 'x135', 'x136', 'x139', 'x140',
       'x141', 'x142', 'x144', 'x145', 'x147', 'x150', 'x151', 'x161',
       'x163', 'x164', 'x167', 'x169', 'x170', 'x171', 'x174', 'x177',
       'x178', 'x179', 'x180', 'x181', 'x182', 'x184', 'x187', 'x188',
       'x189', 'x190', 'x191', 'x195', 'x200', 'x202', 'x203', 'x206',
       'x207', 'x209', 'x211', 'x215', 'x219', 'x220', 'x223', 'x225',
       'x226', 'x227', 'x229', 'x233', 'x239', 'x242', 'x243', 'x246',
       'x249', 'x251', 'x252', 'x255', 'x259', 'x263', 'x266', 'x267',
       'x269', 'x270', 'x271', 'x274', 'x279', 'x281', 'x286', 'x287',
      

In [11]:
rfecv.cv_results_

{'mean_test_score': array([-0.16224116, -0.15948638, -0.16098204, -0.1636009 , -0.16551487,
        -0.16821978, -0.16792307, -0.16971655]),
 'std_test_score': array([0.01651491, 0.0161541 , 0.01583374, 0.01776252, 0.01885396,
        0.01680881, 0.01635067, 0.01737918]),
 'split0_test_score': array([-0.13832247, -0.13465514, -0.13727295, -0.13362952, -0.13685932,
        -0.14129542, -0.1418756 , -0.14068121]),
 'split1_test_score': array([-0.15829394, -0.15736918, -0.15829914, -0.16324372, -0.16181221,
        -0.16529728, -0.1683796 , -0.1705429 ]),
 'split2_test_score': array([-0.18602491, -0.18141887, -0.182056  , -0.18471007, -0.18982308,
        -0.19099321, -0.19054984, -0.19278182]),
 'split3_test_score': array([-0.15423169, -0.15224533, -0.15301172, -0.15843744, -0.15690623,
        -0.16352325, -0.16098853, -0.16450349]),
 'split4_test_score': array([-0.17433281, -0.17174341, -0.17427039, -0.17798374, -0.18217352,
        -0.17998975, -0.17782177, -0.18007336])}

In [12]:
rfecv.ranking_

array([1, 2, 1, 6, 4, 1, 3, 7, 3, 1, 1, 1, 1, 2, 1, 1, 3, 2, 4, 7, 5, 1,
       1, 4, 2, 5, 3, 7, 4, 3, 6, 7, 1, 1, 1, 5, 1, 6, 4, 7, 5, 1, 6, 2,
       2, 1, 2, 6, 3, 1, 4, 3, 4, 3, 5, 4, 7, 1, 1, 1, 1, 3, 1, 1, 5, 2,
       5, 7, 3, 1, 1, 2, 1, 1, 3, 5, 6, 7, 6, 1, 7, 1, 2, 1, 4, 2, 2, 4,
       2, 6, 1, 4, 6, 1, 1, 1, 6, 3, 1, 4, 7, 1, 3, 1, 3, 5, 1, 5, 1, 6,
       1, 1, 1, 2, 4, 4, 1, 2, 3, 1, 6, 1, 2, 5, 2, 4, 5, 1, 1, 4, 7, 7,
       2, 1, 7, 1, 1, 2, 6, 1, 1, 1, 1, 3, 1, 1, 7, 1, 2, 4, 1, 1, 3, 5,
       5, 4, 7, 7, 2, 2, 7, 1, 6, 1, 1, 6, 3, 1, 7, 1, 1, 1, 5, 5, 1, 7,
       2, 1, 1, 1, 1, 1, 1, 3, 1, 7, 3, 1, 1, 1, 1, 1, 4, 6, 4, 1, 4, 6,
       4, 3, 1, 7, 1, 1, 2, 2, 1, 1, 4, 1, 6, 1, 6, 2, 5, 1, 6, 7, 5, 1,
       1, 4, 5, 1, 3, 1, 1, 1, 4, 1, 7, 6, 6, 1, 6, 5, 4, 6, 3, 1, 7, 7,
       1, 1, 6, 6, 1, 3, 3, 1, 5, 1, 1, 3, 2, 1, 7, 6, 4, 1, 4, 5, 7, 1,
       5, 2, 1, 1, 7, 1, 1, 1, 5, 5, 1, 7, 2, 6, 3, 1, 5, 1, 6, 5, 3, 3,
       1, 1, 4, 2, 5, 5, 7, 5, 1, 1, 1, 3, 4, 6])

In [15]:
len(rfecv.ranking_), len(rfecv.get_feature_names_out())

(300, 120)

In [66]:
X

array([[ 0.88661908,  1.42146609,  1.10345691, ...,  0.03404206,
         0.23978821,  2.41669025],
       [-0.06950958,  1.007678  ,  0.28973477, ..., -0.57177621,
         1.5297184 ,  0.65710649],
       [ 1.18707986, -0.15726531,  2.39808412, ...,  0.47989883,
         0.65131619, -0.48580281],
       ...,
       [ 0.5187474 ,  0.26190761, -3.56036828, ...,  1.24963992,
         1.31375189,  1.5736302 ],
       [-0.72134957, -0.18112951, -0.395346  , ..., -0.21437854,
         0.59573587,  0.35230313],
       [ 0.47388275,  0.43178594, -1.97444937, ...,  0.12952139,
        -0.85605538, -0.07904277]])

In [24]:
from datetime import datetime, timedelta

In [25]:
n = datetime.now()
n

datetime.datetime(2023, 5, 24, 11, 18, 2, 579201)

In [26]:
d = timedelta(hours=6)

In [27]:
n + d

datetime.datetime(2023, 5, 24, 17, 18, 2, 579201)

In [28]:
n > n + d

False

In [30]:
params = {"verbose": 1}
clf.set_params(**params)

<catboost.core.CatBoostClassifier at 0x16976f670>