In [1]:
import numpy
import pandas
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import RFECV

In [2]:
N = 10_000
SEED = 0  # fixed so the synthetic noise (and every score below) is reproducible

# A private RandomState keeps the draws deterministic without touching
# numpy's global RNG state.
rng = numpy.random.RandomState(SEED)

# Draw the noise explicitly and in a fixed order (B first, then C) so the
# generated values do not depend on dict-literal evaluation details.
ramp = numpy.arange(N)
noise_b = rng.normal(size=(N,))
noise_c = rng.normal(size=(N,))

# A: exact ramp 0..N-1 (the regression target further down).
# B: the ramp plus standard-normal noise (an informative feature).
# C: standard-normal noise only (an uninformative feature).
data = pandas.DataFrame(data={'A': ramp,
                              'B': ramp + noise_b,
                              'C': noise_c})
data

Unnamed: 0,A,B,C
0,0,-0.966261,0.322247
1,1,0.811972,1.139800
2,2,2.635955,0.588994
3,3,3.801243,1.233301
4,4,6.290001,0.590689
...,...,...,...
9995,9995,9993.582349,-0.631134
9996,9996,9994.927168,-2.772468
9997,9997,9996.984825,0.391744
9998,9998,9998.598723,1.056614


In [6]:
# Features: the noisy ramp (B) and the pure-noise column (C).
X = data[['B', 'C']].values
# Target: column A. The double-bracket selection keeps it 2-D, shape (N, 1);
# callers that need a flat target use Y.ravel().
Y = data[['A']].values

In [12]:
# Baseline: an ExtraTreesRegressor with default settings, trained on both features.
model = ExtraTreesRegressor()
# Y has shape (N, 1); ravel() hands the estimator a flat 1-D target.
model.fit(X, Y.ravel())

ExtraTreesRegressor()

In [13]:
# Scored on the same rows the model was fitted on, so this is a training
# score, not a held-out estimate.
model.score(X, Y.ravel())

0.99999999975112

In [25]:
# Wrap a default ExtraTreesRegressor in cross-validated recursive feature elimination.
rfe = RFECV(estimator=ExtraTreesRegressor())

In [29]:
# Inspect the hyper-parameters of the template estimator that was handed to RFECV.
rfe.estimator.get_params()

{'bootstrap': False,
 'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [21]:
# Run the feature elimination over the two candidate columns (B, C),
# again flattening the (N, 1) target.
rfe.fit(X, Y.ravel())

RFECV(estimator=ExtraTreesRegressor())

In [22]:
# Boolean mask over the input columns [B, C]; True marks a feature RFECV kept.
rfe.support_

array([ True,  True])

In [23]:
# Training-set score again (same X and Y that rfe was fitted on).
rfe.score(X, Y.ravel())

0.9999999997445878

In [24]:
# The fitted estimator RFECV exposes after fit (as opposed to the
# template held in rfe.estimator).
rfe.estimator_

ExtraTreesRegressor()

In [13]:
from mpydge.wrap.models.all import rfe_DTR

In [14]:
# NOTE(review): this rebinds `model`, which earlier held the ExtraTreesRegressor.
# Which rfe_DTR is bound here depends on execution order: the mpydge import
# sits directly above, but a class of the same name is also defined further
# down in this notebook — confirm on a fresh Restart & Run All.
model = rfe_DTR()

In [15]:
# NOTE(review): Y is passed 2-D here (shape (N, 1)), whereas the other fit
# calls in this notebook pass Y.ravel() — confirm the estimator is meant to
# receive a column target.
model.fit(X, Y)

In [16]:
# Display the estimator's repr.
model

rfe_DTR()

In [17]:
# Score on the training rows themselves; Y is still the 2-D (N, 1) array here.
model.score(X, Y)

0.999999999988

In [18]:
# Predictions for the same rows the model was fitted on.
model.predict(X)

array([0.000e+00, 1.000e+00, 2.000e+00, ..., 9.997e+03, 9.998e+03,
       9.999e+03])

In [3]:
from sklearn.tree import DecisionTreeRegressor as sk_DTR

In [4]:
class rfe_DTR(sk_DTR):
    """Decision-tree regressor whose fit/predict/score are routed through RFECV.

    The class subclasses ``DecisionTreeRegressor`` to inherit its constructor
    and ``get_params``/``set_params``, which is what lets tools such as
    ``GridSearchCV`` tune the tree's hyper-parameters on this wrapper.

    The parent's own ``fit`` is never invoked: ``fit`` below builds a *new*
    ``DecisionTreeRegressor`` from the current parameters and trains it inside
    ``RFECV``. All fitted state therefore lives on ``self.rfe``, not on this
    object.
    """

    def fit(self, X, Y):
        """Fit an RFECV-wrapped tree built from this object's parameters.

        Parameters
        ----------
        X : features, passed unchanged to ``RFECV.fit``.
        Y : target values, passed unchanged to ``RFECV.fit``.

        Returns
        -------
        self : rfe_DTR
            The fitted wrapper. Returning ``self`` follows the scikit-learn
            estimator contract and allows ``rfe_DTR().fit(X, Y).predict(X)``.
        """
        # Read the parameters at fit time so values applied through
        # set_params (e.g. by a grid search) reach the inner tree.
        params = self.get_params()
        print('got the params:')
        print(params)
        model = sk_DTR(**params)
        print('initialized the model')
        self.rfe = RFECV(model)
        print('the model is under rfe')
        self.rfe.fit(X, Y)
        print('finished rfe')
        return self

    def predict(self, X):
        """Return the fitted RFECV's predictions for ``X``.

        ``self.rfe`` is only created by ``fit``; calling this beforehand
        raises ``AttributeError``.
        """
        return self.rfe.predict(X)

    def score(self, X, Y):
        """Return the fitted RFECV's score for ``X`` against ``Y``.

        ``self.rfe`` is only created by ``fit``; calling this beforehand
        raises ``AttributeError``.
        """
        return self.rfe.score(X, Y)

In [8]:
# Fresh rfe_DTR with default tree parameters, used to exercise fit/get_params below.
testy = rfe_DTR()

In [9]:
# fit echoes the parameters it read and prints a progress line per step.
testy.fit(X, Y.ravel())

got the params:
{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}
initialized the model
the model is under rfe
finished rfe


In [10]:
# get_params is inherited from DecisionTreeRegressor (rfe_DTR does not
# override it), so the wrapper exposes the tree's hyper-parameters.
testy.get_params()

{'ccp_alpha': 0.0,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'presort': 'deprecated',
 'random_state': None,
 'splitter': 'best'}

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Grid over the tree depth: unlimited (None) versus capped at 10.
params = {'max_depth': [None, 10]}
# The estimator being tuned is the RFECV-backed wrapper, not a bare tree.
mody = rfe_DTR()
gscv = GridSearchCV(estimator=mody, param_grid=params)

In [13]:
# Every candidate/fold fit goes through rfe_DTR.fit, which prints its
# progress messages each time — expect a long, repetitive output below.
gscv.fit(X, Y.ravel())

got the params:
{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}
initialized the model
the model is under rfe
finished rfe
got the params:
{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}
initialized the model
the model is under rfe
finished rfe
got the params:
{'ccp_alpha': 0.0, 'criterion': 'mse', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weig

GridSearchCV(estimator=rfe_DTR(), param_grid={'max_depth': [None, 10]})