In [1]:
import openml
from river import compose
from river import stats
from river.ensemble import BaggingRegressor
from river.feature_selection import SelectKBest
from river.metrics import MAE
from river.tree import HoeffdingTreeRegressor, HoeffdingTreeRegressorCS
from riverevaluate.eval import CompareModels
from ucimlrepo import fetch_ucirepo

In [2]:
# Look up OpenML dataset 189 and request its contents as a DataFrame.
kin8nm = openml.datasets.get_dataset(189)
kin8nm_parts = kin8nm.get_data(dataset_format="dataframe")
# get_data yields four items here; only the first (the data frame) is kept.
kin8nm_data, _, _, _ = kin8nm_parts

In [3]:
# Split the 'y' column away from the rest. The list indexer keeps the targets
# as a one-column DataFrame rather than a Series.
kin8nm_features = kin8nm_data.drop(columns='y')
kin8nm_targets = kin8nm_data.loc[:, ['y']]

In [4]:
# Fetch the two UCI ML Repository datasets (ids 1 and 186).
# `fetch_ucirepo` is imported in the notebook's import cell.
abalone = fetch_ucirepo(id=1)
wine_quality = fetch_ucirepo(id=186)

# Parallel lists with one entry per dataset (pandas DataFrames), in the order
# abalone, wine quality, kin8nm.
# NOTE(review): presumably this order has to line up with the integer keys of
# `dataset_map` -- confirm against CompareModels.
X = [abalone.data.features, wine_quality.data.features, kin8nm_features]
y = [abalone.data.targets, wine_quality.data.targets, kin8nm_targets]

In [5]:
# One Pearson-correlation SelectKBest per k in 1..4.
transformers = [
    SelectKBest(similarity=stats.PearsonCorr(), k=n_kept)
    for n_kept in range(1, 5)
]

In [None]:
# Default-configured tree first, then one tree per explicit regularize_factor.
models = [HoeffdingTreeRegressor()]
models += [
    HoeffdingTreeRegressor(regularize_factor=factor)
    for factor in (0.2, 0.3, 0.5)
]

In [7]:
# Add one bagged HoeffdingTreeRegressorCS per entry in `transformers`.
# This grows the existing `models` list in place, so running the cell a second
# time without rebuilding `models` appends a second copy of each ensemble.
models.extend(
    BaggingRegressor(
        model=HoeffdingTreeRegressorCS(
            transformer=k_best,
            leaf_prediction='mean',
        )
    )
    for k_best in transformers
)

In [8]:
# Two ways of pairing SelectKBest(k=2) with a mean-predicting tree:
# an explicit river Pipeline, and HoeffdingTreeRegressorCS taking the selector
# as its first argument.
kbest_then_tree = [
    ('k-best', SelectKBest(similarity=stats.PearsonCorr(), k=2)),
    ('model', HoeffdingTreeRegressor(leaf_prediction='mean')),
]
model_pipeline = compose.Pipeline(*kbest_then_tree)

# Pipeline goes in first, then the CS tree, both at the end of `models`.
models += [
    model_pipeline,
    HoeffdingTreeRegressorCS(
        SelectKBest(similarity=stats.PearsonCorr(), k=2),
        leaf_prediction='mean',
    ),
]

In [9]:
# Integer key -> label, numbered 0..9 in listing order.
# NOTE(review): presumably the keys are positions in `models`, so this listing
# has to follow the order in which models were added -- confirm against
# CompareModels.
models_map = dict(enumerate([
    'vanilla',
    'regularized-0.2',
    'regularized-0.3',
    'regularized-0.5',
    'bag-cs-1-mean',
    'bag-cs-2-mean',
    'bag-cs-3-mean',
    'bag-cs-4-mean',
    'htr-pipe-cs-2-mean',
    'htr-cs-2-mean',
]))

In [10]:
# Integer key -> dataset label, numbered 0..2 in listing order.
dataset_map = dict(enumerate(('abalone', 'winequality', 'kin8nm')))

In [11]:
# Build the comparison harness from the model list, the per-dataset feature and
# target frames, and the two integer-keyed label maps.
# NOTE(review): CompareModels comes from riverevaluate and is not visible here;
# how it pairs models_map/dataset_map with the lists is assumed to be by
# position -- confirm.
evaluate = CompareModels(
    models=models,
    features=X,
    targets=y,
    models_map = models_map,
    dataset_map = dataset_map
)

In [12]:
# Collect the comparison summary. The cells below index it by the 'model',
# 'dataset', 'error', 'runtime' and 'memory' columns.
results = evaluate.summary_results()

In [19]:
# Compare the Pipeline variant with the CS variant (both k=2), grouped by dataset.
is_pipe_or_cs = results['model'].isin(['htr-cs-2-mean', 'htr-pipe-cs-2-mean'])
results[is_pipe_or_cs].sort_values('dataset')

Unnamed: 0,model,runtime,dataset,error,memory
24,htr-pipe-cs-2-mean,0.621123,abalone,2.173781,751805
27,htr-cs-2-mean,0.387224,abalone,2.141808,713110
26,htr-pipe-cs-2-mean,1.624314,kin8nm,4.369913,751805
29,htr-cs-2-mean,0.744003,kin8nm,4.263023,713110
25,htr-pipe-cs-2-mean,1.569617,winequality,1.961644,751805
28,htr-cs-2-mean,0.708191,winequality,2.017159,713110


In [14]:
# Per-model mean of each measurement; keys are output column names, values are
# the source columns in `results`.
mean_of = {'error': 'error', 'runtime': 'runtime', 'memory_usage': 'memory'}
results.groupby('model').agg(
    **{out_col: (src_col, 'mean') for out_col, src_col in mean_of.items()}
)

Unnamed: 0_level_0,error,runtime,memory_usage
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bag-cs-1-mean,2.18465,16.043082,7292859.0
bag-cs-2-mean,2.598898,8.102845,6957575.0
bag-cs-3-mean,2.669526,7.932587,6813595.0
bag-cs-4-mean,2.830043,6.638103,6595491.0
htr-cs-2-mean,2.80733,0.613139,713110.0
htr-pipe-cs-2-mean,2.835113,1.271685,751805.0
regularized-0.2,1.179636,1.634213,747743.0
regularized-0.3,1.179636,1.81982,747743.0
regularized-0.5,1.225672,1.390521,737555.0
vanilla,1.549973,1.52658,715671.0
