In [1]:
import copy

import numpy as np
import pandas as pd

from flipping_random_forest import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from datasets import binclas_datasets, regr_datasets
from evaluation import evaluate_classification, evaluate_regression
from config import n_repeats_10, n_splits_10, dataset_map

2023-12-06 17:27:30 INFO     querying the filtered classification datasets
2023-12-06 17:27:30 INFO     ranking the datasets
2023-12-06 17:27:30 INFO     binary classification datasets prepared
2023-12-06 17:27:30 INFO     querying the filtered regression datasets
2023-12-06 17:27:30 INFO     ranking the datasets
2023-12-06 17:27:30 INFO     regression datasets prepared


In [2]:
labels = ['dtc', 'dtr', 'rfc', 'rfr']
params = {}
postfix = '_10'

In [3]:
for label in labels:
    params[label] = {}
    best_params = pd.read_csv(f'params_{label}.csv')
    for idx, row in best_params.iterrows():
        params[label][row['name']] = eval(row['params'])

In [4]:
configurations = {
    'dtc': {
        'datasets': binclas_datasets,
        'estimator': OperatorDecisionTreeClassifier,
        'function': evaluate_classification,
        'label': 'dtc',
        'modes': OperatorDecisionTreeClassifier().get_modes()
        },
    'rfc': {
        'datasets': binclas_datasets,
        'estimator': OperatorRandomForestClassifier,
        'function': evaluate_classification,
        'label': 'rfc',
        'modes': OperatorRandomForestClassifier().get_modes()
        },
    'dtr': {
        'datasets': regr_datasets,
        'estimator': OperatorDecisionTreeRegressor,
        'function': evaluate_regression,
        'label': 'dtr',
        'modes': OperatorDecisionTreeRegressor().get_modes()
        },
    'rfr': {
        'datasets': regr_datasets,
        'estimator': OperatorRandomForestRegressor,
        'function': evaluate_regression,
        'label': 'rfr',
        'modes': OperatorRandomForestRegressor().get_modes()
        },
}

In [5]:
for key, conf in configurations.items():
    print(key, conf)

    results = conf['function'](
        datasets=conf['datasets'],
        estimator=conf['estimator'],
        params=params[key],
        modes=conf['modes'],
        validator_params={'n_splits': n_splits_10, 'n_repeats': n_repeats_10, 'random_state': 5},
        random_state=5
    )


    results['name'] = results['name'].apply(lambda x: dataset_map.get(x, x))

    results.to_csv(f'evaluation_{key}{postfix}.csv')

dtc {'datasets':                             name citation_key  n_col    n  n_minority  n_grid  \
0                   appendicitis         keel      7  106          21       7   
1                       haberman         keel      3  306          81       3   
2                   new_thyroid1         keel      5  215          35       5   
3                         glass0         keel      9  214          70       9   
4               shuttle-6_vs_2-3         keel      9  230          10       9   
5                           bupa         keel      6  345         145       6   
6               cleveland-0_vs_4         keel     13  177          13      10   
7                         ecoli1         keel      7  336          77       5   
8                   poker-9_vs_7         keel     10  244           8      10   
9                         monk-2         keel      6  432         204       4   
10                     hepatitis         krnn     19  155          32       6   
11         

2023-12-06 17:28:09.802741 haberman
2023-12-06 17:28:39.224464 new_thyroid1
2023-12-06 17:29:12.199054 glass0
2023-12-06 17:29:54.469138 shuttle-6_vs_2-3
2023-12-06 17:30:33.207426 bupa
2023-12-06 17:31:11.402978 cleveland-0_vs_4
2023-12-06 17:32:10.683543 ecoli1
2023-12-06 17:32:55.072462 poker-9_vs_7
2023-12-06 17:33:32.645596 monk-2
2023-12-06 17:34:11.694154 hepatitis
2023-12-06 17:35:31.727605 yeast-0-3-5-9_vs_7-8
2023-12-06 17:36:15.536340 mammographic
2023-12-06 17:36:48.618353 saheart
2023-12-06 17:37:33.562669 page-blocks-1-3_vs_4
2023-12-06 17:38:20.439605 lymphography-normal-fibrosis
2023-12-06 17:39:15.126833 pima
2023-12-06 17:40:01.165326 wisconsin
2023-12-06 17:40:48.011785 abalone9_18
2023-12-06 17:41:40.666920 winequality-red-3_vs_5
rfc {'datasets':                             name citation_key  n_col    n  n_minority  n_grid  \
0                   appendicitis         keel      7  106          21       7   
1                       haberman         keel      3  306    