In [1]:
import ast
import copy

import pandas as pd

from flipping_random_forest import (AveragedDecisionTreeClassifier,
                                    AveragedRandomForestClassifier,
                                    AveragedDecisionTreeRegressor,
                                    AveragedRandomForestRegressor,
                                    FlippingDecisionTreeClassifier,
                                    FlippingDecisionTreeRegressor,
                                    FlippingRandomForestClassifier,
                                    FlippingRandomForestRegressor,
                                    SpecificDecisionTreeClassifier,
                                    SpecificRandomForestClassifier,
                                    SpecificDecisionTreeRegressor,
                                    SpecificRandomForestRegressor)

from flipping_random_forest import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from evaluation import evaluate_classification, evaluate_regression
from config import n_splits, n_repeats, dataset_map
from datasets import binclas_datasets, regr_datasets

2023-11-23 20:30:05 INFO     querying the filtered classification datasets
2023-11-23 20:30:05 INFO     ranking the datasets
2023-11-23 20:30:05 INFO     binary classification datasets prepared
2023-11-23 20:30:05 INFO     querying the filtered regression datasets
2023-11-23 20:30:05 INFO     ranking the datasets
2023-11-23 20:30:05 INFO     regression datasets prepared


In [3]:
# Suffix inserted into the name of every 'evaluation_<label>...csv' file
# written by the evaluation cell (empty: no suffix).
postfix = ""

# Tuned hyper-parameters; filled as params[label][dataset_name] by the
# parameter-loading cell.
params = dict()

# Estimator families to load tuned parameters for.
# For a quick single-family run, narrow this to ['dtc'].
labels = ["dtc", "dtr", "rfc", "rfr"]

In [4]:
# Load the tuned hyper-parameters of every estimator family from
# 'params_<label>.csv' into params[label][dataset_name].
for label in labels:
    best_params = pd.read_csv(f'params_{label}.csv')
    # Each entry of the 'params' column is parsed from its textual form into
    # a Python object (the loaded values appear to be plain dicts of numbers).
    # NOTE(review): this was previously parsed with eval(), which executes any
    # expression found in the CSV. ast.literal_eval only accepts Python
    # literals and raises ValueError/SyntaxError on anything else.
    params[label] = {
        name: ast.literal_eval(raw)
        for name, raw in zip(best_params['name'], best_params['params'])
    }

In [5]:
# Display the tuned parameters loaded above, keyed by family label and then by dataset name.
params

{'dtc': {'abalone9_18': {'random_state': 5, 'min_samples_leaf': 0.2},
  'appendicitis': {'random_state': 5, 'min_samples_leaf': 0.2},
  'bupa': {'random_state': 5, 'min_samples_leaf': 0.041},
  'cleveland-0_vs_4': {'random_state': 5, 'min_samples_leaf': 0.054},
  'ecoli1': {'random_state': 5, 'min_samples_leaf': 0.118},
  'glass0': {'random_state': 5, 'min_samples_leaf': 0.091},
  'haberman': {'random_state': 5, 'min_samples_leaf': 0.091},
  'hepatitis': {'random_state': 5, 'min_samples_leaf': 0.054},
  'lymphography-normal-fibrosis': {'random_state': 5, 'max_depth': 3},
  'mammographic': {'random_state': 5, 'min_samples_leaf': 0.07},
  'monk-2': {'random_state': 5, 'min_samples_leaf': 0.008},
  'new_thyroid1': {'random_state': 5, 'min_samples_leaf': 0.091},
  'page-blocks-1-3_vs_4': {'random_state': 5, 'min_samples_leaf': 0.041},
  'pima': {'random_state': 5, 'min_samples_leaf': 0.054},
  'poker-9_vs_7': {'random_state': 5, 'min_samples_leaf': 0.091},
  'saheart': {'random_state': 5, 

In [6]:
def _variants(datasets, function, label, estimators):
    """Expand (estimator, params, flabel) triples into configuration dicts.

    Every returned dict has the keys 'datasets', 'estimator', 'function',
    'label', 'params' and 'flabel' (in that order); 'datasets', 'function'
    and 'label' are shared by all variants of one family.
    """
    return [
        {
            'datasets': datasets,
            'estimator': estimator,
            'function': function,
            'label': label,
            # Fresh dict per configuration so that no two entries share one.
            'params': dict(estimator_params),
            'flabel': flabel,
        }
        for estimator, estimator_params, flabel in estimators
    ]


# Experiment grid: for each family label, the estimator variants to evaluate.
# 'label' selects the tuned parameters to combine with 'params'; 'flabel' is
# the tag written into the 'label' column of the results.
configurations = {
    'dtc': _variants(binclas_datasets, evaluate_classification, 'dtc', [
        (OperatorDecisionTreeClassifier, {'operator': '<='}, 'dtc_leq'),
        (OperatorDecisionTreeClassifier, {'operator': '<'}, 'dtc_l'),
        (AveragedDecisionTreeClassifier, {'mode': 'full'}, 'adtc_full'),
        (AveragedDecisionTreeClassifier, {'mode': 'random'}, 'adtc_rand'),
        (FlippingDecisionTreeClassifier, {}, 'fdtc'),
        (SpecificDecisionTreeClassifier, {}, 'sdtc'),
    ]),

    'rfc': _variants(binclas_datasets, evaluate_classification, 'rfc', [
        (OperatorRandomForestClassifier, {'operator': '<='}, 'rfc_leq'),
        (OperatorRandomForestClassifier, {'operator': '<'}, 'rfc_l'),
        (AveragedRandomForestClassifier, {'mode': 'full'}, 'arfc_full'),
        (FlippingRandomForestClassifier, {'mode': 'full'}, 'frfc_full'),
        (SpecificRandomForestClassifier, {}, 'srfc'),
    ]),

    'dtr': _variants(regr_datasets, evaluate_regression, 'dtr', [
        (OperatorDecisionTreeRegressor, {'operator': '<='}, 'dtr_leq'),
        (OperatorDecisionTreeRegressor, {'operator': '<'}, 'dtr_l'),
        (AveragedDecisionTreeRegressor, {'mode': 'full'}, 'adtr_full'),
        (FlippingDecisionTreeRegressor, {}, 'fdtr'),
        (SpecificDecisionTreeRegressor, {}, 'sdtr'),
    ]),

    'rfr': _variants(regr_datasets, evaluate_regression, 'rfr', [
        (OperatorRandomForestRegressor, {'operator': '<='}, 'rfr_leq'),
        (OperatorRandomForestRegressor, {'operator': '<'}, 'rfr_l'),
        (AveragedRandomForestRegressor, {'mode': 'full'}, 'arfr_full'),
        (FlippingRandomForestRegressor, {'mode': 'full'}, 'frfr_full'),
        (SpecificRandomForestRegressor, {}, 'srfr'),
    ]),
}

In [7]:
# Evaluate every configuration of every estimator family and write the
# stacked results of each family to 'evaluation_<key><postfix>.csv'.
for key in configurations:
    frames = []
    for conf in configurations[key]:
        print(key, conf['estimator'], conf['params'])
        base_params = conf['params']

        # Per-dataset parameters: the configuration's own parameters first,
        # then the tuned ones loaded for this family (they win on clashes),
        # and finally a fixed random_state of 5.
        tuned = copy.deepcopy(params[key])
        merged_params = {
            name: base_params | overrides | {'random_state': 5}
            for name, overrides in tuned.items()
        }

        evaluate = conf['function']
        conf_results = evaluate(
            datasets=conf['datasets'],
            estimator=conf['estimator'],
            params=merged_params,
            validator_params={
                'n_splits': n_splits,
                'n_repeats': n_repeats,
                'random_state': 5,
            },
            random_state=5,
        )
        # Tag the rows so the variants stay distinguishable after stacking.
        conf_results['label'] = conf['flabel']
        frames.append(conf_results)

    results = pd.concat(frames)

    # Replace each dataset name by its entry in dataset_map, keeping the
    # original name when there is no entry.
    results['name'] = results['name'].apply(lambda x: dataset_map.get(x, x))

    results.to_csv(f'evaluation_{key}{postfix}.csv')

dtc <class 'flipping_random_forest._operator_classifiers.OperatorDecisionTreeClassifier'> {'operator': '<='}
2023-11-23 20:30:05.550440 appendicitis
2023-11-23 20:30:05.599952 haberman
2023-11-23 20:30:05.660993 new_thyroid1
2023-11-23 20:30:05.758013 glass0
2023-11-23 20:30:05.823809 shuttle-6_vs_2-3
2023-11-23 20:30:05.879622 bupa
2023-11-23 20:30:05.952548 cleveland-0_vs_4


2023-11-23 20:30:06.014818 ecoli1
2023-11-23 20:30:06.092680 poker-9_vs_7
2023-11-23 20:30:06.211172 monk-2
2023-11-23 20:30:06.333770 hepatitis
2023-11-23 20:30:06.420548 yeast-0-3-5-9_vs_7-8
2023-11-23 20:30:06.464139 mammographic
2023-11-23 20:30:06.528178 saheart
2023-11-23 20:30:06.579312 page-blocks-1-3_vs_4
2023-11-23 20:30:06.690741 lymphography-normal-fibrosis
2023-11-23 20:30:06.746840 pima
2023-11-23 20:30:06.837020 wisconsin
2023-11-23 20:30:06.930466 abalone9_18
2023-11-23 20:30:06.988298 winequality-red-3_vs_5
dtc <class 'flipping_random_forest._operator_classifiers.OperatorDecisionTreeClassifier'> {'operator': '<'}
2023-11-23 20:30:07.021283 appendicitis
2023-11-23 20:30:07.048773 haberman
2023-11-23 20:30:07.081771 new_thyroid1
2023-11-23 20:30:07.118287 glass0
2023-11-23 20:30:07.168824 shuttle-6_vs_2-3
2023-11-23 20:30:07.238401 bupa
2023-11-23 20:30:07.367719 cleveland-0_vs_4
2023-11-23 20:30:07.453398 ecoli1
2023-11-23 20:30:07.528417 poker-9_vs_7
2023-11-23 20:30:0