In [7]:
import ast
import copy

import pandas as pd

from flipping_random_forest import (AveragedDecisionTreeClassifier,
                                    AveragedRandomForestClassifier,
                                    AveragedDecisionTreeRegressor,
                                    AveragedRandomForestRegressor,
                                    FlippingDecisionTreeClassifier,
                                    FlippingDecisionTreeRegressor,
                                    FlippingRandomForestClassifier,
                                    FlippingRandomForestRegressor)

from flipping_random_forest import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from evaluation import evaluate_classification, evaluate_regression
from config import n_splits, n_repeats, dataset_map
from datasets import binclas_datasets, regr_datasets

In [8]:
# Estimator families whose tuned parameters are loaded from `params_<label>.csv`.
# For a quick run this list can be narrowed, e.g. to ['dtc'].
labels = [
    'dtc',
    'dtr',
    'rfc',
    'rfr',
]

# Filled in below: params[label][name] -> parameter dict.
params = dict()

# Suffix appended to the stem of every `evaluation_<key>` output file name.
postfix = ''

In [9]:
# Load the tuned hyperparameters for every estimator family listed in `labels`.
# Each `params_<label>.csv` must provide a `name` column (the key the parameters
# are stored under) and a `params` column holding a Python dict literal.
for label in labels:
    best_params = pd.read_csv(f'params_{label}.csv')
    # ast.literal_eval accepts only Python literals, so -- unlike eval() -- a
    # malformed or tampered CSV cell cannot execute code; it raises
    # ValueError/SyntaxError instead.
    params[label] = {
        name: ast.literal_eval(raw_params)
        for name, raw_params in zip(best_params['name'], best_params['params'])
    }

In [10]:
# Show the loaded hyperparameters: params[label][name] -> parameter dict.
params

{'dtc': {'abalone9_18': {'random_state': 5, 'min_samples_leaf': 0.2},
  'appendicitis': {'random_state': 5, 'min_samples_leaf': 0.2},
  'bupa': {'random_state': 5, 'min_samples_leaf': 0.041},
  'cleveland-0_vs_4': {'random_state': 5, 'min_samples_leaf': 0.054},
  'ecoli1': {'random_state': 5, 'min_samples_leaf': 0.118},
  'glass0': {'random_state': 5, 'min_samples_leaf': 0.091},
  'haberman': {'random_state': 5, 'min_samples_leaf': 0.091},
  'hepatitis': {'random_state': 5, 'min_samples_leaf': 0.054},
  'lymphography-normal-fibrosis': {'random_state': 5, 'max_depth': 3},
  'mammographic': {'random_state': 5, 'min_samples_leaf': 0.07},
  'monk-2': {'random_state': 5, 'min_samples_leaf': 0.008},
  'new_thyroid1': {'random_state': 5, 'min_samples_leaf': 0.091},
  'page-blocks-1-3_vs_4': {'random_state': 5, 'min_samples_leaf': 0.041},
  'pima': {'random_state': 5, 'min_samples_leaf': 0.054},
  'poker-9_vs_7': {'random_state': 5, 'min_samples_leaf': 0.091},
  'saheart': {'random_state': 5, 

In [11]:
# Evaluation setups grouped by estimator family ('dtc', 'rfc', 'dtr', 'rfr').
# Every entry is a dict with the keys:
#   'datasets'  - dataset collection handed to the evaluation function
#   'estimator' - estimator class to evaluate
#   'function'  - evaluation function to call
#   'label'     - the family key the entry belongs to
#   'params'    - fixed estimator parameters of this variant
#   'flabel'    - short tag identifying the variant
# The variants are listed compactly as (estimator, params, flabel) tuples and
# expanded into the full dicts by the comprehension.
configurations = {
    family: [
        {
            'datasets': datasets,
            'estimator': estimator,
            'function': evaluate,
            'label': family,
            'params': fixed_params,
            'flabel': flabel,
        }
        for estimator, fixed_params, flabel in variants
    ]
    for family, datasets, evaluate, variants in (
        ('dtc', binclas_datasets, evaluate_classification, (
            (OperatorDecisionTreeClassifier, {'operator': '<='}, 'dtc_leq'),
            (OperatorDecisionTreeClassifier, {'operator': '<'}, 'dtc_l'),
            (AveragedDecisionTreeClassifier, {'mode': 'full'}, 'adtc_full'),
            (AveragedDecisionTreeClassifier, {'mode': 'random'}, 'adtc_rand'),
            (FlippingDecisionTreeClassifier, {}, 'fdtc'),
        )),
        ('rfc', binclas_datasets, evaluate_classification, (
            (OperatorRandomForestClassifier, {'operator': '<='}, 'rfc_leq'),
            (OperatorRandomForestClassifier, {'operator': '<'}, 'rfc_l'),
            (AveragedRandomForestClassifier, {'mode': 'full'}, 'arfc_full'),
            (AveragedRandomForestClassifier, {'mode': 'half'}, 'arfc_half'),
            (FlippingRandomForestClassifier, {'mode': 'full'}, 'frfc_full'),
            (FlippingRandomForestClassifier, {'mode': 'random'}, 'frfc_rand'),
        )),
        ('dtr', regr_datasets, evaluate_regression, (
            (OperatorDecisionTreeRegressor, {'operator': '<='}, 'dtr_leq'),
            (OperatorDecisionTreeRegressor, {'operator': '<'}, 'dtr_l'),
            (AveragedDecisionTreeRegressor, {'mode': 'full'}, 'adtr_full'),
            (AveragedDecisionTreeRegressor, {'mode': 'random'}, 'adtr_rand'),
            (FlippingDecisionTreeRegressor, {}, 'fdtr'),
        )),
        ('rfr', regr_datasets, evaluate_regression, (
            (OperatorRandomForestRegressor, {'operator': '<='}, 'rfr_leq'),
            (OperatorRandomForestRegressor, {'operator': '<'}, 'rfr_l'),
            (AveragedRandomForestRegressor, {'mode': 'full'}, 'arfr_full'),
            (AveragedRandomForestRegressor, {'mode': 'half'}, 'arfr_half'),
            (FlippingRandomForestRegressor, {'mode': 'full'}, 'frfr_full'),
            (FlippingRandomForestRegressor, {'mode': 'random'}, 'frfr_rand'),
        )),
    )
}

In [12]:
# Run every configured estimator variant on its datasets and write one CSV of
# results per estimator family (`evaluation_<key><postfix>.csv`).
SEED = 5  # one seed shared by the estimators, the validator and the evaluation call

for key in configurations:
    # `params` only holds the families listed in `labels`. When that list is
    # narrowed for a quick run, skip the other families instead of failing
    # with a KeyError part-way through the evaluation.
    if key not in params:
        print(f'skipping {key}: no tuned parameters loaded')
        continue

    family_results = []
    for conf in configurations[key]:
        print(key, conf['estimator'], conf['params'])

        # Per-name estimator parameters: the variant's fixed params, overridden
        # by the tuned ones, with the seed applied last so it always wins.
        dataset_params = {
            name: conf['params'] | tuned | {'random_state': SEED}
            for name, tuned in copy.deepcopy(params[key]).items()
        }

        conf_results = conf['function'](
            datasets=conf['datasets'],
            estimator=conf['estimator'],
            params=dataset_params,
            validator_params={'n_splits': n_splits, 'n_repeats': n_repeats, 'random_state': SEED},
            random_state=SEED
        )
        # Tag the rows with the variant so the concatenated frame stays separable.
        conf_results['label'] = conf['flabel']
        family_results.append(conf_results)

    results = pd.concat(family_results)

    # Translate names through `dataset_map`; names without a mapping are kept as-is.
    results['name'] = results['name'].apply(lambda x: dataset_map.get(x, x))

    results.to_csv(f'evaluation_{key}{postfix}.csv')

dtc <class 'flipping_random_forest._operator_classifiers.OperatorDecisionTreeClassifier'> {'operator': '<='}
2023-11-22 15:30:33.199235 appendicitis


2023-11-22 15:30:35.936066 haberman
2023-11-22 15:30:38.839508 new_thyroid1
2023-11-22 15:30:42.031006 glass0
2023-11-22 15:30:44.903815 shuttle-6_vs_2-3
2023-11-22 15:30:47.478276 bupa
2023-11-22 15:30:50.452849 cleveland-0_vs_4
2023-11-22 15:30:52.774469 ecoli1
2023-11-22 15:30:54.972342 poker-9_vs_7
2023-11-22 15:30:57.216301 monk-2
2023-11-22 15:30:59.393093 hepatitis
2023-11-22 15:31:01.882448 yeast-0-3-5-9_vs_7-8
2023-11-22 15:31:05.181429 mammographic
2023-11-22 15:31:08.028297 saheart
2023-11-22 15:31:11.500947 page-blocks-1-3_vs_4
2023-11-22 15:31:14.699828 lymphography-normal-fibrosis
2023-11-22 15:31:17.405760 pima
2023-11-22 15:31:22.141997 wisconsin
2023-11-22 15:31:25.104819 abalone9_18
2023-11-22 15:31:28.437312 winequality-red-3_vs_5
dtc <class 'flipping_random_forest._operator_classifiers.OperatorDecisionTreeClassifier'> {'operator': '<'}
2023-11-22 15:31:31.568052 appendicitis
2023-11-22 15:31:34.233311 haberman
2023-11-22 15:31:36.911997 new_thyroid1
2023-11-22 15:31