# The extensive evaluation of all variations of the estimators

In [6]:
import os

import numpy as np
import pandas as pd

from conditioning_bias import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from datasets import binclas_datasets, regr_datasets
from evaluation import evaluate_classification, evaluate_regression
from config import n_repeats, n_splits, dataset_map, data_dir, random_seed

In [7]:
labels = ['dtc', 'dtr', 'rfc', 'rfr']
params = {}
postfix = ''

In [8]:
for label in labels:
    params[label] = {}
    best_params = pd.read_csv(os.path.join(data_dir, f'params_{label}.csv'))
    for idx, row in best_params.iterrows():
        params[label][row['name']] = eval(row['params'])

In [9]:
configurations = {
    'dtc': {
        'datasets': binclas_datasets,
        'estimator': OperatorDecisionTreeClassifier,
        'function': evaluate_classification,
        'label': 'dtc',
        'modes': OperatorDecisionTreeClassifier().get_modes()
        },
    'rfc': {
        'datasets': binclas_datasets,
        'estimator': OperatorRandomForestClassifier,
        'function': evaluate_classification,
        'label': 'rfc',
        'modes': OperatorRandomForestClassifier().get_modes()
        },
    'dtr': {
        'datasets': regr_datasets,
        'estimator': OperatorDecisionTreeRegressor,
        'function': evaluate_regression,
        'label': 'dtr',
        'modes': OperatorDecisionTreeRegressor().get_modes()
        },
    'rfr': {
        'datasets': regr_datasets,
        'estimator': OperatorRandomForestRegressor,
        'function': evaluate_regression,
        'label': 'rfr',
        'modes': OperatorRandomForestRegressor().get_modes()
        },
}

In [10]:
for key, conf in configurations.items():
    print(key, conf)

    results = conf['function'](
        datasets=conf['datasets'],
        estimator=conf['estimator'],
        params=params[key],
        modes=conf['modes'],
        validator_params={'n_splits': n_splits, 'n_repeats': n_repeats, 'random_state': 5},
        random_seed=random_seed
    )

    results['name'] = results['name'].apply(lambda x: dataset_map.get(x, x))

    results.to_csv(os.path.join(data_dir, f'evaluation_{key}{postfix}.csv'))

dtc {'datasets':                             name citation_key  n_col    n  n_minority  n_grid  \
0                   appendicitis         keel      7  106          21       7   
1                       haberman         keel      3  306          81       3   
2                   new_thyroid1         keel      5  215          35       5   
3                         glass0         keel      9  214          70       9   
4               shuttle-6_vs_2-3         keel      9  230          10       9   
5                           bupa         keel      6  345         145       6   
6               cleveland-0_vs_4         keel     13  177          13      10   
7                         ecoli1         keel      7  336          77       5   
8                   poker-9_vs_7         keel     10  244           8      10   
9                         monk-2         keel      6  432         204       4   
10                     hepatitis         krnn     19  155          32       6   
11         

2023-12-11 21:01:59.878932 haberman
2023-12-11 21:02:30.229312 new_thyroid1
2023-12-11 21:02:56.303004 glass0
2023-12-11 21:03:20.125075 shuttle-6_vs_2-3
2023-12-11 21:03:40.161722 bupa
2023-12-11 21:04:05.695513 cleveland-0_vs_4
2023-12-11 21:04:29.506927 ecoli1
2023-12-11 21:04:53.040715 poker-9_vs_7
2023-12-11 21:05:13.121637 monk-2
2023-12-11 21:05:32.559041 hepatitis
2023-12-11 21:05:52.227245 yeast-0-3-5-9_vs_7-8
2023-12-11 21:06:15.498808 mammographic
2023-12-11 21:06:36.667445 saheart
2023-12-11 21:06:58.083683 page-blocks-1-3_vs_4
2023-12-11 21:07:17.730981 lymphography-normal-fibrosis
2023-12-11 21:07:36.212653 pima
2023-12-11 21:08:00.106214 wisconsin
2023-12-11 21:08:21.283253 abalone9_18
2023-12-11 21:08:42.215631 winequality-red-3_vs_5
rfc {'datasets':                             name citation_key  n_col    n  n_minority  n_grid  \
0                   appendicitis         keel      7  106          21       7   
1                       haberman         keel      3  306    

KeyboardInterrupt: 