In [1]:
import copy

import numpy as np
import pandas as pd

from flipping_random_forest import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from datasets import binclas_datasets, regr_datasets
from evaluation import evaluate_classification, evaluate_regression
from config import n_repeats, n_splits, dataset_map

2023-11-25 11:54:45 INFO     querying the filtered classification datasets
2023-11-25 11:54:45 INFO     ranking the datasets
2023-11-25 11:54:45 INFO     binary classification datasets prepared
2023-11-25 11:54:45 INFO     querying the filtered regression datasets
2023-11-25 11:54:45 INFO     ranking the datasets
2023-11-25 11:54:45 INFO     regression datasets prepared


In [2]:
# Suffix inserted before '.csv' in the names of the result files written by
# the evaluation loop; empty means no suffix.
postfix = ''

# Maps each label to a dict of per-dataset hyperparameters. Starts empty and
# is filled from the params_<label>.csv files.
params = dict()

# Short labels of the estimator families under evaluation; each label has a
# matching params_<label>.csv file and an entry in `configurations`.
labels = ['dtc', 'dtr', 'rfc', 'rfr']

In [3]:
# Fill `params` with the stored hyperparameters: one CSV per label, one row
# per dataset, with the dataset name in column 'name' and the textual
# parameter specification in column 'params'.
for label in labels:
    per_dataset = params[label] = {}
    best_params = pd.read_csv(f'params_{label}.csv')
    for record in best_params.to_dict('records'):
        # NOTE(review): eval() executes whatever expression is stored in the
        # CSV, so these files must come from a trusted source. If the stored
        # values are plain Python literals, ast.literal_eval would be a safer
        # drop-in -- confirm the stored format before switching.
        per_dataset[record['name']] = eval(record['params'])

In [4]:
# One row per experiment: (key, datasets object, estimator class, evaluation
# function). The row order is the order in which the experiments are
# iterated (and therefore printed and evaluated) later on.
_experiment_rows = (
    ('dtc', binclas_datasets, OperatorDecisionTreeClassifier, evaluate_classification),
    ('rfc', binclas_datasets, OperatorRandomForestClassifier, evaluate_classification),
    ('dtr', regr_datasets, OperatorDecisionTreeRegressor, evaluate_regression),
    ('rfr', regr_datasets, OperatorRandomForestRegressor, evaluate_regression),
)

# Expand every row into the dictionary consumed by the evaluation loop.
# 'label' repeats the key, and 'modes' is whatever get_modes() reports for a
# default-constructed instance of the estimator class.
configurations = {
    key: {
        'datasets': datasets,
        'estimator': estimator_cls,
        'function': evaluate,
        'label': key,
        'modes': estimator_cls().get_modes(),
    }
    for key, datasets, estimator_cls, evaluate in _experiment_rows
}

In [6]:
def _display_name(raw_name):
    """Return the mapped display name of a dataset, or the raw name if unmapped."""
    return dataset_map.get(raw_name, raw_name)


# Run every configured experiment and store its results in its own CSV file.
for key, setup in configurations.items():
    # NOTE(review): this prints the complete configuration, including the
    # full repr of the datasets object, which makes the cell output very long.
    print(key, setup)

    evaluate = setup['function']
    # Built anew on every iteration so each call receives its own dictionary.
    # NOTE(review): the seed 5 is hard-coded here and in the call below.
    cv_settings = {'n_splits': n_splits, 'n_repeats': n_repeats, 'random_state': 5}

    results = evaluate(
        datasets=setup['datasets'],
        estimator=setup['estimator'],
        params=params[key],
        modes=setup['modes'],
        validator_params=cv_settings,
        random_state=5,
    )

    # Replace raw dataset identifiers by their display names where a mapping
    # exists; unmapped names are kept unchanged.
    results['name'] = results['name'].apply(_display_name)

    results.to_csv(f'evaluation_{key}{postfix}.csv')

dtc {'datasets':                             name citation_key  n_col    n  n_minority  n_grid  \
0                   appendicitis         keel      7  106          21       7   
1                       haberman         keel      3  306          81       3   
2                   new_thyroid1         keel      5  215          35       5   
3                         glass0         keel      9  214          70       9   
4               shuttle-6_vs_2-3         keel      9  230          10       9   
5                           bupa         keel      6  345         145       6   
6               cleveland-0_vs_4         keel     13  177          13      10   
7                         ecoli1         keel      7  336          77       5   
8                   poker-9_vs_7         keel     10  244           8      10   
9                         monk-2         keel      6  432         204       4   
10                     hepatitis         krnn     19  155          32       6   
11         