In [1]:
import datetime
import json

import numpy as np
import pandas as pd

from flipping_random_forest import (OperatorDecisionTreeClassifier,
                                    OperatorDecisionTreeRegressor,
                                    OperatorRandomForestClassifier,
                                    OperatorRandomForestRegressor)

from datasets import binclas_datasets, regr_datasets

from evaluation import evaluate_classification, evaluate_regression
from config import n_splits, n_repeats

from scipy.stats import wilcoxon

2023-11-19 12:12:42 INFO     querying the filtered classification datasets
2023-11-19 12:12:42 INFO     ranking the datasets
2023-11-19 12:12:42 INFO     binary classification datasets prepared
2023-11-19 12:12:42 INFO     querying the filtered regression datasets
2023-11-19 12:12:42 INFO     ranking the datasets
2023-11-19 12:12:42 INFO     regression datasets prepared


In [2]:
# Decision-tree classifier: run the evaluation on the binary classification
# datasets with two parameter sets that differ only in the 'operator' value
# ('<' vs '<='). The same seed (5) is passed to the estimator parameters, to
# the validator parameters and to the evaluation call itself.
results = evaluate_classification(
    datasets=binclas_datasets,
    estimator=OperatorDecisionTreeClassifier,
    params=[{'random_state': 5, 'operator': symbol} for symbol in ('<', '<=')],
    validator_params=dict(n_splits=n_splits, n_repeats=n_repeats, random_state=5),
    random_state=5,
)
# Persist the returned table (presumably a pandas DataFrame -- confirm in
# `evaluation`) next to the notebook.
results.to_csv('effect_presence_dtc.csv')

2023-11-19 12:12:42.826973 appendicitis
2023-11-19 12:12:42.877119 haberman
2023-11-19 12:12:42.923216 new_thyroid1
2023-11-19 12:12:42.965672 glass0
2023-11-19 12:12:43.009166 shuttle-6_vs_2-3
2023-11-19 12:12:43.052019 bupa
2023-11-19 12:12:43.126179 cleveland-0_vs_4
2023-11-19 12:12:43.175346 ecoli1
2023-11-19 12:12:43.223508 poker-9_vs_7
2023-11-19 12:12:43.264717 monk-2
2023-11-19 12:12:43.336579 hepatitis
2023-11-19 12:12:43.401131 yeast-0-3-5-9_vs_7-8
2023-11-19 12:12:43.455408 mammographic
2023-11-19 12:12:43.525204 saheart
2023-11-19 12:12:43.594714 page-blocks-1-3_vs_4
2023-11-19 12:12:43.707136 lymphography-normal-fibrosis
2023-11-19 12:12:43.768479 pima
2023-11-19 12:12:43.860134 wisconsin
2023-11-19 12:12:43.921856 abalone9_18
2023-11-19 12:12:44.019836 winequality-red-3_vs_5


In [3]:
# Decision-tree regressor: run the evaluation on the regression datasets with
# two parameter sets that differ only in the 'operator' value ('<' vs '<=').
# The same seed (5) is passed to the estimator parameters, to the validator
# parameters and to the evaluation call itself.
results = evaluate_regression(
    datasets=regr_datasets,
    estimator=OperatorDecisionTreeRegressor,
    params=[{'random_state': 5, 'operator': symbol} for symbol in ('<', '<=')],
    validator_params=dict(n_splits=n_splits, n_repeats=n_repeats, random_state=5),
    random_state=5,
)
# Persist the returned table (presumably a pandas DataFrame -- confirm in
# `evaluation`) next to the notebook.
results.to_csv('effect_presence_dtr.csv')

2023-11-19 12:12:44.084864 diabetes
2023-11-19 12:12:44.132267 o-ring
2023-11-19 12:12:44.455253 stock_portfolio_performance
2023-11-19 12:12:44.492354 wsn-ale
2023-11-19 12:12:44.549519 daily-demand
2023-11-19 12:12:44.596373 slump_test
2023-11-19 12:12:44.647100 servo
2023-11-19 12:12:44.693089 yacht_hydrodynamics
2023-11-19 12:12:44.739984 autoMPG6
2023-11-19 12:12:44.780500 excitation_current
2023-11-19 12:12:44.885580 real_estate_valuation
2023-11-19 12:12:44.962705 wankara
2023-11-19 12:12:45.011466 plastic
2023-11-19 12:12:45.063468 laser
2023-11-19 12:12:45.136423 qsar-aquatic-toxicity
2023-11-19 12:12:45.228953 baseball
2023-11-19 12:12:45.317655 maternal_health_risk
2023-11-19 12:12:45.423750 cpu_performance
2023-11-19 12:12:45.506005 airfoil
2023-11-19 12:12:45.593667 medical_cost


In [4]:
# Random-forest classifier: run the evaluation on the binary classification
# datasets with two parameter sets that differ only in the 'operator' value
# ('<' vs '<='). The same seed (5) is passed to the estimator parameters, to
# the validator parameters and to the evaluation call itself.
results = evaluate_classification(
    datasets=binclas_datasets,
    estimator=OperatorRandomForestClassifier,
    params=[{'random_state': 5, 'operator': symbol} for symbol in ('<', '<=')],
    validator_params=dict(n_splits=n_splits, n_repeats=n_repeats, random_state=5),
    random_state=5,
)
# Persist the returned table (presumably a pandas DataFrame -- confirm in
# `evaluation`) next to the notebook.
results.to_csv('effect_presence_rfc.csv')

2023-11-19 12:12:45.679120 appendicitis
2023-11-19 12:12:46.761406 haberman
2023-11-19 12:12:48.267388 new_thyroid1
2023-11-19 12:12:49.356106 glass0
2023-11-19 12:12:50.690502 shuttle-6_vs_2-3
2023-11-19 12:12:51.670916 bupa
2023-11-19 12:12:52.830258 cleveland-0_vs_4
2023-11-19 12:12:53.723970 ecoli1
2023-11-19 12:12:54.795828 poker-9_vs_7
2023-11-19 12:12:55.653225 monk-2
2023-11-19 12:12:56.691131 hepatitis
2023-11-19 12:12:57.721338 yeast-0-3-5-9_vs_7-8
2023-11-19 12:12:58.989299 mammographic
2023-11-19 12:13:00.617156 saheart
2023-11-19 12:13:02.212790 page-blocks-1-3_vs_4
2023-11-19 12:13:03.522015 lymphography-normal-fibrosis
2023-11-19 12:13:04.542882 pima
2023-11-19 12:13:06.300598 wisconsin
2023-11-19 12:13:07.504918 abalone9_18
2023-11-19 12:13:09.027432 winequality-red-3_vs_5


In [5]:
# Random-forest regressor: run the evaluation on the regression datasets with
# two parameter sets that differ only in the 'operator' value ('<' vs '<=').
# The same seed (5) is passed to the estimator parameters, to the validator
# parameters and to the evaluation call itself.
results = evaluate_regression(
    datasets=regr_datasets,
    estimator=OperatorRandomForestRegressor,
    params=[{'random_state': 5, 'operator': symbol} for symbol in ('<', '<=')],
    validator_params=dict(n_splits=n_splits, n_repeats=n_repeats, random_state=5),
    random_state=5,
)
# Persist the returned table (presumably a pandas DataFrame -- confirm in
# `evaluation`) next to the notebook.
results.to_csv('effect_presence_rfr.csv')

2023-11-19 12:13:10.843799 diabetes
2023-11-19 12:13:11.733517 o-ring
2023-11-19 12:13:13.658832 stock_portfolio_performance
2023-11-19 12:13:14.561124 wsn-ale
2023-11-19 12:13:15.631763 daily-demand
2023-11-19 12:13:16.543776 slump_test
2023-11-19 12:13:17.584782 servo
2023-11-19 12:13:18.528835 yacht_hydrodynamics
2023-11-19 12:13:19.806896 autoMPG6
2023-11-19 12:13:21.344635 excitation_current
2023-11-19 12:13:22.918849 real_estate_valuation
2023-11-19 12:13:24.748180 wankara
2023-11-19 12:13:26.796870 plastic
2023-11-19 12:13:29.622746 laser
2023-11-19 12:13:32.339170 qsar-aquatic-toxicity
2023-11-19 12:13:34.712605 baseball
2023-11-19 12:13:37.209113 maternal_health_risk
2023-11-19 12:13:39.435479 cpu_performance
2023-11-19 12:13:40.724789 airfoil
2023-11-19 12:13:43.594598 medical_cost
