In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

import sys
sys.path.append('..')

from helpers import utils, pipelines, models

from sklearn.model_selection import train_test_split

import multiprocessing

from data_augmentaion.data_augmentator import DataAugmentor 

import json

import time

### Define research parameters

In [3]:
regression_task = False
# continuous_features = []
# metric = 'accuracy'
metrics = ['f1_weighted', 'accuracy', 'balanced_accuracy', 'precision_weighted', 'recall_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'] 

test_size_proportion = 0.2
augment_sample = 0.5

search_pipelines = pipelines.get_classification_pipelines()
search_parameters = models.parameters

settings = [
    {'method': 'random'},
    {'method': 'smote'},
    {'method': 'cf_random'},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 1}},
    {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 1}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 1}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 0.2}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 1}},
]

In [4]:
df = pd.read_csv(r"..\datasets\cirrhosis.csv")
df.drop(columns="ID",inplace=True)
target = 'Status'

In [5]:
missing_df = df.isna().sum()
dtypes_df = df.dtypes
pd.concat([missing_df, dtypes_df], axis=1)

Unnamed: 0,0,1
N_Days,0,int64
Status,0,object
Drug,106,object
Age,0,int64
Sex,0,object
Ascites,106,object
Hepatomegaly,106,object
Spiders,106,object
Edema,0,object
Bilirubin,0,float64


In [6]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_selector


numerical_imputer = SimpleImputer(strategy='mean')#.set_output(transform='pandas')
categorical_imputer = SimpleImputer(strategy='most_frequent')#.set_output(transform='pandas')

final_imputer = ColumnTransformer([
    ('numerical', numerical_imputer, make_column_selector(dtype_include=['int', 'float'])),
    ('categorical', categorical_imputer, make_column_selector(dtype_include=['object'])),
])

df = final_imputer.fit_transform(df)

In [7]:
df = pd.DataFrame(df, columns=[c.split('__')[-1] for c in final_imputer.get_feature_names_out()]).astype(dtypes_df)

In [8]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df[target] = le.fit_transform(df[target])

#### split data

In [9]:
# all data
X = df.drop(target, axis=1)
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size_proportion, random_state=42)

In [10]:
continuous_features = X.select_dtypes(include=['number']).columns.tolist()
continuous_features

['N_Days',
 'Age',
 'Bilirubin',
 'Cholesterol',
 'Albumin',
 'Copper',
 'Alk_Phos',
 'SGOT',
 'Tryglicerides',
 'Platelets',
 'Prothrombin',
 'Stage']

In [11]:
display(y_train.value_counts())

0    188
2    125
1     21
Name: Status, dtype: int64

In [12]:
full_results_df = pd.DataFrame()

### Whole dataset scores

In [13]:
print(f'whole dataset scores:')
_, whole_scores = utils.fit_and_evaluate(X_train, y_train, X_test, y_test,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
whole_dataset_result_df = pd.DataFrame.from_dict(whole_scores, orient='index')
whole_dataset_result_df.columns = pd.MultiIndex.from_product([['whole']] + [whole_dataset_result_df.columns])
display(whole_dataset_result_df.T)

whole dataset scores:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,Unnamed: 1,lg,rf,xgb
whole,f1_weighted,0.731732,0.773441,0.742029
whole,accuracy,0.738095,0.797619,0.761905
whole,balanced_accuracy,0.524411,0.54798,0.52862
whole,precision_weighted,0.71434,0.776045,0.727952
whole,recall_weighted,0.738095,0.797619,0.761905
whole,roc_auc_ovr_weighted,0.871974,0.860169,0.867857
whole,roc_auc_ovo_weighted,0.839075,0.797379,0.837963


In [14]:
full_results_df = pd.concat([full_results_df, whole_dataset_result_df], axis=1)

### little flattening the y-curve

In [15]:
y_train_value_counts = y_train.value_counts()
display(y_train_value_counts)
classes_gap = 50 - y_train.value_counts()
classes_to_inflate = classes_gap[classes_gap>0]
df_inflate = pd.DataFrame()
for c, gap in classes_to_inflate.items():
    df_c_inflate = df[df[target]==c].sample(n=gap, replace=True, random_state=42) # , replace=True
    df_inflate = pd.concat([df_inflate, df_c_inflate])
df_inflate[target].value_counts()


0    188
2    125
1     21
Name: Status, dtype: int64

1    29
Name: Status, dtype: int64

In [16]:
X_train_inflate = pd.concat([X_train, df_inflate.drop(target, axis=1)])
y_train_inflate = pd.concat([y_train, df_inflate[target]])

In [17]:
print(f'inflate dataset scores:')
_, inflate_scores = utils.fit_and_evaluate(X_train_inflate, y_train_inflate, X_test, y_test,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
inflate_dataset_result_df = pd.DataFrame.from_dict(inflate_scores, orient='index')
inflate_dataset_result_df.columns = pd.MultiIndex.from_product([['inflate']] + [inflate_dataset_result_df.columns])
display(inflate_dataset_result_df.T)

inflate dataset scores:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,Unnamed: 1,lg,rf,xgb
inflate,f1_weighted,0.724538,0.773441,0.791364
inflate,accuracy,0.738095,0.797619,0.797619
inflate,balanced_accuracy,0.526094,0.54798,0.628788
inflate,precision_weighted,0.711557,0.760547,0.797642
inflate,recall_weighted,0.738095,0.797619,0.797619
inflate,roc_auc_ovr_weighted,0.883581,0.87629,0.887897
inflate,roc_auc_ovo_weighted,0.879539,0.882756,0.905829


In [18]:
full_results_df = pd.concat([full_results_df, inflate_dataset_result_df], axis=1)

In [19]:
full_results_df

Unnamed: 0_level_0,whole,whole,whole,whole,whole,whole,whole,inflate,inflate,inflate,inflate,inflate,inflate,inflate
Unnamed: 0_level_1,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted
lg,0.731732,0.738095,0.524411,0.71434,0.738095,0.871974,0.839075,0.724538,0.738095,0.526094,0.711557,0.738095,0.883581,0.879539
rf,0.773441,0.797619,0.54798,0.776045,0.797619,0.860169,0.797379,0.773441,0.797619,0.54798,0.760547,0.797619,0.87629,0.882756
xgb,0.742029,0.761905,0.52862,0.727952,0.761905,0.867857,0.837963,0.791364,0.797619,0.628788,0.797642,0.797619,0.887897,0.905829


### running experiments

In [20]:
best_estimators = {}
best_scores = {}
total_time = time.time()
for i, s in enumerate(settings):
    start = time.time()
    print(f'{i+1} / {len(settings)}, {s}', end=' ')

    if 'cf' in s['method']:
        X_train_for_balance_augmemt, y_train_for_balance_augmemt = X_train_inflate, y_train_inflate
    else:
        X_train_for_balance_augmemt, y_train_for_balance_augmemt = X_train, y_train
    augmentor = DataAugmentor(X_train_for_balance_augmemt, y_train_for_balance_augmemt, X_test, y_test,
                             method=s['method'], regression=regression_task,
                             continuous_feats=continuous_features,
                             cf_scoring='balanced_accuracy',
                             kw_args=s.get('kw_args', {})
                             )

    X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
    best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
    utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
                            search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
    result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'], orient='index')
    result_df_balanced.columns = pd.MultiIndex.from_product([[f'{json.dumps((list(s.values())))} balanced']] + [result_df_balanced.columns])


    X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
    best_estimators[f'{i}'], best_scores[f'{i}'] = \
        utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
                               search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
    result_df = pd.DataFrame.from_dict(best_scores[f'{i}'], orient='index')
    result_df.columns = pd.MultiIndex.from_product([[f'{json.dumps((list(s.values())))}']] + [result_df.columns])


    full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
    print(f'{time.time() - start} seconds for settings {i}')

print(f'\nTotal time: {time.time() - total_time}')

1 / 4, {'method': 'random'} 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


11.831823587417603 seconds for settings 0
2 / 4, {'method': 'smote'} 12.184007406234741 seconds for settings 1
3 / 4, {'method': 'cf_random'} model for cf balanced_accuracy score: {'cf': {'balanced_accuracy': 0.7382154882154882}}
0/63


100%|██████████| 1/1 [00:00<00:00,  3.03it/s]
100%|██████████| 1/1 [00:00<00:00,  3.01it/s]
100%|██████████| 1/1 [00:00<00:00,  2.93it/s]
100%|██████████| 1/1 [00:00<00:00,  3.37it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  3.23it/s]
100%|██████████| 1/1 [00:00<00:00,  3.61it/s]
100%|██████████| 1/1 [00:00<00:00,  3.72it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]
100%|██████████| 1/1 [00:00<00:00,  3.58it/s]
100%|██████████| 1/1 [00:00<00:00,  3.82it/s]
100%|██████████| 1/1 [00:00<00:00,  4.50it/s]
100%|██████████| 1/1 [00:00<00:00,  3.57it/s]
100%|██████████| 1/1 [00:00<00:00,  3.52it/s]
100%|██████████| 1/1 [00:00<00:00,  3.97it/s]
100%|██████████| 1/1 [00:00<00:00,  4.04it/s]
100%|██████████| 1/1 [00:00<00:00,  4.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.99it/s]
100%|██████████| 1/1 [00:00<00:00,  4.05it/s]
100%|██████████| 1/1 [00:01<00:00,

50/63


100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.28it/s]
100%|██████████| 1/1 [00:00<00:00,  3.74it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.81it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  4.09it/s]
100%|██████████| 1/1 [00:00<00:00,  3.85it/s]
100%|██████████| 1/1 [00:00<00:00,  3.94it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  3.91it/s]


0/138


100%|██████████| 1/1 [00:00<00:00,  3.61it/s]
100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00,  1.15it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  3.56it/s]
100%|██████████| 1/1 [00:00<00:00,  3.50it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.50it/s]
100%|██████████| 1/1 [00:00<00:00,  1.04it/s]
100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
100%|██████████| 1/1 [00:00<00:00,  3.81it/s]
100%|██████████| 1/1 [00:00<00:00,  3.98it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]
100%|██████████| 1/1 [00:00<00:00,  3.91it/s]
100%|██████████| 1/1 [00:00<00:00,  4.01it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
100%|██████████| 1/1 [00:00<00:00,  1.30it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]
100%|██████████| 1/1 [00:00<00:00,  1.21it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  3.64it/s]
100%|██████████| 1/1 [00:00<00:00,  3.93it/s]
100%|██████████| 1/1 [00:00<00:00,  1.73it/s]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
100%|██████████| 1/1 [00:00<00:00,  3.60it/s]
100%|██████████| 1/1 [00:00<00:00,  4.05it/s]
100%|██████████| 1/1 [00:00<00:00,  3.84it/s]
100%|██████████| 1/1 [00:00<00:00,  4.11it/s]
100%|██████████| 1/1 [00:00<00:00,  4.35it/s]
100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.89it/s]
100%|██████████| 1/1 [00:00<00:00,  2.31it/s]
100%|██████████| 1/1 [00:00<00:00,  2.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.65it/s]
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
100%|██████████| 1/1 [00:02<00:00,  2.30s/it]
100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  3.46it/s]
100%|██████████| 1/1 [00:00<00:00,  3.66it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]
100%|██████████| 1/1 [00:00<00:00,  4.04it/s]
100%|██████████| 1/1 [00:00<00:00,

50/138


100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  3.80it/s]
100%|██████████| 1/1 [00:00<00:00,  1.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
100%|██████████| 1/1 [00:00<00:00,  4.01it/s]
100%|██████████| 1/1 [00:00<00:00,  2.32it/s]
100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
100%|██████████| 1/1 [00:00<00:00,  3.56it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  2.70it/s]
100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.55it/s]
100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
100%|██████████| 1/1 [00:00<00:00,  3.84it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.52it/s]
100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
100%|██████████| 1/1 [01:24<00:00, 84.65s/it]
100%|██████████| 1/1 [00:00<00:00,  2.91it/s]
100%|██████████| 1/1 [00:00<00:00,  3.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  3.85it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.71it/s]
100%|██████████| 1/1 [00:00<00:00,  2.65it/s]
100%|██████████| 1/1 [00:00<00:00,  2.90it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  4.20it/s]
100%|██████████| 1/1 [00:00<00:00,  4.19it/s]
100%|██████████| 1/1 [00:00<00:00,  4.01it/s]
100%|██████████| 1/1 [00:00<00:00,  2.87it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.19it/s]
100%|██████████| 1/1 [00:00<00:00,  2.91it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  4.10it/s]
100%|██████████| 1/1 [00:00<00:00,  2.93it/s]
100%|██████████| 1/1 [00:00<00:00,

100/138


100%|██████████| 1/1 [00:00<00:00,  2.70it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  2.54it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.09it/s]
100%|██████████| 1/1 [01:23<00:00, 83.94s/it]
100%|██████████| 1/1 [00:00<00:00,  4.22it/s]
100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  3.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
100%|██████████| 1/1 [00:00<00:00,  3.07it/s]
100%|██████████| 1/1 [00:00<00:00,  4.09it/s]
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
100%|██████████| 1/1 [00:00<00:00,  4.01it/s]
100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  2.96it/s]
100%|██████████| 1/1 [00:00<00:00,

model for cf balanced_accuracy score: {'cf': {'balanced_accuracy': 0.7382154882154882}}
0/181


100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]
100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]
100%|██████████| 1/1 [00:00<00:00,  2.40it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  3.70it/s]
100%|██████████| 1/1 [00:00<00:00,  2.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.16it/s]
100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.95it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]
100%|██████████| 1/1 [00:00<00:00,  1.00it/s]
100%|██████████| 1/1 [01:30<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  3.72it/s]
100%|██████████| 1/1 [00:00<00:00,  4.04it/s]
100%|██████████| 1/1 [00:00<00:00,  3.81it/s]
100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
100%|██████████| 1/1 [00:00<00:00,  3.69it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.55it/s]
100%|██████████| 1/1 [00:00<00:00,  2.95it/s]
100%|██████████| 1/1 [00:00<00:00,  3.95it/s]
100%|██████████| 1/1 [00:00<00:00,  4.60it/s]
100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
100%|██████████| 1/1 [00:00<00:00,  4.07it/s]
100%|██████████| 1/1 [00:00<00:00,  2.57it/s]
100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
100%|██████████| 1/1 [00:00<00:00,  3.54it/s]
100%|██████████| 1/1 [00:00<00:00,  3.93it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  3.90it/s]
100%|██████████| 1/1 [00:00<00:00,  3.54it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]
100%|██████████| 1/1 [00:00<00:00,

50/181


100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  3.95it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]
100%|██████████| 1/1 [00:00<00:00,  3.84it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  2.40it/s]
100%|██████████| 1/1 [00:00<00:00,  3.60it/s]
100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
100%|██████████| 1/1 [00:00<00:00,  3.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.61it/s]
100%|██████████| 1/1 [00:00<00:00,  3.86it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:00<00:00,  3.80it/s]
100%|██████████| 1/1 [00:00<00:00,  3.51it/s]
100%|██████████| 1/1 [00:00<00:00,  3.70it/s]
100%|██████████| 1/1 [00:00<00:00,  3.44it/s]
100%|██████████| 1/1 [00:00<00:00,  3.61it/s]
100%|██████████| 1/1 [00:00<00:00,  3.20it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.29it/s]
100%|██████████| 1/1 [00:00<00:00,  1.36it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.94it/s]
100%|██████████| 1/1 [00:00<00:00,  2.95it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
100%|██████████| 1/1 [00:00<00:00,  4.05it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.26it/s]
100%|██████████| 1/1 [00:00<00:00,  3.53it/s]
100%|██████████| 1/1 [00:00<00:00,  4.02it/s]
100%|██████████| 1/1 [00:00<00:00,  3.92it/s]
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.94it/s]
100%|██████████| 1/1 [00:00<00:00,  3.99it/s]
100%|██████████| 1/1 [00:00<00:00,  2.72it/s]
100%|██████████| 1/1 [00:00<00:00,  2.94it/s]
100%|██████████| 1/1 [00:00<00:00,  3.39it/s]
100%|██████████| 1/1 [00:00<00:00,

100/181


100%|██████████| 1/1 [00:00<00:00,  3.62it/s]
100%|██████████| 1/1 [00:00<00:00,  2.45it/s]
100%|██████████| 1/1 [00:00<00:00,  4.29it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.54it/s]
100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
100%|██████████| 1/1 [00:00<00:00,  3.63it/s]
100%|██████████| 1/1 [00:00<00:00,  4.42it/s]
100%|██████████| 1/1 [00:00<00:00,  2.91it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
100%|██████████| 1/1 [00:00<00:00,  3.60it/s]
100%|██████████| 1/1 [00:00<00:00,  3.47it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  2.78it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.94it/s]
100%|██████████| 1/1 [00:00<00:00,  2.34it/s]
100%|██████████| 1/1 [00:00<00:00,

150/181


100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
100%|██████████| 1/1 [00:00<00:00,  2.85it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.06it/s]
100%|██████████| 1/1 [00:00<00:00,  4.04it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.73it/s]
100%|██████████| 1/1 [00:00<00:00,  3.76it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  3.73it/s]
100%|██████████| 1/1 [00:00<00:00,  4.08it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  4.07it/s]
100%|██████████| 1/1 [00:00<00:00,  2.52it/s]
100%|██████████| 1/1 [00:00<00:00,  3.73it/s]
100%|██████████| 1/1 [00:00<00:00,  3.46it/s]
100%|██████████| 1/1 [00:00<00:00,  4.05it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  3.39it/s]
100%|██████████| 1/1 [00:00<00:00,  3.71it/s]
100%|██████████| 1/1 [00:00<00:00,

665.5719857215881 seconds for settings 2
4 / 4, {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}} model for cf balanced_accuracy score: {'cf': {'balanced_accuracy': 0.7382154882154882}}
0/63


100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.85it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.01it/s]
100%|██████████| 1/1 [00:00<00:00,  2.82it/s]
100%|██████████| 1/1 [00:00<00:00,  2.89it/s]
100%|██████████| 1/1 [00:00<00:00,  3.19it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.33it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]
100%|██████████| 1/1 [00:00<00:00,  3.14it/s]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.97it/s]
100%|██████████| 1/1 [00:00<00:00,  3.18it/s]
100%|██████████| 1/1 [00:00<00:00,  3.38it/s]
100%|██████████| 1/1 [00:00<00:00,  3.03it/s]
100%|██████████| 1/1 [00:00<00:00,  3.16it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  2.66it/s]
100%|██████████| 1/1 [00:00<00:00,

50/63


100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
100%|██████████| 1/1 [00:00<00:00,  2.58it/s]
100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
100%|██████████| 1/1 [00:00<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00,  2.39it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s]
100%|██████████| 1/1 [00:00<00:00,  2.72it/s]
100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]


0/138


100%|██████████| 1/1 [00:12<00:00, 12.18s/it]
100%|██████████| 1/1 [00:03<00:00,  3.25s/it]
100%|██████████| 1/1 [00:08<00:00,  8.15s/it]
100%|██████████| 1/1 [00:05<00:00,  5.95s/it]
100%|██████████| 1/1 [00:08<00:00,  8.97s/it]
100%|██████████| 1/1 [00:02<00:00,  2.70s/it]
100%|██████████| 1/1 [00:04<00:00,  4.30s/it]
100%|██████████| 1/1 [00:05<00:00,  5.59s/it]
100%|██████████| 1/1 [00:05<00:00,  5.76s/it]
100%|██████████| 1/1 [00:02<00:00,  2.37s/it]
100%|██████████| 1/1 [00:08<00:00,  8.71s/it]
100%|██████████| 1/1 [00:08<00:00,  8.45s/it]
100%|██████████| 1/1 [00:07<00:00,  7.96s/it]
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:08<00:00,  8.31s/it]
100%|██████████| 1/1 [00:02<00:00,  2.45s/it]
100%|██████████| 1/1 [00:05<00:00,  5.33s/it]
100%|██████████| 1/1 [00:03<00:00,  3.19s/it]
100%|██████████| 1/1 [00:06<00:00,  6.39s/it]
100%|██████████| 1/1 [00:04<00:00,  4.88s/it]
100%|██████████| 1/1 [00:06<00:00,  6.37s/it]
100%|██████████| 1/1 [00:10<00:00,

50/138


100%|██████████| 1/1 [00:06<00:00,  6.82s/it]
100%|██████████| 1/1 [00:05<00:00,  5.16s/it]
100%|██████████| 1/1 [00:06<00:00,  6.23s/it]
100%|██████████| 1/1 [00:07<00:00,  7.95s/it]
100%|██████████| 1/1 [00:16<00:00, 16.11s/it]
100%|██████████| 1/1 [00:07<00:00,  7.30s/it]
100%|██████████| 1/1 [00:10<00:00, 10.41s/it]
100%|██████████| 1/1 [00:06<00:00,  6.20s/it]
100%|██████████| 1/1 [00:05<00:00,  5.46s/it]
100%|██████████| 1/1 [00:09<00:00,  9.85s/it]
100%|██████████| 1/1 [00:04<00:00,  4.10s/it]
100%|██████████| 1/1 [00:03<00:00,  3.87s/it]
100%|██████████| 1/1 [00:05<00:00,  5.39s/it]
100%|██████████| 1/1 [00:02<00:00,  2.60s/it]
100%|██████████| 1/1 [00:06<00:00,  6.05s/it]
100%|██████████| 1/1 [00:05<00:00,  5.52s/it]
100%|██████████| 1/1 [00:04<00:00,  4.41s/it]
100%|██████████| 1/1 [00:05<00:00,  5.33s/it]
100%|██████████| 1/1 [00:06<00:00,  6.70s/it]
100%|██████████| 1/1 [00:07<00:00,  7.06s/it]
100%|██████████| 1/1 [00:04<00:00,  4.18s/it]
100%|██████████| 1/1 [00:11<00:00,

100/138


100%|██████████| 1/1 [00:07<00:00,  7.18s/it]
100%|██████████| 1/1 [00:05<00:00,  5.45s/it]
100%|██████████| 1/1 [00:01<00:00,  1.51s/it]
100%|██████████| 1/1 [00:07<00:00,  7.94s/it]
100%|██████████| 1/1 [00:08<00:00,  8.53s/it]
100%|██████████| 1/1 [00:07<00:00,  7.38s/it]
100%|██████████| 1/1 [00:08<00:00,  8.06s/it]
100%|██████████| 1/1 [00:08<00:00,  8.26s/it]
100%|██████████| 1/1 [00:05<00:00,  5.46s/it]
100%|██████████| 1/1 [00:03<00:00,  3.80s/it]
100%|██████████| 1/1 [00:04<00:00,  4.56s/it]
100%|██████████| 1/1 [00:01<00:00,  1.93s/it]
100%|██████████| 1/1 [00:06<00:00,  6.93s/it]
100%|██████████| 1/1 [00:05<00:00,  5.67s/it]
100%|██████████| 1/1 [00:04<00:00,  4.26s/it]
100%|██████████| 1/1 [00:06<00:00,  6.66s/it]
100%|██████████| 1/1 [00:06<00:00,  6.31s/it]
100%|██████████| 1/1 [00:02<00:00,  2.32s/it]
100%|██████████| 1/1 [00:08<00:00,  8.69s/it]
100%|██████████| 1/1 [00:03<00:00,  3.62s/it]
100%|██████████| 1/1 [00:06<00:00,  6.28s/it]
100%|██████████| 1/1 [00:06<00:00,

model for cf balanced_accuracy score: {'cf': {'balanced_accuracy': 0.7382154882154882}}
0/181


100%|██████████| 1/1 [00:04<00:00,  4.83s/it]
100%|██████████| 1/1 [00:04<00:00,  4.37s/it]
100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s]
100%|██████████| 1/1 [00:00<00:00,  3.05it/s]
100%|██████████| 1/1 [00:00<00:00,  2.66it/s]
100%|██████████| 1/1 [00:02<00:00,  2.17s/it]
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
100%|██████████| 1/1 [00:09<00:00,  9.08s/it]
100%|██████████| 1/1 [00:05<00:00,  5.16s/it]
100%|██████████| 1/1 [00:00<00:00,  2.94it/s]
100%|██████████| 1/1 [00:00<00:00,  3.35it/s]
100%|██████████| 1/1 [00:07<00:00,  7.86s/it]
100%|██████████| 1/1 [00:00<00:00,  2.98it/s]
100%|██████████| 1/1 [00:08<00:00,  8.76s/it]
100%|██████████| 1/1 [00:00<00:00,  3.06it/s]
100%|██████████| 1/1 [00:00<00:00,  2.98it/s]
100%|██████████| 1/1 [00:00<00:00,  2.93it/s]
100%|██████████| 1/1 [00:02<00:00,  2.87s/it]
100%|██████████| 1/1 [00:00<00:00,  2.55it/s]
100%|██████████| 1/1 [00:07<00:00,  7.49s/it]
100%|██████████| 1/1 [00:00<00:00,

50/181


100%|██████████| 1/1 [00:00<00:00,  2.83it/s]
100%|██████████| 1/1 [00:04<00:00,  4.92s/it]
100%|██████████| 1/1 [00:00<00:00,  2.99it/s]
100%|██████████| 1/1 [00:08<00:00,  8.93s/it]
100%|██████████| 1/1 [00:11<00:00, 11.43s/it]
100%|██████████| 1/1 [00:05<00:00,  5.02s/it]
100%|██████████| 1/1 [00:04<00:00,  4.77s/it]
100%|██████████| 1/1 [00:00<00:00,  3.03it/s]
100%|██████████| 1/1 [00:10<00:00, 10.71s/it]
100%|██████████| 1/1 [00:10<00:00, 10.75s/it]
100%|██████████| 1/1 [00:00<00:00,  1.23it/s]
100%|██████████| 1/1 [00:09<00:00,  9.03s/it]
100%|██████████| 1/1 [00:17<00:00, 17.30s/it]
100%|██████████| 1/1 [00:00<00:00,  1.80it/s]
100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
100%|██████████| 1/1 [00:00<00:00,  1.37it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
100%|██████████| 1/1 [00:04<00:00,  4.47s/it]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
100%|██████████| 1/1 [00:07<00:00,

100/181


100%|██████████| 1/1 [00:11<00:00, 11.56s/it]
100%|██████████| 1/1 [00:00<00:00,  1.16it/s]
100%|██████████| 1/1 [00:00<00:00,  1.02it/s]
100%|██████████| 1/1 [00:00<00:00,  1.01it/s]
100%|██████████| 1/1 [00:00<00:00,  1.03it/s]
100%|██████████| 1/1 [00:00<00:00,  1.04it/s]
100%|██████████| 1/1 [00:00<00:00,  1.49it/s]
100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
100%|██████████| 1/1 [00:04<00:00,  4.20s/it]
100%|██████████| 1/1 [00:00<00:00,  2.67it/s]
100%|██████████| 1/1 [00:00<00:00,  2.81it/s]
100%|██████████| 1/1 [00:09<00:00,  9.46s/it]
100%|██████████| 1/1 [00:00<00:00,  3.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.64it/s]
100%|██████████| 1/1 [00:04<00:00,  4.87s/it]
100%|██████████| 1/1 [00:00<00:00,  2.96it/s]
100%|██████████| 1/1 [00:05<00:00,  5.48s/it]
100%|██████████| 1/1 [00:00<00:00,  2.13it/s]
100%|██████████| 1/1 [00:01<00:00,  1.48s/it]
100%|██████████| 1/1 [00:08<00:00,  8.04s/it]
100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
100%|██████████| 1/1 [00:03<00:00,

150/181


100%|██████████| 1/1 [00:05<00:00,  5.49s/it]
100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
100%|██████████| 1/1 [00:09<00:00,  9.28s/it]
100%|██████████| 1/1 [00:11<00:00, 11.17s/it]
100%|██████████| 1/1 [00:00<00:00,  2.67it/s]
100%|██████████| 1/1 [00:00<00:00,  2.10it/s]
100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.93it/s]
100%|██████████| 1/1 [00:00<00:00,  2.19it/s]
100%|██████████| 1/1 [00:07<00:00,  7.67s/it]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:12<00:00, 12.37s/it]
100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
100%|██████████| 1/1 [00:15<00:00, 15.93s/it]
100%|██████████| 1/1 [00:03<00:00,  3.48s/it]
100%|██████████| 1/1 [00:04<00:00,  4.87s/it]
100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
100%|██████████| 1/1 [00:06<00:00,  6.33s/it]
100%|██████████| 1/1 [00:00<00:00,

1679.366427898407 seconds for settings 3

Total time: 2368.956385374069


In [21]:
# # best_estimators = {}
# # best_scores = {}
# # for i, s in enumerate(settings):
# #     start = time.time()
# #     print(f'{i} / {len(settings)}, {s}', end=' ')
# #     augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
# #                              method=s['method'], regression=regression_task,
# #                              continuous_feats=continuous_features,
# #                              cf_scoring = metric,
# #                              kw_args=s.get('kw_args', {})
# #                              )

# #     X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
# #     best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
# #     utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
# #                             search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
# #     result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
# #                                                 orient='index',
# #                                                 columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

# #     X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
# #     best_estimators[f'{i}'], best_scores[f'{i}'] = \
# #         utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
# #                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
# #     result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
# #                                        orient='index',
# #                                        columns=[f'{json.dumps((list(s.values())))} {metric} score'])
    
# #     full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
# #     display(full_results_df)
# #     print(f'{time.time() - start} seconds for settings {i}')

# best_estimators = {}
# best_scores = {}
# for i, s in enumerate(settings):
#     start = time.time()
#     print(f'{i+1} / {len(settings)}, {s}', end=' ')
#     if False:# 'cf_genetic' in s['method']:
#         augmentor = DataAugmentor(X_train_inflate, y_train_inflate, X_test, y_test,
#                              method=s['method'], regression=regression_task,
#                              continuous_feats=continuous_features,
#                              cf_scoring = metric,
#                              kw_args=s.get('kw_args', {})
#                              )

#         X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
#         best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
#         utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
#                                 search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#         result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
#                                                     orient='index',
#                                                     columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

#         X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
#         best_estimators[f'{i}'], best_scores[f'{i}'] = \
#             utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
#                                 search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#         result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
#                                         orient='index',
#                                         columns=[f'{json.dumps((list(s.values())))} {metric} score'])
        
#         full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
#         print(f'{time.time() - start} seconds for settings {i}')
#     else:
#         augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
#                                 method=s['method'], regression=regression_task,
#                                 continuous_feats=continuous_features,
#                                 cf_scoring = metric,
#                                 kw_args=s.get('kw_args', {})
#                                 )

#         X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
#         best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
#         utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
#                                 search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#         result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
#                                                     orient='index',
#                                                     columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

#         X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
#         best_estimators[f'{i}'], best_scores[f'{i}'] = \
#             utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
#                                 search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#         result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
#                                         orient='index',
#                                         columns=[f'{json.dumps((list(s.values())))} {metric} score'])
        
#         full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
#         print(f'{time.time() - start} seconds for settings {i}')


### Summary

In [22]:
full_results_df

Unnamed: 0_level_0,whole,whole,whole,whole,whole,whole,whole,inflate,inflate,inflate,...,"[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}] balanced","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}] balanced","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}] balanced","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}]"
Unnamed: 0_level_1,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted,f1_weighted,accuracy,balanced_accuracy,...,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted
lg,0.731732,0.738095,0.524411,0.71434,0.738095,0.871974,0.839075,0.724538,0.738095,0.526094,...,0.690476,0.881101,0.880261,0.725409,0.72619,0.588384,0.735619,0.72619,0.878323,0.868296
rf,0.773441,0.797619,0.54798,0.776045,0.797619,0.860169,0.797379,0.773441,0.797619,0.54798,...,0.75,0.858482,0.861096,0.79908,0.809524,0.563973,0.789752,0.809524,0.870536,0.85594
xgb,0.742029,0.761905,0.52862,0.727952,0.761905,0.867857,0.837963,0.791364,0.797619,0.628788,...,0.785714,0.88125,0.900343,0.797365,0.797619,0.706229,0.797354,0.797619,0.872917,0.903845


In [23]:
print('best methods')
best_methods = pd.DataFrame()
for s in metrics:
    models_scores = full_results_df.xs(s, axis='columns', level=1)
    max_models = models_scores.idxmax(axis='columns')
    max_models.name = s
    max_models['overall'] = models_scores.max(axis=0).idxmax(axis=0)
    best_methods = pd.concat([best_methods, max_models], axis=1)
best_methods

best methods


Unnamed: 0,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted
lg,"[""random""]","[""random""]","[""cf_random""] balanced","[""cf_random""] balanced","[""random""]",inflate,"[""cf_random""] balanced"
rf,"[""cf_genetic"", {""proximity_weight"": 5, ""divers...","[""random""]","[""cf_random""] balanced","[""cf_random""]","[""random""]","[""random""] balanced","[""cf_random""] balanced"
xgb,"[""cf_genetic"", {""proximity_weight"": 5, ""divers...",inflate,"[""cf_random""] balanced",inflate,inflate,inflate,"[""cf_random""] balanced"
overall,"[""cf_genetic"", {""proximity_weight"": 5, ""divers...","[""random""]","[""cf_random""] balanced","[""cf_random""]","[""random""]",inflate,"[""cf_random""] balanced"


In [24]:
full_results_df.to_csv(rf'../log/experiment_multiclass_cirrhosis_testsize{test_size_proportion}_augmentsample{augment_sample}.csv')