In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd

import sys
sys.path.append('..')

from helpers import utils, pipelines, models

from sklearn.model_selection import train_test_split

import multiprocessing

from data_augmentaion.data_augmentator import DataAugmentor 

import json

import time

### Define research parameters

In [4]:
# Classification experiment; this flag is forwarded to DataAugmentor as `regression`.
regression_task = False
# continuous_features = []
# Scoring name handed to utils.fit_and_evaluate (scoring) and DataAugmentor (cf_scoring).
metric = 'accuracy'
# Fraction of rows held out by train_test_split.
test_size_proportion = 0.2
# Passed as `size` to augmentor.augment(balance=False, ...);
# presumably a fraction of the training set - TODO confirm in DataAugmentor.
augment_sample = 0.5

# Candidate estimators and their search spaces, handed to utils.fit_and_evaluate.
# NOTE(review): the helper is named after the "adult" dataset but is applied to the
# cirrhosis data in this notebook - confirm its preprocessing suits these columns.
search_pipelines = pipelines.get_adult_pipelines()
search_parameters = models.parameters

# One entry per augmentation run: 'method' is passed to DataAugmentor as `method`
# and the optional 'kw_args' dict is forwarded as `kw_args`. Commented-out entries
# are alternative configurations that are currently disabled.
settings = [
    {'method': 'random'},
    {'method': 'smote'},
    {'method': 'cf_random'},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 1}},
    {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 1}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 1}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 0.2}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 1}},
]

In [5]:
# Load the cirrhosis dataset. Forward slashes keep the relative path valid on
# Linux/macOS as well as Windows; a backslash-separated path only resolves on Windows.
df = pd.read_csv("../datasets/cirrhosis.csv")
# Drop the ID column (presumably a row identifier with no predictive meaning).
# Reassigning instead of using inplace=True avoids mutating the frame in place.
df = df.drop(columns="ID")
# Name of the column holding the class label to predict.
target = 'Status'

In [6]:
# Per-column overview: count of missing values next to the column dtype.
# `dtypes_df` is kept around because the original dtypes are re-applied after imputation.
dtypes_df = df.dtypes
missing_df = df.isnull().sum()
pd.concat((missing_df, dtypes_df), axis="columns")

Unnamed: 0,0,1
N_Days,0,int64
Status,0,object
Drug,106,object
Age,0,int64
Sex,0,object
Ascites,106,object
Hepatomegaly,106,object
Spiders,106,object
Edema,0,object
Bilirubin,0,float64


In [7]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_selector


# Impute missing values by column type: numeric columns get the column mean,
# object (categorical) columns get their most frequent value.
# NOTE(review): the imputer is fitted on the full frame, before the train/test
# split further down, so test rows contribute to the imputation statistics.
numerical_imputer = SimpleImputer(strategy='mean')
categorical_imputer = SimpleImputer(strategy='most_frequent')

numeric_columns = make_column_selector(dtype_include=['int', 'float'])
object_columns = make_column_selector(dtype_include=['object'])

final_imputer = ColumnTransformer(transformers=[
    ('numerical', numerical_imputer, numeric_columns),
    ('categorical', categorical_imputer, object_columns),
])

# `df` now holds the transformer output (numeric block first, categorical block
# second); it is wrapped back into a named DataFrame in the following cell.
df = final_imputer.fit_transform(df)

In [8]:
# ColumnTransformer prefixes output names with "<transformer>__"; strip that prefix
# to recover the original column names, then cast back to the pre-imputation dtypes.
imputed_column_names = [
    feature_name.split('__')[-1]
    for feature_name in final_imputer.get_feature_names_out()
]
df = pd.DataFrame(df, columns=imputed_column_names).astype(dtypes_df)

In [9]:
from sklearn.preprocessing import LabelEncoder
# Replace the string class labels with integer codes; `le` keeps the fitted
# label-to-code mapping.
le = LabelEncoder()
le.fit(df[target])
df[target] = le.transform(df[target])

#### split data

In [10]:
# Whole dataset: separate the label from the features, then hold out a
# fixed-seed test set of size `test_size_proportion`.
y = df[target]
X = df.drop(columns=target)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size_proportion, random_state=42
)

In [11]:
# Names of the numeric feature columns; passed to DataAugmentor as `continuous_feats`.
continuous_features = list(X.select_dtypes(include='number').columns)
continuous_features

['N_Days',
 'Age',
 'Bilirubin',
 'Cholesterol',
 'Albumin',
 'Copper',
 'Alk_Phos',
 'SGOT',
 'Tryglicerides',
 'Platelets',
 'Prothrombin',
 'Stage']

In [12]:
# Class balance of the training labels.
display(y_train.value_counts())

0    188
2    125
1     21
Name: Status, dtype: int64

In [13]:
# Results accumulator: score columns from each experiment are concatenated onto it.
full_results_df = pd.DataFrame()

### Whole dataset scores

In [14]:
# Baseline: fit and evaluate every candidate pipeline on the un-augmented training data.
# The message names the cirrhosis dataset (it previously said "adult", a leftover
# from another notebook).
print(f'whole cirrhosis dataset {metric} scores:')
_, whole_scores = utils.fit_and_evaluate(X_train, y_train, X_test, y_test,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
# One row per estimator, one score column; transposed only for display.
whole_dataset_result_df = pd.DataFrame.from_dict(whole_scores, orient='index', columns=[f'full data {metric} score'])
display(whole_dataset_result_df.T)

whole adult dataset accuracy scores:


Unnamed: 0,lg,rf,xgb
full data accuracy score,0.738095,0.797619,0.761905


In [15]:
# Append the baseline scores to the results table as a new column.
full_results_df = pd.concat([full_results_df, whole_dataset_result_df], axis=1)

### Slightly flattening the class distribution of y

In [16]:
# Top up under-represented classes: every class with fewer than 50 training rows
# gets extra rows, drawn with replacement, to close its gap to 50.
# NOTE(review): the extra rows are sampled from the full `df` (train and test rows
# alike) while the gap is measured on `y_train` - confirm this is intended.
y_train_value_counts = y_train.value_counts()
display(y_train_value_counts)
classes_to_50_gap = 50 - y_train_value_counts
classes_to_inflate = classes_to_50_gap[classes_to_50_gap > 0]

inflate_frames = [pd.DataFrame()]
for class_label, missing_rows in classes_to_inflate.items():
    class_rows = df[df[target] == class_label]
    inflate_frames.append(class_rows.sample(n=missing_rows, replace=True, random_state=42))
df_inflate = pd.concat(inflate_frames)
df_inflate[target].value_counts()


0    188
2    125
1     21
Name: Status, dtype: int64

1    29
Name: Status, dtype: int64

In [17]:
# Append the oversampled rows to the full frame, then redo the feature/label
# separation and the fixed-seed split on the enlarged data.
# NOTE(review): the appended rows are copies of existing rows, so duplicates can
# land on both sides of this split - keep that in mind when reading the scores.
df_full_inflate = pd.concat([df, df_inflate])
y_inflate = df_full_inflate[target]
X_inflate = df_full_inflate.drop(columns=target)
X_train_inflate, X_test_inflate, y_train_inflate, y_test_inflate = train_test_split(
    X_inflate, y_inflate, test_size=test_size_proportion, random_state=42
)

In [18]:
# Same fit-and-evaluate run as the baseline, but on the inflated train/test split.
print(f'inflate dataset {metric} scores:')
inflate_fit_result = utils.fit_and_evaluate(
    X_train_inflate, y_train_inflate, X_test_inflate, y_test_inflate,
    search_estimators=search_pipelines, search_params=search_parameters, scoring=metric,
)
_, inflate_scores = inflate_fit_result
# One row per estimator, one score column; transposed only for display.
inflate_score_column = f'inflate data {metric} score'
inflate_dataset_result_df = pd.DataFrame.from_dict(
    inflate_scores, orient='index', columns=[inflate_score_column]
)
display(inflate_dataset_result_df.T)

inflate dataset accuracy scores:


Unnamed: 0,lg,rf,xgb
inflate data accuracy score,0.711111,0.655556,0.788889


In [None]:
# Append the inflated-data scores to the results table as a new column.
full_results_df = pd.concat([full_results_df, inflate_dataset_result_df], axis=1)

### running experiments

In [15]:
# best_estimators = {}
# best_scores = {}
# for i, s in enumerate(settings):
#     start = time.time()
#     print(f'{i} / {len(settings)}, {s}', end=' ')
#     augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
#                              method=s['method'], regression=regression_task,
#                              continuous_feats=continuous_features,
#                              cf_scoring = metric,
#                              kw_args=s.get('kw_args', {})
#                              )

#     X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
#     best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
#     utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
#                             search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#     result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
#                                                 orient='index',
#                                                 columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

#     X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
#     best_estimators[f'{i}'], best_scores[f'{i}'] = \
#         utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
#                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#     result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
#                                        orient='index',
#                                        columns=[f'{json.dumps((list(s.values())))} {metric} score'])
    
#     full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
#     display(full_results_df)
#     print(f'{time.time() - start} seconds for settings {i}')

# Run every augmentation setting twice - once with class balancing, once with a
# fixed-size augmentation - and append both score columns to `full_results_df`.
#
# best_estimators / best_scores are keyed by '<i>_balanced' and '<i>', where <i>
# is the 0-based position of the setting in `settings`.
best_estimators = {}
best_scores = {}
for i, s in enumerate(settings):
    start = time.time()
    print(f'{i+1} / {len(settings)}, {s}', end=' ')

    # cf_genetic runs on the inflated split; every other method uses the plain split.
    # (Presumably the genetic search needs more minority-class rows than the plain
    # training set offers - TODO confirm.)
    # The SAME test set is used for both evaluations of a setting. Previously the
    # non-balanced cf_genetic run was scored on X_test/y_test although its training
    # data came from the inflated split, which is drawn from a different shuffle
    # and can therefore contain those very test rows.
    # NOTE(review): cf_genetic columns are scored on the inflated test set, so
    # compare them with the 'inflate data' baseline rather than the 'full data' one.
    if 'cf_genetic' in s['method']:
        X_fit, y_fit = X_train_inflate, y_train_inflate
        X_eval, y_eval = X_test_inflate, y_test_inflate
    else:
        X_fit, y_fit = X_train, y_train
        X_eval, y_eval = X_test, y_test

    augmentor = DataAugmentor(X_fit, y_fit, X_eval, y_eval,
                              method=s['method'], regression=regression_task,
                              continuous_feats=continuous_features,
                              cf_scoring=metric,
                              kw_args=s.get('kw_args', {})
                              )
    # Column label prefix, e.g. '["smote"]' or '["cf_genetic", {...}]'.
    setting_label = json.dumps(list(s.values()))

    # 1) Augment until the classes are balanced.
    X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
    best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
        utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_eval, y_eval,
                               search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
    result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
                                                orient='index',
                                                columns=[f'{setting_label} balanced {metric} score'])

    # 2) Augment by `augment_sample` without balancing.
    X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
    best_estimators[f'{i}'], best_scores[f'{i}'] = \
        utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_eval, y_eval,
                               search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
    result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
                                       orient='index',
                                       columns=[f'{setting_label} {metric} score'])

    full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
    print(f'{time.time() - start} seconds for settings {i}')



0 / 4, {'method': 'random'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']


Unnamed: 0,full data accuracy score,"[""random""] balanced accuracy score","[""random""] accuracy score"
lg,0.738095,0.702381,0.761905
rf,0.797619,0.75,0.809524
xgb,0.761905,0.75,0.75


9.455303192138672 seconds for settings 0
1 / 4, {'method': 'smote'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']


Unnamed: 0,full data accuracy score,"[""random""] balanced accuracy score","[""random""] accuracy score","[""smote""] balanced accuracy score","[""smote""] accuracy score"
lg,0.738095,0.702381,0.761905,0.642857,0.642857
rf,0.797619,0.75,0.809524,0.761905,0.761905
xgb,0.761905,0.75,0.75,0.738095,0.738095


9.954652070999146 seconds for settings 1
2 / 4, {'method': 'cf_random'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
0/63


100%|██████████| 1/1 [00:00<00:00,  3.87it/s]
100%|██████████| 1/1 [00:00<00:00,  4.87it/s]
100%|██████████| 1/1 [00:00<00:00,  6.33it/s]
100%|██████████| 1/1 [00:00<00:00,  5.80it/s]
100%|██████████| 1/1 [00:00<00:00,  5.76it/s]
100%|██████████| 1/1 [00:00<00:00,  6.82it/s]
100%|██████████| 1/1 [00:00<00:00,  7.07it/s]
100%|██████████| 1/1 [00:00<00:00,  7.50it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
100%|██████████| 1/1 [00:00<00:00,  6.30it/s]
100%|██████████| 1/1 [00:00<00:00,  3.08it/s]
100%|██████████| 1/1 [00:00<00:00,  5.30it/s]
100%|██████████| 1/1 [00:00<00:00,  5.92it/s]
100%|██████████| 1/1 [00:00<00:00,  4.90it/s]
100%|██████████| 1/1 [00:00<00:00,  6.33it/s]
100%|██████████| 1/1 [00:00<00:00,  7.10it/s]
100%|██████████| 1/1 [00:00<00:00,  7.05it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]
100%|██████████| 1/1 [00:00<00:00,  7.04it/s]
100%|██████████| 1/1 [00:00<00:00,  7.10it/s]
100%|██████████| 1/1 [00:00<00:00,

50/63


100%|██████████| 1/1 [00:00<00:00,  5.75it/s]
100%|██████████| 1/1 [00:00<00:00,  6.32it/s]
100%|██████████| 1/1 [00:00<00:00,  4.66it/s]
100%|██████████| 1/1 [00:00<00:00,  6.38it/s]
100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
100%|██████████| 1/1 [00:00<00:00,  4.23it/s]
100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
100%|██████████| 1/1 [00:00<00:00,  5.29it/s]


0/167


100%|██████████| 1/1 [00:00<00:00,  2.30it/s]
100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
100%|██████████| 1/1 [00:00<00:00,  5.08it/s]
100%|██████████| 1/1 [00:00<00:00,  4.35it/s]
100%|██████████| 1/1 [00:00<00:00,  4.25it/s]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.72it/s]
100%|██████████| 1/1 [00:00<00:00,  2.04it/s]
100%|██████████| 1/1 [00:00<00:00,  3.91it/s]
100%|██████████| 1/1 [00:00<00:00,  4.84it/s]
100%|██████████| 1/1 [00:00<00:00,  6.31it/s]
100%|██████████| 1/1 [00:00<00:00,  4.22it/s]
100%|██████████| 1/1 [00:00<00:00,  4.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.25it/s]
100%|██████████| 1/1 [00:00<00:00,  3.43it/s]
100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 1/1 [00:00<00:00,  3.55it/s]
100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
100%|██████████| 1/1 [00:00<00:00,  2.75it/s]
100%|██████████| 1/1 [00:00<00:00,  4.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.96it/s]
100%|██████████| 1/1 [00:00<00:00,  5.27it/s]
100%|██████████| 1/1 [00:00<00:00,  4.19it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  6.33it/s]
100%|██████████| 1/1 [00:00<00:00,  5.68it/s]
100%|██████████| 1/1 [00:00<00:00,  3.76it/s]
100%|██████████| 1/1 [00:00<00:00,  5.52it/s]
100%|██████████| 1/1 [00:00<00:00,  7.06it/s]
100%|██████████| 1/1 [00:00<00:00,  5.74it/s]
100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
100%|██████████| 1/1 [00:00<00:00,  3.72it/s]
100%|██████████| 1/1 [00:00<00:00,  2.33it/s]
100%|██████████| 1/1 [00:00<00:00,  2.37it/s]
100%|██████████| 1/1 [00:00<00:00,  3.22it/s]
100%|██████████| 1/1 [00:00<00:00,  5.30it/s]
100%|██████████| 1/1 [00:00<00:00,  4.51it/s]


50/167


100%|██████████| 1/1 [00:00<00:00,  2.19it/s]
100%|██████████| 1/1 [00:00<00:00,  1.14it/s]
100%|██████████| 1/1 [00:00<00:00,  2.88it/s]
100%|██████████| 1/1 [00:01<00:00,  1.23s/it]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 01 sec


100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
100%|██████████| 1/1 [00:00<00:00,  4.19it/s]
100%|██████████| 1/1 [00:00<00:00,  4.96it/s]
100%|██████████| 1/1 [00:01<00:00,  1.12s/it]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 01 sec


100%|██████████| 1/1 [00:00<00:00,  3.47it/s]
100%|██████████| 1/1 [00:00<00:00,  6.64it/s]
100%|██████████| 1/1 [00:00<00:00,  6.01it/s]
100%|██████████| 1/1 [00:00<00:00,  4.41it/s]
100%|██████████| 1/1 [00:00<00:00,  3.66it/s]
100%|██████████| 1/1 [00:00<00:00,  2.43it/s]
100%|██████████| 1/1 [00:01<00:00,  1.03s/it]
100%|██████████| 1/1 [00:00<00:00,  1.06it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  1.12it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.29it/s]
100%|██████████| 1/1 [00:00<00:00,  2.82it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  4.47it/s]
100%|██████████| 1/1 [00:00<00:00,  4.61it/s]
100%|██████████| 1/1 [00:00<00:00,  1.55it/s]
100%|██████████| 1/1 [00:00<00:00,  1.22it/s]
100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:02<00:00,  2.25s/it]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
100%|██████████| 1/1 [00:00<00:00,  5.10it/s]
100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
100%|██████████| 1/1 [00:00<00:00,  2.32it/s]
100%|██████████| 1/1 [00:00<00:00,  1.03it/s]
100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  4.35it/s]
100%|██████████| 1/1 [00:00<00:00,  4.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  2.81it/s]
100%|██████████| 1/1 [00:00<00:00,  3.05it/s]
100%|██████████| 1/1 [00:00<00:00,  3.15it/s]
100%|██████████| 1/1 [00:00<00:00,  6.74it/s]
100%|██████████| 1/1 [00:00<00:00,  6.35it/s]


100/167


100%|██████████| 1/1 [00:00<00:00,  7.11it/s]
100%|██████████| 1/1 [00:00<00:00,  1.95it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  3.83it/s]
100%|██████████| 1/1 [00:00<00:00,  7.41it/s]
100%|██████████| 1/1 [00:00<00:00,  3.19it/s]
100%|██████████| 1/1 [00:00<00:00,  4.40it/s]
100%|██████████| 1/1 [00:00<00:00,  5.60it/s]
100%|██████████| 1/1 [00:00<00:00,  4.64it/s]
100%|██████████| 1/1 [00:00<00:00,  3.70it/s]
100%|██████████| 1/1 [00:00<00:00,  4.61it/s]
100%|██████████| 1/1 [00:00<00:00,  2.59it/s]
100%|██████████| 1/1 [00:00<00:00,  6.03it/s]
100%|██████████| 1/1 [00:00<00:00,  7.24it/s]
100%|██████████| 1/1 [00:00<00:00,  4.58it/s]
100%|██████████| 1/1 [00:00<00:00,  6.84it/s]
100%|██████████| 1/1 [00:00<00:00,  3.17it/s]
100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
100%|██████████| 1/1 [00:00<00:00,  5.95it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  5.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.78it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]
100%|██████████| 1/1 [00:00<00:00,  2.95it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:01<00:00,  1.06s/it]
100%|██████████| 1/1 [00:00<00:00,  5.30it/s]
100%|██████████| 1/1 [00:00<00:00,  4.05it/s]
100%|██████████| 1/1 [00:00<00:00,  8.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.42it/s]


150/167


100%|██████████| 1/1 [00:00<00:00,  6.66it/s]
100%|██████████| 1/1 [00:00<00:00,  6.56it/s]
100%|██████████| 1/1 [00:00<00:00,  9.45it/s]
100%|██████████| 1/1 [00:00<00:00,  8.26it/s]
100%|██████████| 1/1 [00:41<00:00, 41.32s/it]
100%|██████████| 1/1 [00:00<00:00,  4.18it/s]
100%|██████████| 1/1 [00:00<00:00,  5.96it/s]
100%|██████████| 1/1 [00:00<00:00,  2.79it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  2.29it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.07it/s]
100%|██████████| 1/1 [00:00<00:00,  6.65it/s]
100%|██████████| 1/1 [00:00<00:00,  6.63it/s]
100%|██████████| 1/1 [00:00<00:00,  7.48it/s]
100%|██████████| 1/1 [00:00<00:00,  5.97it/s]
100%|██████████| 1/1 [00:00<00:00,  6.16it/s]
100%|██████████| 1/1 [00:00<00:00,  4.40it/s]
100%|██████████| 1/1 [00:00<00:00,  5.47it/s]


0/167


100%|██████████| 1/1 [00:00<00:00,  4.37it/s]
100%|██████████| 1/1 [00:00<00:00,  7.15it/s]
100%|██████████| 1/1 [00:00<00:00,  7.68it/s]
100%|██████████| 1/1 [00:00<00:00,  8.15it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]
100%|██████████| 1/1 [00:00<00:00,  8.49it/s]
100%|██████████| 1/1 [00:00<00:00,  7.74it/s]
100%|██████████| 1/1 [00:00<00:00,  8.70it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  3.99it/s]
100%|██████████| 1/1 [00:00<00:00,  5.95it/s]
100%|██████████| 1/1 [00:00<00:00,  7.60it/s]
100%|██████████| 1/1 [00:00<00:00,  8.07it/s]
100%|██████████| 1/1 [00:00<00:00,  8.15it/s]
100%|██████████| 1/1 [00:00<00:00,  7.87it/s]
100%|██████████| 1/1 [00:44<00:00, 44.18s/it]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]
100%|██████████| 1/1 [00:00<00:00,  7.41it/s]
100%|██████████| 1/1 [00:00<00:00,  7.61it/s]
100%|██████████| 1/1 [00:00<00:00,

50/167


100%|██████████| 1/1 [00:00<00:00,  6.71it/s]
100%|██████████| 1/1 [00:00<00:00,  7.15it/s]
100%|██████████| 1/1 [00:00<00:00,  8.09it/s]
100%|██████████| 1/1 [00:00<00:00,  8.37it/s]
100%|██████████| 1/1 [00:00<00:00,  8.31it/s]
100%|██████████| 1/1 [00:00<00:00,  7.83it/s]
100%|██████████| 1/1 [00:00<00:00,  8.17it/s]
100%|██████████| 1/1 [00:00<00:00,  8.18it/s]
100%|██████████| 1/1 [00:00<00:00,  4.74it/s]
100%|██████████| 1/1 [00:00<00:00,  8.61it/s]
100%|██████████| 1/1 [00:00<00:00,  7.72it/s]
100%|██████████| 1/1 [00:00<00:00,  7.61it/s]
100%|██████████| 1/1 [00:00<00:00,  6.41it/s]
100%|██████████| 1/1 [00:00<00:00,  7.64it/s]
100%|██████████| 1/1 [00:00<00:00,  7.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.67it/s]
100%|██████████| 1/1 [00:00<00:00,  8.34it/s]
100%|██████████| 1/1 [00:00<00:00,  7.74it/s]
100%|██████████| 1/1 [00:00<00:00,  7.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.59it/s]
100%|██████████| 1/1 [00:00<00:00,  3.87it/s]
100%|██████████| 1/1 [00:00<00:00,

100/167


100%|██████████| 1/1 [00:00<00:00,  8.60it/s]
100%|██████████| 1/1 [00:00<00:00,  9.53it/s]
100%|██████████| 1/1 [00:00<00:00,  8.44it/s]
100%|██████████| 1/1 [00:00<00:00,  4.50it/s]
100%|██████████| 1/1 [00:00<00:00,  5.83it/s]
100%|██████████| 1/1 [00:00<00:00,  6.87it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]
100%|██████████| 1/1 [00:00<00:00,  7.36it/s]
100%|██████████| 1/1 [00:00<00:00,  8.03it/s]
100%|██████████| 1/1 [00:00<00:00,  7.18it/s]
100%|██████████| 1/1 [00:00<00:00,  8.27it/s]
100%|██████████| 1/1 [00:00<00:00,  6.54it/s]
100%|██████████| 1/1 [00:00<00:00,  7.90it/s]
100%|██████████| 1/1 [00:00<00:00,  7.66it/s]
100%|██████████| 1/1 [00:00<00:00,  7.14it/s]
100%|██████████| 1/1 [00:00<00:00,  6.79it/s]
100%|██████████| 1/1 [00:00<00:00,  7.63it/s]
100%|██████████| 1/1 [00:00<00:00,  7.55it/s]
100%|██████████| 1/1 [00:00<00:00,  8.15it/s]
100%|██████████| 1/1 [00:00<00:00,  6.68it/s]
100%|██████████| 1/1 [00:00<00:00,  8.60it/s]
100%|██████████| 1/1 [00:00<00:00,

150/167


100%|██████████| 1/1 [00:00<00:00,  7.91it/s]
100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,  7.70it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  8.53it/s]
100%|██████████| 1/1 [00:00<00:00,  7.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.25it/s]
100%|██████████| 1/1 [00:00<00:00,  7.09it/s]
100%|██████████| 1/1 [00:00<00:00,  8.14it/s]
100%|██████████| 1/1 [00:00<00:00,  6.95it/s]
100%|██████████| 1/1 [00:00<00:00,  7.55it/s]
100%|██████████| 1/1 [00:00<00:00,  7.25it/s]
100%|██████████| 1/1 [00:00<00:00,  7.68it/s]
100%|██████████| 1/1 [00:00<00:00,  8.66it/s]
100%|██████████| 1/1 [00:00<00:00,  9.52it/s]
100%|██████████| 1/1 [00:00<00:00,  7.22it/s]
100%|██████████| 1/1 [00:00<00:00,  4.08it/s]


Unnamed: 0,full data accuracy score,"[""random""] balanced accuracy score","[""random""] accuracy score","[""smote""] balanced accuracy score","[""smote""] accuracy score","[""cf_random""] balanced accuracy score","[""cf_random""] accuracy score"
lg,0.738095,0.702381,0.761905,0.642857,0.642857,0.702381,0.77381
rf,0.797619,0.75,0.809524,0.761905,0.761905,0.690476,0.785714
xgb,0.761905,0.75,0.75,0.738095,0.738095,0.77381,0.77381


323.15717673301697 seconds for settings 2
3 / 4, {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
0/63


100%|██████████| 1/1 [00:00<00:00,  3.93it/s]
100%|██████████| 1/1 [00:00<00:00,  5.42it/s]
100%|██████████| 1/1 [00:00<00:00,  6.18it/s]
100%|██████████| 1/1 [00:00<00:00,  6.37it/s]
100%|██████████| 1/1 [00:00<00:00,  6.89it/s]
100%|██████████| 1/1 [00:00<00:00,  6.76it/s]
100%|██████████| 1/1 [00:00<00:00,  6.61it/s]
100%|██████████| 1/1 [00:00<00:00,  6.24it/s]
100%|██████████| 1/1 [00:00<00:00,  6.35it/s]
100%|██████████| 1/1 [00:00<00:00,  6.38it/s]
100%|██████████| 1/1 [00:00<00:00,  5.40it/s]
100%|██████████| 1/1 [00:00<00:00,  6.10it/s]
100%|██████████| 1/1 [00:00<00:00,  6.12it/s]
100%|██████████| 1/1 [00:00<00:00,  5.46it/s]
100%|██████████| 1/1 [00:00<00:00,  5.76it/s]
100%|██████████| 1/1 [00:00<00:00,  5.93it/s]
100%|██████████| 1/1 [00:00<00:00,  5.79it/s]
100%|██████████| 1/1 [00:00<00:00,  5.62it/s]
100%|██████████| 1/1 [00:00<00:00,  5.88it/s]
100%|██████████| 1/1 [00:00<00:00,  6.22it/s]
100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
100%|██████████| 1/1 [00:00<00:00,

50/63


100%|██████████| 1/1 [00:00<00:00,  6.72it/s]
100%|██████████| 1/1 [00:00<00:00,  6.18it/s]
100%|██████████| 1/1 [00:00<00:00,  6.04it/s]
100%|██████████| 1/1 [00:00<00:00,  3.92it/s]
100%|██████████| 1/1 [00:00<00:00,  4.86it/s]
100%|██████████| 1/1 [00:00<00:00,  6.57it/s]
100%|██████████| 1/1 [00:00<00:00,  6.27it/s]
100%|██████████| 1/1 [00:00<00:00,  6.83it/s]
100%|██████████| 1/1 [00:00<00:00,  5.21it/s]
100%|██████████| 1/1 [00:00<00:00,  6.51it/s]
100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
100%|██████████| 1/1 [00:00<00:00,  6.96it/s]
100%|██████████| 1/1 [00:00<00:00,  7.24it/s]
100%|██████████| 1/1 [00:00<00:00,  7.57it/s]


0/167


100%|██████████| 1/1 [00:31<00:00, 31.18s/it]
100%|██████████| 1/1 [00:31<00:00, 31.95s/it]
100%|██████████| 1/1 [00:31<00:00, 31.48s/it]
100%|██████████| 1/1 [01:14<00:00, 74.98s/it]
100%|██████████| 1/1 [00:57<00:00, 57.73s/it]
100%|██████████| 1/1 [01:25<00:00, 85.02s/it]
100%|██████████| 1/1 [00:43<00:00, 43.69s/it]
100%|██████████| 1/1 [01:27<00:00, 87.11s/it]
100%|██████████| 1/1 [00:31<00:00, 31.66s/it]
100%|██████████| 1/1 [00:39<00:00, 39.16s/it]
100%|██████████| 1/1 [01:06<00:00, 66.13s/it]
100%|██████████| 1/1 [00:42<00:00, 42.47s/it]
100%|██████████| 1/1 [00:46<00:00, 46.21s/it]
100%|██████████| 1/1 [00:49<00:00, 49.98s/it]
100%|██████████| 1/1 [00:51<00:00, 51.24s/it]
100%|██████████| 1/1 [00:49<00:00, 49.17s/it]
100%|██████████| 1/1 [00:25<00:00, 26.00s/it]
100%|██████████| 1/1 [00:52<00:00, 52.20s/it]
100%|██████████| 1/1 [00:32<00:00, 32.73s/it]
100%|██████████| 1/1 [00:49<00:00, 49.92s/it]
100%|██████████| 1/1 [01:07<00:00, 67.76s/it]
100%|██████████| 1/1 [00:36<00:00,

In [None]:
# Show the accumulated score table (one row per estimator, one column per experiment).
full_results_df

In [None]:
# Persist the score table. The file name encodes the metric, the test-set
# proportion and the augmentation size so different runs do not overwrite each other.
import os

results_path = rf'../log/experiment_multiclass_cirrhosis_{metric}_testsize{test_size_proportion}_augmentsample{augment_sample}.csv'
# to_csv fails when the target directory is missing; create it first so the
# results of a long experiment run are not lost at the very last step.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
full_results_df.to_csv(results_path)