In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import pandas as pd

import sys
sys.path.append('..')

from helpers import utils, pipelines, models

from sklearn.model_selection import train_test_split

import multiprocessing

from data_augmentaion.data_augmentator import DataAugmentor 

import json

import time

### Define research parameters

In [6]:
regression_task = False
# continuous_features = []
metric = 'accuracy'
test_size_proportion = 0.2
augment_sample = 0.5

search_pipelines = pipelines.get_classification_pipelines()
search_parameters = models.parameters

settings = [
    {'method': 'random'},
    {'method': 'smote'},
    {'method': 'cf_random'},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 1}},
    {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 1}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 1}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 0.2}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 1}},
]

In [7]:
df = pd.read_csv(r"..\datasets\cirrhosis.csv")
df.drop(columns="ID",inplace=True)
target = 'Status'

In [8]:
missing_df = df.isna().sum()
dtypes_df = df.dtypes
pd.concat([missing_df, dtypes_df], axis=1)

Unnamed: 0,0,1
N_Days,0,int64
Status,0,object
Drug,106,object
Age,0,int64
Sex,0,object
Ascites,106,object
Hepatomegaly,106,object
Spiders,106,object
Edema,0,object
Bilirubin,0,float64


In [9]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_selector


numerical_imputer = SimpleImputer(strategy='mean')#.set_output(transform='pandas')
categorical_imputer = SimpleImputer(strategy='most_frequent')#.set_output(transform='pandas')

final_imputer = ColumnTransformer([
    ('numerical', numerical_imputer, make_column_selector(dtype_include=['int', 'float'])),
    ('categorical', categorical_imputer, make_column_selector(dtype_include=['object'])),
])

df = final_imputer.fit_transform(df)

In [10]:
df = pd.DataFrame(df, columns=[c.split('__')[-1] for c in final_imputer.get_feature_names_out()]).astype(dtypes_df)

In [11]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df[target] = le.fit_transform(df[target])

#### split data

In [12]:
# all data
X = df.drop(target, axis=1)
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size_proportion, random_state=42)

In [13]:
continuous_features = X.select_dtypes(include=['number']).columns.tolist()
continuous_features

['N_Days',
 'Age',
 'Bilirubin',
 'Cholesterol',
 'Albumin',
 'Copper',
 'Alk_Phos',
 'SGOT',
 'Tryglicerides',
 'Platelets',
 'Prothrombin',
 'Stage']

In [14]:
display(y_train.value_counts())

0    188
2    125
1     21
Name: Status, dtype: int64

In [15]:
full_results_df = pd.DataFrame()

### Whole dataset scores

In [16]:
print(f'whole adult dataset {metric} scores:')
_, whole_scores = utils.fit_and_evaluate(X_train, y_train, X_test, y_test,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
whole_dataset_result_df = pd.DataFrame.from_dict(whole_scores, orient='index', columns=[f'full data {metric} score'])
display(whole_dataset_result_df.T)

whole adult dataset accuracy scores:


Unnamed: 0,lg,rf,xgb
full data accuracy score,0.738095,0.797619,0.761905


In [17]:
full_results_df = pd.concat([full_results_df, whole_dataset_result_df], axis=1)

### little flattening the y-curve

In [18]:
y_train_value_counts = y_train.value_counts()
display(y_train_value_counts)
classes_to_50_gap = 100 - y_train.value_counts()
classes_to_inflate = classes_to_50_gap[classes_to_50_gap>0]
df_inflate = pd.DataFrame()
for c, gap in classes_to_inflate.items():
    df_c_inflate = df[df[target]==c].sample(n=gap, replace=True, random_state=42)
    df_inflate = pd.concat([df_inflate, df_c_inflate])
df_inflate[target].value_counts()


0    188
2    125
1     21
Name: Status, dtype: int64

1    79
Name: Status, dtype: int64

In [19]:
df_full_inflate = pd.concat([df, df_inflate])
X_inflate = df_full_inflate.drop(target, axis=1)
y_inflate = df_full_inflate[target]
X_train_inflate, X_test_inflate, y_train_inflate, y_test_inflate = train_test_split(X_inflate, y_inflate, test_size=test_size_proportion, random_state=42)

In [20]:
print(f'inflate dataset {metric} scores:')
_, inflate_scores = utils.fit_and_evaluate(X_train_inflate, y_train_inflate, X_test_inflate, y_test_inflate,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
inflate_dataset_result_df = pd.DataFrame.from_dict(inflate_scores, orient='index', columns=[f'inflate data {metric} score'])
display(inflate_dataset_result_df.T)

inflate dataset accuracy scores:


Unnamed: 0,lg,rf,xgb
inflate data accuracy score,0.7,0.7,0.77


In [21]:
full_results_df = pd.concat([full_results_df, inflate_dataset_result_df], axis=1)

### running experiments

In [22]:
# best_estimators = {}
# best_scores = {}
# for i, s in enumerate(settings):
#     start = time.time()
#     print(f'{i} / {len(settings)}, {s}', end=' ')
#     augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
#                              method=s['method'], regression=regression_task,
#                              continuous_feats=continuous_features,
#                              cf_scoring = metric,
#                              kw_args=s.get('kw_args', {})
#                              )

#     X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
#     best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
#     utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
#                             search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#     result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
#                                                 orient='index',
#                                                 columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

#     X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
#     best_estimators[f'{i}'], best_scores[f'{i}'] = \
#         utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
#                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
#     result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
#                                        orient='index',
#                                        columns=[f'{json.dumps((list(s.values())))} {metric} score'])
    
#     full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
#     display(full_results_df)
#     print(f'{time.time() - start} seconds for settings {i}')

best_estimators = {}
best_scores = {}
for i, s in enumerate(settings):
    start = time.time()
    print(f'{i+1} / {len(settings)}, {s}', end=' ')
    if 'cf_genetic' in s['method']:
        augmentor = DataAugmentor(X_train_inflate, y_train_inflate, X_test_inflate, y_test_inflate,
                             method=s['method'], regression=regression_task,
                             continuous_feats=continuous_features,
                             cf_scoring = metric,
                             kw_args=s.get('kw_args', {})
                             )

        X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
        best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
        utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test_inflate, y_test_inflate,
                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
        result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
                                                    orient='index',
                                                    columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

        X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
        best_estimators[f'{i}'], best_scores[f'{i}'] = \
            utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
        result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
                                        orient='index',
                                        columns=[f'{json.dumps((list(s.values())))} {metric} score'])
        
        full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
        print(f'{time.time() - start} seconds for settings {i}')
    else:
        augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
                                method=s['method'], regression=regression_task,
                                continuous_feats=continuous_features,
                                cf_scoring = metric,
                                kw_args=s.get('kw_args', {})
                                )

        X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
        best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
        utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
        result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'],
                                                    orient='index',
                                                    columns=[f'{json.dumps((list(s.values())))} balanced {metric} score'])

        X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
        best_estimators[f'{i}'], best_scores[f'{i}'] = \
            utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
                                search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
        result_df = pd.DataFrame.from_dict(best_scores[f'{i}'],
                                        orient='index',
                                        columns=[f'{json.dumps((list(s.values())))} {metric} score'])
        
        full_results_df = pd.concat([full_results_df, result_df_balanced, result_df], axis=1)
        print(f'{time.time() - start} seconds for settings {i}')



1 / 4, {'method': 'random'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
3.489854097366333 seconds for settings 0
2 / 4, {'method': 'smote'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
4.849139213562012 seconds for settings 1
3 / 4, {'method': 'cf_random'} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
0/63


100%|██████████| 1/1 [00:00<00:00,  7.58it/s]
100%|██████████| 1/1 [00:00<00:00,  9.92it/s]
100%|██████████| 1/1 [00:00<00:00, 10.06it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00, 10.00it/s]
100%|██████████| 1/1 [00:00<00:00, 10.05it/s]
100%|██████████| 1/1 [00:00<00:00,  9.88it/s]
100%|██████████| 1/1 [00:00<00:00, 10.09it/s]
100%|██████████| 1/1 [00:00<00:00, 11.86it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.95it/s]
100%|██████████| 1/1 [00:00<00:00, 10.07it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00, 10.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.91it/s]
100%|██████████| 1/1 [00:00<00:00, 12.07it/s]
100%|██████████| 1/1 [00:00<00:00, 11.99it/s]
100%|██████████| 1/1 [00:00<00:00, 10.01it/s]
100%|██████████| 1/1 [00:00<00:00, 10.14it/s]
100%|██████████| 1/1 [00:00<00:00,

50/63


100%|██████████| 1/1 [00:00<00:00,  5.97it/s]
100%|██████████| 1/1 [00:00<00:00,  9.97it/s]
100%|██████████| 1/1 [00:00<00:00, 10.12it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]
100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
100%|██████████| 1/1 [00:00<00:00, 11.97it/s]
100%|██████████| 1/1 [00:00<00:00, 12.24it/s]
100%|██████████| 1/1 [00:00<00:00, 11.96it/s]
100%|██████████| 1/1 [00:00<00:00, 10.01it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:00<00:00, 10.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]


0/167


100%|██████████| 1/1 [00:00<00:00,  3.95it/s]
100%|██████████| 1/1 [00:00<00:00,  3.57it/s]
100%|██████████| 1/1 [00:00<00:00,  9.93it/s]
100%|██████████| 1/1 [00:00<00:00,  7.53it/s]
100%|██████████| 1/1 [00:00<00:00,  7.51it/s]
100%|██████████| 1/1 [00:00<00:00,  3.74it/s]
100%|██████████| 1/1 [00:00<00:00,  4.28it/s]
100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
100%|██████████| 1/1 [00:00<00:00,  5.00it/s]
100%|██████████| 1/1 [00:00<00:00,  3.68it/s]
100%|██████████| 1/1 [00:00<00:00,  7.38it/s]
100%|██████████| 1/1 [00:00<00:00, 10.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.17it/s]
100%|██████████| 1/1 [00:00<00:00, 10.10it/s]
100%|██████████| 1/1 [00:00<00:00,  7.48it/s]
100%|██████████| 1/1 [00:00<00:00,  4.60it/s]
100%|██████████| 1/1 [00:00<00:00,  5.01it/s]
100%|██████████| 1/1 [00:00<00:00,  6.03it/s]
100%|██████████| 1/1 [00:00<00:00,  7.46it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  8.67it/s]
100%|██████████| 1/1 [00:00<00:00,  4.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.44it/s]
100%|██████████| 1/1 [00:00<00:00,  5.01it/s]
100%|██████████| 1/1 [00:00<00:00, 10.06it/s]
100%|██████████| 1/1 [00:00<00:00,  6.63it/s]
100%|██████████| 1/1 [00:00<00:00,  6.66it/s]
100%|██████████| 1/1 [00:00<00:00,  9.91it/s]
100%|██████████| 1/1 [00:00<00:00, 10.45it/s]
100%|██████████| 1/1 [00:00<00:00,  3.58it/s]
100%|██████████| 1/1 [00:00<00:00,  7.46it/s]
100%|██████████| 1/1 [00:00<00:00, 10.48it/s]
100%|██████████| 1/1 [00:00<00:00,  8.59it/s]
100%|██████████| 1/1 [00:00<00:00,  5.72it/s]
100%|██████████| 1/1 [00:00<00:00,  9.51it/s]
100%|██████████| 1/1 [00:00<00:00,  7.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.39it/s]
100%|██████████| 1/1 [00:00<00:00,  6.73it/s]
100%|██████████| 1/1 [00:00<00:00,  8.42it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]


50/167


100%|██████████| 1/1 [00:00<00:00,  4.81it/s]
100%|██████████| 1/1 [00:00<00:00,  4.25it/s]
100%|██████████| 1/1 [00:00<00:00,  5.92it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.45it/s]
100%|██████████| 1/1 [00:00<00:00,  8.65it/s]
100%|██████████| 1/1 [00:00<00:00,  4.43it/s]
100%|██████████| 1/1 [00:00<00:00,  2.40it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  5.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.57it/s]
100%|██████████| 1/1 [00:00<00:00,  8.57it/s]
100%|██████████| 1/1 [00:00<00:00,  6.01it/s]
100%|██████████| 1/1 [00:00<00:00,  5.75it/s]
100%|██████████| 1/1 [00:00<00:00,  3.75it/s]
100%|██████████| 1/1 [00:00<00:00,  2.76it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  2.70it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  9.22it/s]
100%|██████████| 1/1 [00:00<00:00,  8.16it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  6.66it/s]
100%|██████████| 1/1 [00:00<00:00,  7.24it/s]
100%|██████████| 1/1 [00:00<00:00,  3.66it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  6.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
100%|██████████| 1/1 [00:00<00:00,  8.07it/s]
100%|██████████| 1/1 [00:00<00:00,  5.25it/s]
100%|██████████| 1/1 [00:00<00:00,  9.66it/s]
100%|██████████| 1/1 [00:00<00:00, 11.13it/s]
100%|██████████| 1/1 [00:00<00:00,  5.06it/s]
100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:00<00:00,  7.41it/s]
100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
100%|██████████| 1/1 [00:00<00:00,  7.90it/s]
100%|██████████| 1/1 [00:00<00:00,  7.60it/s]
100%|██████████| 1/1 [00:00<00:00,  7.10it/s]
100%|██████████| 1/1 [00:00<00:00,  3.69it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.84it/s]
100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
100%|██████████| 1/1 [00:00<00:00,  6.27it/s]
100%|██████████| 1/1 [00:00<00:00,  8.19it/s]


100/167


100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.73it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  4.83it/s]
100%|██████████| 1/1 [00:00<00:00, 11.02it/s]
100%|██████████| 1/1 [00:00<00:00,  4.99it/s]
100%|██████████| 1/1 [00:00<00:00,  6.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.59it/s]
100%|██████████| 1/1 [00:00<00:00,  7.04it/s]
100%|██████████| 1/1 [00:00<00:00,  5.93it/s]
100%|██████████| 1/1 [00:00<00:00,  7.28it/s]
100%|██████████| 1/1 [00:00<00:00,  4.14it/s]
100%|██████████| 1/1 [00:00<00:00,  7.51it/s]
100%|██████████| 1/1 [00:00<00:00,  6.97it/s]
100%|██████████| 1/1 [00:00<00:00,  5.18it/s]
100%|██████████| 1/1 [00:00<00:00,  8.45it/s]
100%|██████████| 1/1 [00:00<00:00,  4.59it/s]
100%|██████████| 1/1 [00:00<00:00,  7.54it/s]
100%|██████████| 1/1 [00:00<00:00, 10.06it/s]
100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  5.03it/s]
100%|██████████| 1/1 [00:00<00:00,  7.61it/s]
100%|██████████| 1/1 [00:00<00:00,

No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  1.97it/s]
100%|██████████| 1/1 [00:00<00:00,  7.26it/s]
100%|██████████| 1/1 [00:00<00:00,  5.42it/s]
100%|██████████| 1/1 [00:00<00:00, 10.17it/s]
100%|██████████| 1/1 [00:00<00:00, 12.10it/s]


150/167


100%|██████████| 1/1 [00:00<00:00,  7.12it/s]
100%|██████████| 1/1 [00:00<00:00,  7.19it/s]
100%|██████████| 1/1 [00:00<00:00, 10.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.99it/s]
100%|██████████| 1/1 [00:31<00:00, 31.37s/it]
100%|██████████| 1/1 [00:00<00:00,  5.01it/s]
100%|██████████| 1/1 [00:00<00:00,  6.46it/s]
100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
100%|██████████| 1/1 [00:00<00:00,  7.76it/s]
100%|██████████| 1/1 [00:00<00:00,  2.58it/s]


No Counterfactuals found for the given configuration, perhaps try with different parameters... ; total time taken: 00 min 00 sec


100%|██████████| 1/1 [00:00<00:00,  6.29it/s]
100%|██████████| 1/1 [00:00<00:00,  8.51it/s]
100%|██████████| 1/1 [00:00<00:00,  7.54it/s]
100%|██████████| 1/1 [00:00<00:00,  7.50it/s]
100%|██████████| 1/1 [00:00<00:00,  6.65it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  5.46it/s]
100%|██████████| 1/1 [00:00<00:00,  6.21it/s]


0/167


100%|██████████| 1/1 [00:00<00:00,  6.71it/s]
100%|██████████| 1/1 [00:00<00:00,  7.47it/s]
100%|██████████| 1/1 [00:00<00:00,  8.05it/s]
100%|██████████| 1/1 [00:00<00:00,  5.27it/s]
100%|██████████| 1/1 [00:00<00:00,  6.64it/s]
100%|██████████| 1/1 [00:00<00:00,  7.17it/s]
100%|██████████| 1/1 [00:00<00:00,  7.93it/s]
100%|██████████| 1/1 [00:00<00:00,  8.64it/s]
100%|██████████| 1/1 [00:00<00:00,  8.54it/s]
100%|██████████| 1/1 [00:00<00:00,  8.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.17it/s]
100%|██████████| 1/1 [00:00<00:00,  8.68it/s]
100%|██████████| 1/1 [00:00<00:00, 10.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]
100%|██████████| 1/1 [00:33<00:00, 33.88s/it]
100%|██████████| 1/1 [00:00<00:00,  9.96it/s]
100%|██████████| 1/1 [00:00<00:00, 10.02it/s]
100%|██████████| 1/1 [00:00<00:00,  8.42it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  4.25it/s]
100%|██████████| 1/1 [00:00<00:00,

50/167


100%|██████████| 1/1 [00:00<00:00,  3.93it/s]
100%|██████████| 1/1 [00:00<00:00,  6.27it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  8.57it/s]
100%|██████████| 1/1 [00:00<00:00,  7.87it/s]
100%|██████████| 1/1 [00:00<00:00,  7.46it/s]
100%|██████████| 1/1 [00:00<00:00,  7.56it/s]
100%|██████████| 1/1 [00:00<00:00,  7.69it/s]
100%|██████████| 1/1 [00:00<00:00,  5.19it/s]
100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.51it/s]
100%|██████████| 1/1 [00:00<00:00,  6.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.55it/s]
100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,  3.74it/s]
100%|██████████| 1/1 [00:00<00:00,  6.01it/s]
100%|██████████| 1/1 [00:00<00:00,  7.49it/s]
100%|██████████| 1/1 [00:00<00:00,  7.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.35it/s]
100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,

100/167


100%|██████████| 1/1 [00:00<00:00,  7.51it/s]
100%|██████████| 1/1 [00:00<00:00,  7.55it/s]
100%|██████████| 1/1 [00:00<00:00,  4.61it/s]
100%|██████████| 1/1 [00:00<00:00,  5.45it/s]
100%|██████████| 1/1 [00:00<00:00,  5.32it/s]
100%|██████████| 1/1 [00:00<00:00,  8.50it/s]
100%|██████████| 1/1 [00:00<00:00,  8.30it/s]
100%|██████████| 1/1 [00:00<00:00,  9.44it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  7.75it/s]
100%|██████████| 1/1 [00:00<00:00,  7.48it/s]
100%|██████████| 1/1 [00:00<00:00,  8.61it/s]
100%|██████████| 1/1 [00:00<00:00, 10.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.57it/s]
100%|██████████| 1/1 [00:00<00:00,  8.27it/s]
100%|██████████| 1/1 [00:00<00:00,  4.71it/s]
100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
100%|██████████| 1/1 [00:00<00:00,  6.65it/s]
100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,

150/167


100%|██████████| 1/1 [00:00<00:00,  8.58it/s]
100%|██████████| 1/1 [00:00<00:00,  9.64it/s]
100%|██████████| 1/1 [00:00<00:00,  8.52it/s]
100%|██████████| 1/1 [00:00<00:00, 10.07it/s]
100%|██████████| 1/1 [00:00<00:00,  9.98it/s]
100%|██████████| 1/1 [00:00<00:00, 10.13it/s]
100%|██████████| 1/1 [00:00<00:00,  8.52it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  7.50it/s]
100%|██████████| 1/1 [00:00<00:00,  8.45it/s]
100%|██████████| 1/1 [00:00<00:00,  4.27it/s]
100%|██████████| 1/1 [00:00<00:00,  5.43it/s]
100%|██████████| 1/1 [00:00<00:00,  7.57it/s]
100%|██████████| 1/1 [00:00<00:00,  6.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]
100%|██████████| 1/1 [00:00<00:00,  8.57it/s]
100%|██████████| 1/1 [00:00<00:00,  8.56it/s]


253.12092185020447 seconds for settings 2
4 / 4, {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}} categorical_feats: ['Drug', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema']
0/66


100%|██████████| 1/1 [00:00<00:00,  4.33it/s]
100%|██████████| 1/1 [00:00<00:00,  5.00it/s]
100%|██████████| 1/1 [00:00<00:00,  5.90it/s]
100%|██████████| 1/1 [00:00<00:00,  4.88it/s]
100%|██████████| 1/1 [00:00<00:00,  5.34it/s]
100%|██████████| 1/1 [00:00<00:00,  4.61it/s]
100%|██████████| 1/1 [00:00<00:00,  3.32it/s]
100%|██████████| 1/1 [00:00<00:00,  5.59it/s]
100%|██████████| 1/1 [00:00<00:00,  6.00it/s]
100%|██████████| 1/1 [00:00<00:00,  6.31it/s]
100%|██████████| 1/1 [00:00<00:00,  7.33it/s]
100%|██████████| 1/1 [00:00<00:00,  6.97it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  6.88it/s]
100%|██████████| 1/1 [00:00<00:00,  7.55it/s]
100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
100%|██████████| 1/1 [00:00<00:00,  6.85it/s]
100%|██████████| 1/1 [00:00<00:00,  7.48it/s]
100%|██████████| 1/1 [00:00<00:00,  7.77it/s]
100%|██████████| 1/1 [00:00<00:00,  7.45it/s]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s]
100%|██████████| 1/1 [00:00<00:00,

50/66


100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
100%|██████████| 1/1 [00:00<00:00,  6.69it/s]
100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
100%|██████████| 1/1 [00:00<00:00,  6.57it/s]
100%|██████████| 1/1 [00:00<00:00,  6.84it/s]
100%|██████████| 1/1 [00:00<00:00,  5.98it/s]
100%|██████████| 1/1 [00:00<00:00,  7.53it/s]
100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
100%|██████████| 1/1 [00:00<00:00,  4.63it/s]
100%|██████████| 1/1 [00:00<00:00,  6.26it/s]
100%|██████████| 1/1 [00:00<00:00,  6.65it/s]
100%|██████████| 1/1 [00:00<00:00,  6.64it/s]
100%|██████████| 1/1 [00:00<00:00,  7.51it/s]
100%|██████████| 1/1 [00:00<00:00,  6.68it/s]
100%|██████████| 1/1 [00:00<00:00,  6.68it/s]
100%|██████████| 1/1 [00:00<00:00,  7.47it/s]


0/116


100%|██████████| 1/1 [00:00<00:00,  1.89it/s]
100%|██████████| 1/1 [00:00<00:00,  2.71it/s]
100%|██████████| 1/1 [00:00<00:00,  3.41it/s]
100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
100%|██████████| 1/1 [00:00<00:00,  2.29it/s]
100%|██████████| 1/1 [00:00<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s]
100%|██████████| 1/1 [00:00<00:00,  1.25it/s]
100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
100%|██████████| 1/1 [00:00<00:00,  1.19it/s]
100%|██████████| 1/1 [00:00<00:00,  3.47it/s]
100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
100%|██████████| 1/1 [00:00<00:00,  2.84it/s]
100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 1/1 [00:00<00:00,  1.71it/s]
100%|██████████| 1/1 [00:00<00:00,  2.86it/s]
100%|██████████| 1/1 [00:00<00:00,  3.73it/s]
100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
100%|██████████| 1/1 [00:00<00:00,

50/116


100%|██████████| 1/1 [00:00<00:00,  2.51it/s]
100%|██████████| 1/1 [00:00<00:00,  2.62it/s]
100%|██████████| 1/1 [00:00<00:00,  1.78it/s]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
100%|██████████| 1/1 [00:00<00:00,  3.04it/s]
100%|██████████| 1/1 [00:00<00:00,  2.01it/s]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
100%|██████████| 1/1 [00:00<00:00,  2.40it/s]
100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
100%|██████████| 1/1 [00:00<00:00,  1.77it/s]
100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]
100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
100%|██████████| 1/1 [00:00<00:00,  1.93it/s]
100%|██████████| 1/1 [00:00<00:00,  2.20it/s]
100%|██████████| 1/1 [00:00<00:00,  1.54it/s]
100%|██████████| 1/1 [00:01<00:00,  1.10s/it]
100%|██████████| 1/1 [00:01<00:00,  1.04s/it]
100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
100%|██████████| 1/1 [00:00<00:00,

100/116


100%|██████████| 1/1 [00:00<00:00,  2.63it/s]
100%|██████████| 1/1 [00:05<00:00,  5.57s/it]
100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
100%|██████████| 1/1 [00:00<00:00,  2.03it/s]
100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 1/1 [00:00<00:00,  3.09it/s]
100%|██████████| 1/1 [00:00<00:00,  2.37it/s]
100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
100%|██████████| 1/1 [00:00<00:00,  1.51it/s]
100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
100%|██████████| 1/1 [00:00<00:00,  2.38it/s]


0/198


100%|██████████| 1/1 [00:00<00:00,  5.19it/s]
100%|██████████| 1/1 [00:00<00:00,  6.26it/s]
100%|██████████| 1/1 [00:00<00:00,  5.67it/s]
100%|██████████| 1/1 [00:00<00:00,  6.41it/s]
100%|██████████| 1/1 [00:00<00:00,  7.68it/s]
100%|██████████| 1/1 [00:00<00:00,  7.68it/s]
100%|██████████| 1/1 [00:00<00:00,  4.80it/s]
100%|██████████| 1/1 [00:00<00:00,  8.45it/s]
100%|██████████| 1/1 [00:00<00:00,  6.90it/s]
100%|██████████| 1/1 [00:00<00:00,  7.77it/s]
100%|██████████| 1/1 [00:00<00:00,  3.62it/s]
100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
100%|██████████| 1/1 [00:00<00:00,  6.58it/s]
100%|██████████| 1/1 [00:00<00:00,  7.02it/s]
100%|██████████| 1/1 [00:00<00:00,  6.75it/s]
100%|██████████| 1/1 [00:00<00:00,  7.55it/s]
100%|██████████| 1/1 [00:00<00:00,  7.65it/s]
100%|██████████| 1/1 [00:00<00:00,  7.14it/s]
100%|██████████| 1/1 [00:00<00:00,  5.70it/s]
100%|██████████| 1/1 [00:00<00:00,  7.63it/s]
100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
100%|██████████| 1/1 [00:00<00:00,

50/198


100%|██████████| 1/1 [00:01<00:00,  1.01s/it]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
100%|██████████| 1/1 [00:00<00:00,  7.10it/s]
100%|██████████| 1/1 [00:00<00:00,  7.36it/s]
100%|██████████| 1/1 [00:00<00:00,  3.02it/s]
100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
100%|██████████| 1/1 [00:00<00:00,  7.98it/s]
100%|██████████| 1/1 [00:01<00:00,  1.34s/it]
100%|██████████| 1/1 [00:00<00:00,  3.38it/s]
100%|██████████| 1/1 [00:00<00:00,  6.64it/s]
100%|██████████| 1/1 [00:00<00:00,  8.72it/s]
100%|██████████| 1/1 [00:00<00:00,  7.83it/s]
100%|██████████| 1/1 [00:00<00:00,  7.83it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  3.70it/s]
100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
100%|██████████| 1/1 [00:00<00:00,  8.44it/s]
100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
100%|██████████| 1/1 [00:00<00:00,  1.72it/s]
100%|██████████| 1/1 [00:00<00:00,

100/198


100%|██████████| 1/1 [00:00<00:00,  1.23it/s]
100%|██████████| 1/1 [00:01<00:00,  1.74s/it]
100%|██████████| 1/1 [00:00<00:00,  6.32it/s]
100%|██████████| 1/1 [00:00<00:00,  7.95it/s]
100%|██████████| 1/1 [00:00<00:00,  4.55it/s]
100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
100%|██████████| 1/1 [00:00<00:00,  7.69it/s]
100%|██████████| 1/1 [00:00<00:00,  7.60it/s]
100%|██████████| 1/1 [00:00<00:00,  6.29it/s]
100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
100%|██████████| 1/1 [00:00<00:00,  6.14it/s]
100%|██████████| 1/1 [00:00<00:00,  3.83it/s]
100%|██████████| 1/1 [00:00<00:00,  2.14it/s]
100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
100%|██████████| 1/1 [00:00<00:00,  4.30it/s]
100%|██████████| 1/1 [00:00<00:00,  4.21it/s]
100%|██████████| 1/1 [00:00<00:00,  4.22it/s]
100%|██████████| 1/1 [00:00<00:00,  4.32it/s]
100%|██████████| 1/1 [00:00<00:00,  5.64it/s]
100%|██████████| 1/1 [00:00<00:00,  4.36it/s]
100%|██████████| 1/1 [00:00<00:00,

150/198


100%|██████████| 1/1 [00:00<00:00,  7.70it/s]
100%|██████████| 1/1 [00:01<00:00,  1.16s/it]
100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
100%|██████████| 1/1 [00:00<00:00,  4.42it/s]
100%|██████████| 1/1 [00:00<00:00,  6.07it/s]
100%|██████████| 1/1 [00:00<00:00,  8.37it/s]
100%|██████████| 1/1 [00:00<00:00,  7.93it/s]
100%|██████████| 1/1 [00:00<00:00,  8.30it/s]
100%|██████████| 1/1 [00:00<00:00,  5.79it/s]
100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
100%|██████████| 1/1 [00:00<00:00,  1.31it/s]
100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
100%|██████████| 1/1 [00:00<00:00,  1.89it/s]
100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
100%|██████████| 1/1 [00:00<00:00,  4.82it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  4.43it/s]
100%|██████████| 1/1 [00:00<00:00,  4.33it/s]
100%|██████████| 1/1 [00:00<00:00,  4.22it/s]
100%|██████████| 1/1 [00:00<00:00,  4.78it/s]
100%|██████████| 1/1 [00:00<00:00,

174.6029670238495 seconds for settings 3


In [23]:
full_results_df

Unnamed: 0,full data accuracy score,inflate data accuracy score,"[""random""] balanced accuracy score","[""random""] accuracy score","[""smote""] balanced accuracy score","[""smote""] accuracy score","[""cf_random""] balanced accuracy score","[""cf_random""] accuracy score","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 1}] balanced accuracy score","[""cf_genetic"", {""proximity_weight"": 5, ""diversity_weight"": 0.2, ""sparsity_weight"": 0.2, ""total_CFs"": 1, ""desired_class"": 2}] accuracy score"
lg,0.738095,0.7,0.702381,0.761905,0.642857,0.642857,0.702381,0.77381,0.69,0.77381
rf,0.797619,0.7,0.75,0.809524,0.761905,0.761905,0.690476,0.785714,0.74,0.857143
xgb,0.761905,0.77,0.75,0.75,0.738095,0.738095,0.77381,0.77381,0.81,0.892857


In [24]:
full_results_df.to_csv(rf'../log/experiment_multiclass_cirrhosis_{metric}_testsize{test_size_proportion}_augmentsample{augment_sample}.csv')