In [43]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
%pip install lightgbm

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [45]:
import pandas as pd

import sys
sys.path.append('..')

from helpers import utils, pipelines, models

from sklearn.model_selection import train_test_split

import multiprocessing

from data_augmentaion.data_augmentator import DataAugmentor 

import json

import time

### Define research parameters

In [46]:
regression_task = False
# continuous_features = []
metric = 'accuracy'
test_size_proportion = 0.2
augment_sample = 0.5
metrics = ['f1_weighted', 'accuracy', 'balanced_accuracy', 'precision_weighted', 'recall_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'] 

search_pipelines = pipelines.get_classification_pipelines()
search_parameters = models.parameters

settings = [
    {'method': 'random'},
    {'method': 'smote'},
    {'method': 'cf_random'},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 0.2, 'diversity_weight': 5, 'sparsity_weight': 1}},
    {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 1}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 0.2}},
    # {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 1, 'diversity_weight': 1, 'sparsity_weight': 1}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 0.2}},
    # {'method': 'cf_kdtree', 'kw_args': {'sparsity_weight': 1}},
]
name_mapping = {
    'random': 'Random',
    'smote': 'SMOTE',
    'cf_random': 'CF Random',
    'cf_genetic': 'CF Genetic',
}


In [47]:
from sklearn.datasets import make_classification

# Define the parameters of the synthetic dataset
n_samples = 600  # Total number of samples
n_features = 12   # Number of features
n_classes = 3    # Number of classes
class_weights = [0.5, 0.2, 0.3]  # Class imbalance ratio

# Create the imbalanced dataset
X, y = make_classification(n_samples=n_samples, 
                           n_features=n_features,
                           n_informative=3,
                           n_classes=n_classes,
                           n_clusters_per_class=2,
                           weights=class_weights,
                           class_sep=0.5, # 1
                           random_state=42)
X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(n_features)])
y = pd.Series(y, name="target")

In [48]:
continuous_features = X.columns.tolist()

#### split data

In [49]:
# all data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size_proportion, random_state=42)

In [50]:
display(y_train.value_counts())

0    250
2    142
1     88
Name: target, dtype: int64

In [51]:
full_results_df = pd.DataFrame()

### Whole dataset scores

In [52]:
print(f'whole adult dataset {metric} scores:')
whole_best_ests, whole_scores = utils.fit_and_evaluate(X_train, y_train, X_test, y_test,
                    search_estimators=search_pipelines, search_params=search_parameters, scoring=metric)
whole_dataset_result_df = pd.DataFrame.from_dict(whole_scores, orient='index')
whole_dataset_result_df.columns = pd.MultiIndex.from_product([['whole']] + [whole_dataset_result_df.columns])
whole_dataset_result_df



whole adult dataset accuracy scores:


Unnamed: 0_level_0,whole
Unnamed: 0_level_1,accuracy
lg,0.583333
rf,0.566667
xgb,0.616667


In [53]:
full_results_df = pd.concat([full_results_df, whole_dataset_result_df], axis=1)

### running experiments

In [54]:
best_estimators = {}
best_scores = {}
total_time = time.time()
for i, s in enumerate(settings):
    start = time.time()
    print(f'{i} / {len(settings)}, {s}', end=' ')
    augmentor = DataAugmentor(X_train, y_train, X_test, y_test,
                             method=s['method'], regression=regression_task,
                             continuous_feats=continuous_features,
                             cf_scoring = metric,
                             kw_args=s.get('kw_args', {})
                             )

    X_train_augmented_balanced, y_train_augmented_balanced = augmentor.augment(balance=True)
    best_estimators[f'{i}_balanced'], best_scores[f'{i}_balanced'] = \
     utils.fit_and_evaluate(X_train_augmented_balanced, y_train_augmented_balanced, X_test, y_test,
                            search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
    result_df_balanced = pd.DataFrame.from_dict(best_scores[f'{i}_balanced'], orient='index')
    result_df_balanced.columns = pd.MultiIndex.from_product([[f'{json.dumps((list(s.values())))} balanced']] + [result_df_balanced.columns])


    X_train_augmented, y_train_augmented = augmentor.augment(balance=False, size=augment_sample)
    best_estimators[f'{i}'], best_scores[f'{i}'] = \
        utils.fit_and_evaluate(X_train_augmented, y_train_augmented, X_test, y_test,
                               search_estimators=search_pipelines, search_params=search_parameters, scoring=metrics)
    result_df = pd.DataFrame.from_dict(best_scores[f'{i}'], orient='index')
    result_df.columns = pd.MultiIndex.from_product([[f'{(list(s.values())[0])}']] + [result_df.columns])
    # print(list(s.values())[0])

    full_results_df = pd.concat([full_results_df, result_df], axis=1)
    print(f'{time.time() - start} seconds for settings {i}')
print(f'\nTotal time: {time.time() - total_time}')


0 / 4, {'method': 'random'} 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

6.30389404296875 seconds for settings 0
1 / 4, {'method': 'smote'} 6.310122966766357 seconds for settings 1
2 / 4, {'method': 'cf_random'} model for cf accuracy score: {'cf': {'accuracy': 0.6}}
0/108


100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  9.16it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,

50/108


100%|██████████| 1/1 [00:00<00:00,  8.88it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  8.85it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.11it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.85it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,

100/108


100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]


0/162


100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.11it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.32it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  9.14it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,

50/162


100%|██████████| 1/1 [00:00<00:00,  8.73it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.82it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:17<00:00, 17.78s/it]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  9.11it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,

100/162


100%|██████████| 1/1 [00:00<00:00,  8.80it/s]
100%|██████████| 1/1 [00:00<00:00,  9.09it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.14it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.11it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,

150/162


100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  8.85it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  8.85it/s]
100%|██████████| 1/1 [00:00<00:00,  8.86it/s]


model for cf accuracy score: {'cf': {'accuracy': 0.6}}
0/240


100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  8.82it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  9.19it/s]
100%|██████████| 1/1 [00:00<00:00,  8.86it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  8.84it/s]
100%|██████████| 1/1 [00:00<00:00,  9.13it/s]
100%|██████████| 1/1 [00:00<00:00,  9.21it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,

50/240


100%|██████████| 1/1 [00:00<00:00,  9.09it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
100%|██████████| 1/1 [00:00<00:00,  9.23it/s]
100%|██████████| 1/1 [00:00<00:00,  9.19it/s]
100%|██████████| 1/1 [00:00<00:00,  9.13it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  8.86it/s]
100%|██████████| 1/1 [00:00<00:00,  9.14it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.45it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,

100/240


100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
100%|██████████| 1/1 [00:00<00:00,  9.12it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.93it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
100%|██████████| 1/1 [00:17<00:00, 17.73s/it]
100%|██████████| 1/1 [00:00<00:00,  9.11it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  9.01it/s]
100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,

150/240


100%|██████████| 1/1 [00:00<00:00,  8.89it/s]
100%|██████████| 1/1 [00:00<00:00,  8.88it/s]
100%|██████████| 1/1 [00:00<00:00,  9.03it/s]
100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
100%|██████████| 1/1 [00:00<00:00,  8.91it/s]
100%|██████████| 1/1 [00:00<00:00,  9.16it/s]
100%|██████████| 1/1 [00:00<00:00,  8.99it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:17<00:00, 17.75s/it]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.82it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,  8.87it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  9.07it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,

200/240


100%|██████████| 1/1 [00:00<00:00,  8.95it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  9.04it/s]
100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
100%|██████████| 1/1 [00:00<00:00,  8.92it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.90it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.02it/s]
100%|██████████| 1/1 [00:00<00:00,  8.96it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
100%|██████████| 1/1 [00:00<00:00,  8.88it/s]
100%|██████████| 1/1 [00:00<00:00,  9.09it/s]
100%|██████████| 1/1 [00:00<00:00,  9.10it/s]
100%|██████████| 1/1 [00:00<00:00,  8.94it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.09it/s]
100%|██████████| 1/1 [00:00<00:00,  8.98it/s]
100%|██████████| 1/1 [00:00<00:00,  9.08it/s]
100%|██████████| 1/1 [00:00<00:00,

326.32352209091187 seconds for settings 2
3 / 4, {'method': 'cf_genetic', 'kw_args': {'proximity_weight': 5, 'diversity_weight': 0.2, 'sparsity_weight': 0.2}} model for cf accuracy score: {'cf': {'accuracy': 0.6}}
0/108


100%|██████████| 1/1 [00:00<00:00, 13.99it/s]
100%|██████████| 1/1 [00:00<00:00, 14.22it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.65it/s]
100%|██████████| 1/1 [00:00<00:00, 14.16it/s]
100%|██████████| 1/1 [00:00<00:00, 14.74it/s]
100%|██████████| 1/1 [00:00<00:00, 14.88it/s]
100%|██████████| 1/1 [00:00<00:00, 14.44it/s]
100%|██████████| 1/1 [00:00<00:00, 14.43it/s]
100%|██████████| 1/1 [00:00<00:00, 14.21it/s]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
100%|██████████| 1/1 [00:00<00:00, 14.83it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.56it/s]
100%|██████████| 1/1 [00:00<00:00, 14.26it/s]
100%|██████████| 1/1 [00:00<00:00, 14.28it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.70it/s]
100%|██████████| 1/1 [00:00<00:00, 14.28it/s]
100%|██████████| 1/1 [00:00<00:00, 14.53it/s]
100%|██████████| 1/1 [00:00<00:00, 14.02it/s]
100%|██████████| 1/1 [00:00<00:00,

50/108


100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 14.32it/s]
100%|██████████| 1/1 [00:00<00:00, 14.76it/s]
100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 14.56it/s]
100%|██████████| 1/1 [00:00<00:00, 14.58it/s]
100%|██████████| 1/1 [00:00<00:00, 14.13it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 11.38it/s]
100%|██████████| 1/1 [00:00<00:00, 13.80it/s]
100%|██████████| 1/1 [00:00<00:00, 14.60it/s]
100%|██████████| 1/1 [00:00<00:00, 14.65it/s]
100%|██████████| 1/1 [00:00<00:00, 14.29it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.65it/s]
100%|██████████| 1/1 [00:00<00:00, 14.53it/s]
100%|██████████| 1/1 [00:00<00:00, 10.73it/s]
100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 13.92it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00,

100/108


100%|██████████| 1/1 [00:00<00:00, 14.14it/s]
100%|██████████| 1/1 [00:00<00:00, 14.67it/s]
100%|██████████| 1/1 [00:00<00:00, 14.17it/s]
100%|██████████| 1/1 [00:00<00:00, 14.86it/s]
100%|██████████| 1/1 [00:00<00:00, 14.39it/s]
100%|██████████| 1/1 [00:00<00:00, 14.23it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.42it/s]


0/162


100%|██████████| 1/1 [00:00<00:00, 13.97it/s]
100%|██████████| 1/1 [00:00<00:00, 14.68it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.55it/s]
100%|██████████| 1/1 [00:00<00:00, 14.42it/s]
100%|██████████| 1/1 [00:00<00:00, 14.17it/s]
100%|██████████| 1/1 [00:00<00:00, 14.43it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.68it/s]
100%|██████████| 1/1 [00:00<00:00, 14.21it/s]
100%|██████████| 1/1 [00:00<00:00, 14.59it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.57it/s]
100%|██████████| 1/1 [00:00<00:00, 14.22it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 14.80it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.13it/s]
100%|██████████| 1/1 [00:00<00:00, 14.39it/s]
100%|██████████| 1/1 [00:00<00:00, 14.18it/s]
100%|██████████| 1/1 [00:00<00:00,

50/162


100%|██████████| 1/1 [00:00<00:00, 14.11it/s]
100%|██████████| 1/1 [00:00<00:00, 14.58it/s]
100%|██████████| 1/1 [00:00<00:00, 14.96it/s]
100%|██████████| 1/1 [00:00<00:00, 14.56it/s]
100%|██████████| 1/1 [00:00<00:00, 14.35it/s]
100%|██████████| 1/1 [00:00<00:00, 14.44it/s]
100%|██████████| 1/1 [00:00<00:00, 14.72it/s]
100%|██████████| 1/1 [00:00<00:00, 13.97it/s]
100%|██████████| 1/1 [00:00<00:00, 14.23it/s]
100%|██████████| 1/1 [00:00<00:00, 14.26it/s]
100%|██████████| 1/1 [00:00<00:00, 14.70it/s]
100%|██████████| 1/1 [00:00<00:00, 14.43it/s]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
100%|██████████| 1/1 [00:00<00:00, 14.23it/s]
100%|██████████| 1/1 [00:00<00:00, 14.19it/s]
100%|██████████| 1/1 [00:00<00:00, 14.41it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 13.94it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 14.43it/s]
100%|██████████| 1/1 [00:00<00:00,

100/162


100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 13.70it/s]
100%|██████████| 1/1 [00:00<00:00, 14.27it/s]
100%|██████████| 1/1 [00:00<00:00, 14.20it/s]
100%|██████████| 1/1 [00:00<00:00, 14.28it/s]
100%|██████████| 1/1 [00:00<00:00, 14.61it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.63it/s]
100%|██████████| 1/1 [00:00<00:00, 14.17it/s]
100%|██████████| 1/1 [00:00<00:00, 14.24it/s]
100%|██████████| 1/1 [00:00<00:00, 14.52it/s]
100%|██████████| 1/1 [00:00<00:00, 14.41it/s]
100%|██████████| 1/1 [00:00<00:00, 14.21it/s]
100%|██████████| 1/1 [00:00<00:00, 14.60it/s]
100%|██████████| 1/1 [00:00<00:00, 14.54it/s]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
100%|██████████| 1/1 [00:00<00:00, 14.53it/s]
100%|██████████| 1/1 [00:00<00:00, 14.68it/s]
100%|██████████| 1/1 [00:00<00:00, 14.98it/s]
100%|██████████| 1/1 [00:00<00:00, 14.68it/s]
100%|██████████| 1/1 [00:00<00:00, 14.63it/s]
100%|██████████| 1/1 [00:00<00:00,

150/162


100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.20it/s]
100%|██████████| 1/1 [00:00<00:00, 14.62it/s]
100%|██████████| 1/1 [00:00<00:00, 14.39it/s]
100%|██████████| 1/1 [00:00<00:00, 14.21it/s]
100%|██████████| 1/1 [00:00<00:00, 14.60it/s]
100%|██████████| 1/1 [00:00<00:00, 14.20it/s]
100%|██████████| 1/1 [00:00<00:00, 14.49it/s]
100%|██████████| 1/1 [00:00<00:00, 14.27it/s]
100%|██████████| 1/1 [00:00<00:00, 14.06it/s]


model for cf accuracy score: {'cf': {'accuracy': 0.6}}
0/240


100%|██████████| 1/1 [00:00<00:00, 14.14it/s]
100%|██████████| 1/1 [00:00<00:00, 14.55it/s]
100%|██████████| 1/1 [00:00<00:00, 14.24it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 14.32it/s]
100%|██████████| 1/1 [00:00<00:00, 13.99it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00, 14.52it/s]
100%|██████████| 1/1 [00:00<00:00, 14.07it/s]
100%|██████████| 1/1 [00:00<00:00, 14.99it/s]
100%|██████████| 1/1 [00:00<00:00, 14.16it/s]
100%|██████████| 1/1 [00:00<00:00, 14.94it/s]
100%|██████████| 1/1 [00:00<00:00, 14.61it/s]
100%|██████████| 1/1 [00:00<00:00, 14.73it/s]
100%|██████████| 1/1 [00:00<00:00, 13.70it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
100%|██████████| 1/1 [00:00<00:00, 14.62it/s]
100%|██████████| 1/1 [00:00<00:00, 14.57it/s]
100%|██████████| 1/1 [00:00<00:00, 14.44it/s]
100%|██████████| 1/1 [00:00<00:00,

50/240


100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.05it/s]
100%|██████████| 1/1 [00:00<00:00, 14.04it/s]
100%|██████████| 1/1 [00:00<00:00, 14.61it/s]
100%|██████████| 1/1 [00:00<00:00, 14.52it/s]
100%|██████████| 1/1 [00:00<00:00, 14.49it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00, 14.56it/s]
100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
100%|██████████| 1/1 [00:00<00:00, 14.69it/s]
100%|██████████| 1/1 [00:00<00:00, 14.38it/s]
100%|██████████| 1/1 [00:00<00:00, 14.41it/s]
100%|██████████| 1/1 [00:00<00:00, 14.44it/s]
100%|██████████| 1/1 [00:00<00:00, 14.26it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.25it/s]
100%|██████████| 1/1 [00:00<00:00, 14.62it/s]
100%|██████████| 1/1 [00:00<00:00, 14.21it/s]
100%|██████████| 1/1 [00:00<00:00, 14.50it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00,

100/240


100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 14.42it/s]
100%|██████████| 1/1 [00:00<00:00, 14.55it/s]
100%|██████████| 1/1 [00:00<00:00, 14.49it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.30it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00, 14.69it/s]
100%|██████████| 1/1 [00:00<00:00, 14.50it/s]
100%|██████████| 1/1 [00:00<00:00, 14.67it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.77it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.55it/s]
100%|██████████| 1/1 [00:00<00:00, 14.30it/s]
100%|██████████| 1/1 [00:00<00:00, 14.57it/s]
100%|██████████| 1/1 [00:00<00:00, 14.87it/s]
100%|██████████| 1/1 [00:00<00:00, 14.54it/s]
100%|██████████| 1/1 [00:00<00:00, 14.30it/s]
100%|██████████| 1/1 [00:00<00:00, 14.37it/s]
100%|██████████| 1/1 [00:00<00:00,

150/240


100%|██████████| 1/1 [00:00<00:00, 14.13it/s]
100%|██████████| 1/1 [00:00<00:00, 14.46it/s]
100%|██████████| 1/1 [00:00<00:00, 14.30it/s]
100%|██████████| 1/1 [00:00<00:00, 15.01it/s]
100%|██████████| 1/1 [00:00<00:00, 14.85it/s]
100%|██████████| 1/1 [00:00<00:00, 14.58it/s]
100%|██████████| 1/1 [00:00<00:00, 14.33it/s]
100%|██████████| 1/1 [00:00<00:00, 14.51it/s]
100%|██████████| 1/1 [00:00<00:00, 14.32it/s]
100%|██████████| 1/1 [00:00<00:00, 14.43it/s]
100%|██████████| 1/1 [00:00<00:00, 14.91it/s]
100%|██████████| 1/1 [00:00<00:00, 14.24it/s]
100%|██████████| 1/1 [00:00<00:00, 14.66it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.50it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.47it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.35it/s]
100%|██████████| 1/1 [00:00<00:00, 14.45it/s]
100%|██████████| 1/1 [00:00<00:00, 14.44it/s]
100%|██████████| 1/1 [00:00<00:00,

200/240


100%|██████████| 1/1 [00:00<00:00, 14.73it/s]
100%|██████████| 1/1 [00:00<00:00, 14.02it/s]
100%|██████████| 1/1 [00:00<00:00, 15.02it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.38it/s]
100%|██████████| 1/1 [00:00<00:00, 14.14it/s]
100%|██████████| 1/1 [00:00<00:00, 14.84it/s]
100%|██████████| 1/1 [00:00<00:00, 14.14it/s]
100%|██████████| 1/1 [00:00<00:00, 14.18it/s]
100%|██████████| 1/1 [00:00<00:00, 14.40it/s]
100%|██████████| 1/1 [00:00<00:00, 14.20it/s]
100%|██████████| 1/1 [00:00<00:00, 14.77it/s]
100%|██████████| 1/1 [00:00<00:00, 14.77it/s]
100%|██████████| 1/1 [00:00<00:00, 14.59it/s]
100%|██████████| 1/1 [00:00<00:00, 14.34it/s]
100%|██████████| 1/1 [00:00<00:00, 14.28it/s]
100%|██████████| 1/1 [00:00<00:00, 13.99it/s]
100%|██████████| 1/1 [00:00<00:00, 14.35it/s]
100%|██████████| 1/1 [00:00<00:00, 14.41it/s]
100%|██████████| 1/1 [00:00<00:00, 14.52it/s]
100%|██████████| 1/1 [00:00<00:00, 14.18it/s]
100%|██████████| 1/1 [00:00<00:00,

198.30402612686157 seconds for settings 3

Total time: 537.2422549724579


In [55]:
full_results_df

Unnamed: 0_level_0,whole,random,random,random,random,random,random,random,smote,smote,...,cf_random,cf_random,cf_random,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_genetic
Unnamed: 0_level_1,accuracy,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted,f1_weighted,accuracy,...,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted
lg,0.583333,0.502033,0.583333,0.508333,0.468042,0.583333,0.709737,0.700104,0.535022,0.533333,...,0.541667,0.735865,0.728493,0.517778,0.6,0.525,0.477731,0.6,0.696093,0.686722
rf,0.566667,0.518568,0.575,0.504444,0.647908,0.575,0.758631,0.753972,0.584567,0.583333,...,0.591667,0.743953,0.736306,0.559882,0.608333,0.547778,0.702412,0.608333,0.766832,0.760885
xgb,0.616667,0.62081,0.625,0.602222,0.632371,0.625,0.777409,0.776844,0.583607,0.583333,...,0.616667,0.7813,0.780875,0.639195,0.641667,0.622778,0.654708,0.641667,0.758644,0.778531


In [56]:
print('best methods')
best_methods = pd.DataFrame()
for s in metrics:
    models_scores = full_results_df.xs(s, axis='columns', level=1)
    max_models = models_scores.idxmax(axis='columns')
    max_models.name = s
    max_models['overall'] = models_scores.max(axis=0).idxmax(axis=0)
    best_methods = pd.concat([best_methods, max_models], axis=1)
best_methods

best methods


Unnamed: 0,f1_weighted,accuracy,balanced_accuracy,precision_weighted,recall_weighted,roc_auc_ovr_weighted,roc_auc_ovo_weighted
lg,cf_random,cf_genetic,cf_random,cf_random,cf_genetic,cf_random,cf_random
rf,cf_random,cf_genetic,smote,cf_genetic,cf_genetic,cf_genetic,smote
xgb,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_random,cf_random
overall,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_genetic,cf_random,cf_random


In [57]:
full_results_df.to_csv(rf'../log/experiment_multiclass_artifical.csv')

In [60]:
print('Methods and their scores')
methods_scores = {}
for s in metrics:
    models_scores = full_results_df.xs(s, axis='columns', level=1)
    methods_scores[s] = models_scores

Methods and their scores


In [64]:
float_format = "{:0.4f}".format

for metric, df in methods_scores.items():
    latex = df.to_latex(index=False, float_format=float_format)

    # Write the LaTeX table to a file
    with open(f'../graphs/artifical_{metric}_table.tex', 'w') as f:
        f.write(latex)

  latex = df.to_latex(index=False, float_format=float_format)
