In [2]:
import pandas as pd
from sklearn import datasets

from smote_variants.evaluation import evaluate_oversamplers, model_selection

In [3]:
tmp = pd.DataFrame({'a': [{'a': 1, 'b': 2}, {'a': 2, 'b': 3}]})

In [4]:
tmp['a'].apply(pd.Series)

Unnamed: 0,a,b
0,1,2
1,2,3


In [5]:
import numpy as np

np.isfinite(np.nan)

False

In [6]:
import inspect

In [7]:
list(inspect.signature(pd.DataFrame).parameters.keys())

['data', 'index', 'columns', 'dtype', 'copy']

In [8]:
import smote_variants
list(inspect.signature(smote_variants.SMOTE).parameters.keys())

['proportion',
 'n_neighbors',
 'nn_params',
 'ss_params',
 'n_jobs',
 'random_state',
 '_kwargs']

In [9]:
dataset_0 = datasets.load_breast_cancer()
dataset_0['name'] = 'breast_cancer'
dataset_1 = datasets.load_iris()
dataset_1['name'] = 'iris'
dataset_1['target'][dataset_1['target'] == 2] = 1

In [10]:
results = evaluate_oversamplers(datasets=[dataset_0, dataset_1],
                        oversamplers=[('smote_variants', 'SMOTE', {'n_jobs': 1, 'random_state': 5}),
                                        ('smote_variants', 'SMOTE', {'n_jobs': 1, 'n_neighbors': 9, 'random_state': 5})],
                        classifiers=[('sklearn.tree', 'DecisionTreeClassifier', {'random_state': 5}),
                                        ('sklearn.neighbors', 'KNeighborsClassifier', {'n_jobs': 1})],
                        cache_path='/home/gykovacs/smote_cache/',
                        #cache_path=None,
                        validator_params={'n_repeats': 2, 'n_splits': 5, 'random_state': 5},
                        scaler=('sklearn.preprocessing', 'StandardScaler', {}),
                        n_jobs=2,
                        parse_results=True)

2022-08-26 17:53:26.461779: processing dataset: breast_cancer


2022-08-26 17:53:26,591:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:26,595:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:26,596:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:26,597:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-08-26 17:53:26,599:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:26,601:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-08-26 17:53:26,839:INFO

2022-08-26 17:53:32.935343: processing dataset: iris


2022-08-26 17:53:32,978:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:32,982:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:32,984:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-08-26 17:53:32,985:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:32,987:INFO:SMOTE: simplex sampling with n_dim 2
2022-08-26 17:53:32,988:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:32,989:INFO:NearestNeighborsWithMet

In [16]:
all_scores = ['acc', 'sens', 'spec', 'ppv', 'npv', 'fpr', 'fdr',
                'fnr', 'bacc', 'gacc', 'f1', 'mcc', 'l', 'ltp', 'lfp', 'lfn',
                'ltn', 'lp', 'ln', 'uc', 'informedness', 'markedness', 'p_top20',
                'brier', 'log_loss', 'auc']

In [17]:
def _pivot_best_scores(pdf):
    """
    Pivot the best parameters for each oversampler and classifier

    Args:
        pdf (pd.DataFrame): the results averaged over folds

    Returns:
        pd.DataFrame: the pivoted results
    """
    results_dict_classifier = {}
    results_dict_oversampler = {}
    score_means = {}
    score_stds = {}
    for score in all_scores:
        tmp = pdf[pdf[score + '_mean'] == pdf[score + '_mean'].max()]

        results_dict_classifier[f'{score}_classifier_params'] = [None]
        results_dict_classifier[f'{score}_oversampler_params'] = [None]
        score_means[f'{score}_mean'] = [None]
        score_stds[f'{score}_std'] = [None]

        if len(tmp) > 0:
            results_dict_classifier[f'{score}_classifier_params'] =\
                                     [tmp.iloc[0]['classifier_params']]
            results_dict_classifier[f'{score}_oversampler_params'] =\
                                     [tmp.iloc[0]['oversampler_params']]
            score_means[f'{score}_mean'] = [tmp.iloc[0][f'{score}_mean']]
            score_stds[f'{score}_std'] = [tmp.iloc[0][f'{score}_std']]

    return pd.DataFrame({**results_dict_classifier,
                         **results_dict_oversampler,
                         **score_means,
                         **score_stds})


In [20]:
results.groupby(['dataset', 'oversampler', 'oversampler_module', 'classifier', 'classifier_module'])\
    .apply(_pivot_best_scores)\
    .reset_index(drop=False)

Unnamed: 0,dataset,oversampler,oversampler_module,classifier,classifier_module,level_5,acc_classifier_params,acc_oversampler_params,sens_classifier_params,sens_oversampler_params,...,ltn_std,lp_std,ln_std,uc_std,informedness_std,markedness_std,p_top20_std,brier_std,log_loss_std,auc_std
0,breast_cancer,SMOTE,smote_variants,DecisionTreeClassifier,sklearn.tree,0,{'random_state': 5},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",...,10.452643,0.293488,0.153353,0.013415,0.052526,0.040359,0.072506,0.034362,1.186825,0.026263
1,breast_cancer,SMOTE,smote_variants,KNeighborsClassifier,sklearn.neighbors,0,{'n_jobs': 1},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",...,7.624257,0.293488,0.153353,0.012255,0.05679,0.045269,0.072506,0.01418,0.465845,0.01978
2,iris,SMOTE,smote_variants,DecisionTreeClassifier,sklearn.tree,0,{'random_state': 5},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
3,iris,SMOTE,smote_variants,KNeighborsClassifier,sklearn.neighbors,0,{'n_jobs': 1},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
sampler, classifier = model_selection(dataset=dataset_0,
                        oversamplers=[('smote_variants', 'SMOTE', {'n_jobs': 1, 'random_state': 5}),
                                        ('smote_variants', 'SMOTE', {'n_jobs': 1, 'n_neighbors': 9, 'random_state': 5})],
                        classifiers=[('sklearn.tree', 'DecisionTreeClassifier', {'random_state': 5}),
                                        ('sklearn.neighbors', 'KNeighborsClassifier', {'n_jobs': 1})],
                        #cache_path='/home/gykovacs/smote_cache/',
                        cache_path=None,
                        validator_params={'n_repeats': 2, 'n_splits': 5, 'random_state': 5},
                        scaler=('sklearn.preprocessing', 'StandardScaler', {}),
                        n_jobs=2,
                        score='auc')

2022-08-26 17:53:33.723297: processing dataset: breast_cancer


2022-08-26 17:53:33,730:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:33,730:INFO:SMOTE: Running sampling via ('SMOTE', "{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'ss_params': {'n_dim': 2, 'simplex_sampling': 'random', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}")
2022-08-26 17:53:33,730:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:33,730:INFO:NearestNeighborsWithMetricTensor: NN fitting with metric minkowski
2022-08-26 17:53:33,730:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-08-26 17:53:33,730:INFO:NearestNeighborsWithMetricTensor: kneighbors query minkowski
2022-08-26 17:53:33,731:INFO

KeyError: "['level_4'] not found in axis"

In [None]:
sampler

<smote_variants.oversampling._smote.SMOTE at 0x7f29855d0a30>

In [None]:
[0, 1, 2] == [0, 1, 2]

True

In [None]:
classifier

In [None]:
results

Unnamed: 0,database,oversampler,classifier,oversampler_params_key,classifier_params_key,acc_mean,sens_mean,spec_mean,ppv_mean,npv_mean,...,ltn_std,lp_std,ln_std,uc_std,informedness_std,markedness_std,p_top20_std,brier_std,log_loss_std,auc_std
0,breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},0.934071,0.943995,0.917386,0.952162,0.908941,...,11.019668,0.293488,0.153353,0.013415,0.052526,0.040359,0.072506,0.019612,0.677388,0.026263
1,breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'random_state': 5},0.930539,0.942684,0.910299,0.947659,0.905863,...,10.452643,0.293488,0.153353,0.01971,0.077123,0.073411,0.072506,0.034362,1.186825,0.038561
2,breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},0.925299,0.936972,0.905869,0.944265,0.895145,...,7.428414,0.293488,0.153353,0.012255,0.05679,0.045269,0.072506,0.01418,0.465845,0.017591
3,breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'n_jobs': 1},0.923537,0.935563,0.903488,0.942887,0.893544,...,7.624257,0.293488,0.153353,0.010445,0.052173,0.043289,0.072506,0.015354,0.514612,0.01978
4,iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
5,iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
6,iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
7,iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
all_scores = ['acc', 'sens', 'spec', 'ppv', 'npv', 'fpr', 'fdr',
                'fnr', 'bacc', 'gacc', 'f1', 'mcc', 'l', 'ltp', 'lfp', 'lfn',
                'ltn', 'lp', 'ln', 'uc', 'informedness', 'markedness', 'p_top20',
                'brier', 'log_loss', 'auc']

def aggregate_folds(pdf):
    for score in all_scores:
        pdf[score] = pdf['scores'].apply(lambda x: x[score])
    pdf['database'] = pdf['fold_descriptor'].apply(lambda x: x['name'])
    pdf['fold_idx'] = pdf['fold_descriptor'].apply(lambda x: x['fold_idx'])
    pdf['repeat_idx'] = pdf['fold_descriptor'].apply(lambda x: x['repeat_idx'])
    pdf['oversampler_params'] = pdf['oversampler']
    pdf['oversampler'] = pdf['oversampler_params'].apply(lambda x: x['class_name'])
    pdf['oversampler_params_key'] = pdf['oversampler_params'].apply(str)
    pdf['classifier_params_key'] = pdf['classifier_params'].apply(str)
    return pdf

In [None]:
pdf_prep = aggregate_folds(pdf)

In [None]:
import numpy as np
pdf_avg = pdf_prep.groupby(['database', 'oversampler', 
                            'classifier', 'oversampler_params_key', 
                            'classifier_params_key']).agg(**{score + '_mean': ('score', 'mean') for score in all_scores})

In [None]:
import numpy as np
pdf_avg = pdf_prep.groupby(['database', 'oversampler', 
                            'classifier', 'oversampler_params_key', 
                            'classifier_params_key']).agg(**{**{score + '_mean': (score, 'mean') for score in all_scores},
                                                           **{score + '_std': (score, 'std') for score in all_scores}})

In [None]:
pdf_avg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,acc_mean,sens_mean,spec_mean,ppv_mean,npv_mean,fpr_mean,fdr_mean,fnr_mean,bacc_mean,gacc_mean,...,ltn_std,lp_std,ln_std,uc_std,informedness_std,markedness_std,p_top20_std,brier_std,log_loss_std,auc_std
database,oversampler,classifier,oversampler_params_key,classifier_params_key,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'random_state': 5},0.934071,0.943995,0.917386,0.952162,0.908941,0.082614,0.047838,0.056005,0.930691,0.929819,...,11.019668,0.293488,0.153353,0.013415,0.052526,0.040359,0.072506,0.019612,0.677388,0.026263
breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'random_state': 5},0.930539,0.942684,0.910299,0.947659,0.905863,0.089701,0.052341,0.057316,0.926491,0.925686,...,10.452643,0.293488,0.153353,0.01971,0.077123,0.073411,0.072506,0.034362,1.186825,0.038561
breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'n_jobs': 1},0.925299,0.936972,0.905869,0.944265,0.895145,0.094131,0.055735,0.063028,0.921421,0.920963,...,7.428414,0.293488,0.153353,0.012255,0.05679,0.045269,0.072506,0.01418,0.465845,0.017591
breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'n_jobs': 1},0.923537,0.935563,0.903488,0.942887,0.893544,0.096512,0.057113,0.064437,0.919526,0.919002,...,7.624257,0.293488,0.153353,0.010445,0.052173,0.043289,0.072506,0.015354,0.514612,0.01978
iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_params': {}, 'n_jobs': 1, 'sampling_params': {'n_dim': 2, 'simplex_sampling': 'uniform', 'within_simplex_sampling': 'random', 'gaussian_component': {}}, 'random_state': 5, 'class_name': 'SMOTE'}",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
def pivot_best_scores(pdf):
    base_params = ['database', 'oversampler', 'classifier']
    results_dict_classifier = {}
    results_dict_oversampler = {}
    score_means = {}
    score_stds = {}
    for score in all_scores:
        tmp = pdf[pdf[score + '_mean'] == pdf[score + '_mean'].max()]
        if len(tmp) > 0:
            results_dict_classifier[f'{score}_classifier_params'] = [tmp.iloc[0]['classifier_params_key']]
            results_dict_classifier[f'{score}_oversampler_params'] = [tmp.iloc[0]['oversampler_params_key']]
            score_means[f'{score}_mean'] = [tmp.iloc[0][f'{score}_mean']]
            score_stds[f'{score}_std'] = [tmp.iloc[0][f'{score}_std']]
        else:
            results_dict_classifier[f'{score}_classifier_params'] = [None]
            results_dict_classifier[f'{score}_oversampler_params'] = [None]
            score_means[f'{score}_mean'] = [None]
            score_stds[f'{score}_std'] = [None]

    return pd.DataFrame({**results_dict_classifier,
                         **results_dict_oversampler,
                         **score_means,
                         **score_stds})

In [None]:
results\
    .reset_index(drop=False)\
    .groupby(['database', 'oversampler', 'classifier'])\
    .apply(pivot_best_scores)\
    .reset_index(drop=False)\
    [['database', 'oversampler', 'classifier', 'auc_mean', 'auc_std']]

Unnamed: 0,database,oversampler,classifier,auc_mean,auc_std
0,breast_cancer,SMOTE,DecisionTreeClassifier,0.930691,0.026263
1,breast_cancer,SMOTE,KNeighborsClassifier,0.960231,0.01978
2,iris,SMOTE,DecisionTreeClassifier,1.0,0.0
3,iris,SMOTE,KNeighborsClassifier,1.0,0.0


In [None]:
from sklearn.neighbors._classification import KNeighborsClassifier

In [None]:
KNeighborsClassifier.__module__

'sklearn.neighbors._classification'

In [None]:
pdf_avg.reset_index(drop=False)

Unnamed: 0,database,oversampler,classifier,oversampler_params_key,classifier_params_key,acc,sens,spec,ppv,npv,...,ltn,lp,ln,uc,informedness,markedness,p_top20,brier,log_loss,auc
0,breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},0.934071,0.943995,0.917386,0.952162,0.908941,...,-149.545905,-33.281311,-41.858889,0.102212,0.861381,0.861103,0.368182,0.065929,2.277139,0.930691
1,breast_cancer,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'random_state': 5},0.930539,0.942684,0.910299,0.947659,0.905863,...,-148.474669,-33.281311,-41.858889,0.10087,0.852983,0.853522,0.368182,0.069461,2.399134,0.926491
2,breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},0.925299,0.936972,0.905869,0.944265,0.895145,...,-148.126537,-33.281311,-41.858889,0.09595,0.842841,0.83941,0.368182,0.059977,0.9545901,0.958374
3,breast_cancer,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'n_jobs': 1},0.923537,0.935563,0.903488,0.942887,0.893544,...,-147.83502,-33.281311,-41.858889,0.094792,0.839052,0.836431,0.368182,0.059834,0.897479,0.960231
4,iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,...,-23.025851,-8.109302,-10.986123,,1.0,1.0,0.0,0.0,9.992007e-16,1.0
5,iris,SMOTE,DecisionTreeClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'random_state': 5},1.0,1.0,1.0,1.0,1.0,...,-23.025851,-8.109302,-10.986123,,1.0,1.0,0.0,0.0,9.992007e-16,1.0
6,iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 5, 'nn_para...",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,...,-23.025851,-8.109302,-10.986123,,1.0,1.0,0.0,0.0,9.992007e-16,1.0
7,iris,SMOTE,KNeighborsClassifier,"{'proportion': 1.0, 'n_neighbors': 9, 'nn_para...",{'n_jobs': 1},1.0,1.0,1.0,1.0,1.0,...,-23.025851,-8.109302,-10.986123,,1.0,1.0,0.0,0.0,9.992007e-16,1.0


In [None]:
pdf['score'] = pdf['scores'].apply(lambda x: x['auc'])

In [None]:
pdf[pdf['score'] == pdf['score'].max()].iloc[0]

fold_descriptor           {'db_n': 150, 'db_n_attr': 4, 'imbalance_ratio...
oversampler                                                           SMOTE
runtime                                                            0.001434
y_pred                    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
y_test                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, ...
scores                    {'tp': 20, 'tn': 10, 'fp': 0, 'fn': 0, 'p': 20...
classifier                                           DecisionTreeClassifier
classifier_params                                       {'random_state': 5}
acc                                                                     1.0
sens                                                                    1.0
spec                                                                    1.0
ppv                                                                     1.0
npv                                                                     1.0
fpr         

In [None]:
from smote_variants import MulticlassOversampling

In [None]:
mo = MulticlassOversampling('SUNDO')

In [None]:
from smote_variants import SUNDO

In [None]:
import inspect

In [None]:
'proportion' in list(inspect.signature(SUNDO).parameters.keys())

False