In [1]:
import pandas as pd
import common_datasets.binary_classification as bin_clas
import common_datasets.multiclass_classification as mult_clas
import common_datasets.regression as regression

In [2]:
# Display the loader callables returned for the 'all' selector of the
# binary-classification module (the cell output lists `load_*` functions).
bin_clas.get_data_loaders('all')

[<function common_datasets.binary_classification._binary_classification.load_abalone19()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone9_18()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone_17_vs_7_8_9_10()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone_19_vs_10_11_12_13()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone_20_vs_8_9_10()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone_21_vs_8()>,
 <function common_datasets.binary_classification._binary_classification.load_abalone_3_vs_11()>,
 <function common_datasets.binary_classification._binary_classification.load_ada()>,
 <function common_datasets.binary_classification._binary_classification.load_appendicitis()>,
 <function common_datasets.binary_classification._binary_classification.load_australian()>,
 <function common_datasets.binary_clas

In [3]:
# Build one metadata record per binary-classification dataset.
# Loader names are discovered via the 'load' prefix on the module's attributes.
data_loaders = [name for name in dir(bin_clas) if name.startswith('load')]

# Fields left out of the summary records (everything else is kept as-is).
dropped_keys = ('data', 'target', 'feature_names',
                'feature_types', 'target_label', 'citation')

results = []

for loader_name in data_loaders:
    # Keep the name string and the callable in separate variables.
    loader = getattr(bin_clas, loader_name)
    dataset = loader()

    # Copy the fields to keep rather than `del`-ing them from the returned
    # dict: the loader's object is left untouched, and a dataset that lacks
    # one of the dropped fields no longer aborts the whole loop with KeyError.
    summary = {key: value for key, value in dataset.items()
               if key not in dropped_keys}
    summary['data_loader'] = loader.__name__

    results.append(summary)

# Round-trip through a DataFrame so every record exposes the same set of keys
# (keys missing from a dataset come back as NaN).
pd.DataFrame(results).to_dict(orient='records')

[{'name': 'abalone19',
  'phenotype': 'abalone',
  'citation_key': 'keel',
  'n_col': 9,
  'n_col_orig': 8,
  'n_col_non_unique_orig': 8,
  'n': 4174,
  'DESCR': 'abalone19',
  'n_minority': 32,
  'imbalance_ratio': 129.4375,
  'data_loader': 'load_abalone19'},
 {'name': 'abalone9_18',
  'phenotype': 'abalone',
  'citation_key': 'keel',
  'n_col': 9,
  'n_col_orig': 8,
  'n_col_non_unique_orig': 8,
  'n': 731,
  'DESCR': 'abalone9_18',
  'n_minority': 42,
  'imbalance_ratio': 16.404761904761905,
  'data_loader': 'load_abalone9_18'},
 {'name': 'abalone-17_vs_7-8-9-10',
  'phenotype': 'abalone',
  'citation_key': 'keel',
  'n_col': 9,
  'n_col_orig': 8,
  'n_col_non_unique_orig': 8,
  'n': 2338,
  'DESCR': 'abalone-17_vs_7-8-9-10',
  'n_minority': 58,
  'imbalance_ratio': 39.310344827586206,
  'data_loader': 'load_abalone_17_vs_7_8_9_10'},
 {'name': 'abalone-19_vs_10-11-12-13',
  'phenotype': 'abalone',
  'citation_key': 'keel',
  'n_col': 9,
  'n_col_orig': 8,
  'n_col_non_unique_orig':

In [4]:
# Build one metadata record per multiclass-classification dataset.
# Loader names are discovered via the 'load' prefix on the module's attributes.
data_loaders = [name for name in dir(mult_clas) if name.startswith('load')]

# Fields left out of the summary records (everything else is kept as-is).
dropped_keys = ('data', 'target', 'feature_names',
                'feature_types', 'target_label', 'citation')

results = []

for loader_name in data_loaders:
    # Keep the name string and the callable in separate variables.
    loader = getattr(mult_clas, loader_name)
    dataset = loader()

    # Copy the fields to keep rather than `del`-ing them from the returned
    # dict: the loader's object is left untouched, and a dataset that lacks
    # one of the dropped fields no longer aborts the whole loop with KeyError.
    summary = {key: value for key, value in dataset.items()
               if key not in dropped_keys}
    summary['data_loader'] = loader.__name__

    results.append(summary)

# Round-trip through a DataFrame so every record exposes the same set of keys
# (keys missing from a dataset come back as NaN).
pd.DataFrame(results).to_dict(orient='records')

[{'name': 'abalone',
  'phenotype': 'abalone',
  'citation_key': 'krnn',
  'n_col': 7,
  'n_col_orig': 7,
  'n_col_non_unique_orig': 7,
  'n': 4177,
  'DESCR': 'abalone',
  'n_classes': 28,
  'data_loader': 'load_abalone'},
 {'name': 'automobile',
  'phenotype': 'automobile',
  'citation_key': 'keel',
  'n_col': 54,
  'n_col_orig': 25,
  'n_col_non_unique_orig': 24,
  'n': 159,
  'DESCR': 'automobile',
  'n_classes': 6,
  'data_loader': 'load_automobile'},
 {'name': 'balance',
  'phenotype': 'balance',
  'citation_key': 'keel',
  'n_col': 4,
  'n_col_orig': 4,
  'n_col_non_unique_orig': 4,
  'n': 625,
  'DESCR': 'balance',
  'n_classes': 3,
  'data_loader': 'load_balance'},
 {'name': 'car',
  'phenotype': 'car',
  'citation_key': 'keel',
  'n_col': 15,
  'n_col_orig': 6,
  'n_col_non_unique_orig': 6,
  'n': 1728,
  'DESCR': 'car',
  'n_classes': 4,
  'data_loader': 'load_car'},
 {'name': 'cleveland',
  'phenotype': 'cleveland',
  'citation_key': 'keel',
  'n_col': 13,
  'n_col_orig': 1

In [5]:
# Display the loader callables returned for the 'all' selector of the
# multiclass-classification module (the cell output lists `load_*` functions).
mult_clas.get_data_loaders('all')

[<function common_datasets.multiclass_classification._multiclass_classification.load_abalone()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_automobile()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_balance()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_car()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_cleveland()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_contraceptive()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_dermatology()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_ecoli()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_flare()>,
 <function common_datasets.multiclass_classification._multiclass_classification.load_glass()>,
 <function common_dataset

In [6]:
# Build one metadata record per regression dataset.
# Loader names are discovered via the 'load' prefix on the module's attributes.
data_loaders = [name for name in dir(regression) if name.startswith('load')]

# Fields left out of the summary records (everything else is kept as-is).
dropped_keys = ('data', 'target', 'feature_names',
                'feature_types', 'target_label', 'citation')

results = []

for loader_name in data_loaders:
    # Keep the name string and the callable in separate variables.
    loader = getattr(regression, loader_name)
    dataset = loader()

    # Copy the fields to keep rather than `del`-ing them from the returned
    # dict: the loader's object is left untouched, and a dataset that lacks
    # one of the dropped fields no longer aborts the whole loop with KeyError.
    summary = {key: value for key, value in dataset.items()
               if key not in dropped_keys}
    summary['data_loader'] = loader.__name__

    results.append(summary)

# Round-trip through a DataFrame so every record exposes the same set of keys
# (keys missing from a dataset come back as NaN).
pd.DataFrame(results).to_dict(orient='records')

[{'name': 'airfoil',
  'phenotype': 'airfoil',
  'citation_key': 'krnn',
  'n_col': 5,
  'n_col_orig': 5,
  'n_col_non_unique_orig': 5,
  'n': 1503,
  'DESCR': 'airfoil',
  'data_loader': 'load_airfoil'},
 {'name': 'autoMPG6',
  'phenotype': 'autoMPG',
  'citation_key': 'keel',
  'n_col': 5,
  'n_col_orig': 5,
  'n_col_non_unique_orig': 5,
  'n': 392,
  'DESCR': 'autoMPG6',
  'data_loader': 'load_autoMPG6'},
 {'name': 'baseball',
  'phenotype': 'baseball',
  'citation_key': 'keel',
  'n_col': 16,
  'n_col_orig': 16,
  'n_col_non_unique_orig': 16,
  'n': 337,
  'DESCR': 'baseball',
  'data_loader': 'load_baseball'},
 {'name': 'ccpp',
  'phenotype': 'ccpp',
  'citation_key': 'uci',
  'n_col': 4,
  'n_col_orig': 4,
  'n_col_non_unique_orig': 4,
  'n': 9568,
  'DESCR': 'ccpp',
  'data_loader': 'load_ccpp'},
 {'name': 'communities',
  'phenotype': 'communities',
  'citation_key': 'krnn',
  'n_col': 2262,
  'n_col_orig': 126,
  'n_col_non_unique_orig': 126,
  'n': 1994,
  'DESCR': 'communiti

In [7]:
# Display the loader callables returned for the 'all' selector of the
# regression module (the cell output lists `load_*` functions).
regression.get_data_loaders('all')

[<function common_datasets.regression._regression.load_airfoil()>,
 <function common_datasets.regression._regression.load_autoMPG6()>,
 <function common_datasets.regression._regression.load_baseball()>,
 <function common_datasets.regression._regression.load_ccpp()>,
 <function common_datasets.regression._regression.load_communities()>,
 <function common_datasets.regression._regression.load_compactiv()>,
 <function common_datasets.regression._regression.load_cpu_performance()>,
 <function common_datasets.regression._regression.load_diabetes()>,
 <function common_datasets.regression._regression.load_forestfires()>,
 <function common_datasets.regression._regression.load_laser()>,
 <function common_datasets.regression._regression.load_mortgage()>,
 <function common_datasets.regression._regression.load_puma32h()>,
 <function common_datasets.regression._regression.load_real_estate_valuation()>,
 <function common_datasets.regression._regression.load_residential_building()>,
 <function common_