In [21]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from multi_imbalance.datasets import load_datasets
from multi_imbalance.resampling.GlobalCS import GlobalCS
from multi_imbalance.resampling.MDO import MDO
from multi_imbalance.resampling.SOUP import SOUP
from imblearn.metrics import geometric_mean_score

np.random.seed(0)
import os
print(os.getcwd())

datasets = load_datasets('../data')
results = dict()

for dataset_name, dataset_values in datasets.items():
    results[dataset_name]=dict()
    for resample in ['base','global','smote','soup','mdo']:
        error_flag = False
        clf_tree = DecisionTreeClassifier(random_state=0)
        
        if resample == 'base':
            X, y = dataset_values.data, dataset_values.target
        elif resample=='soup':
            soup = SOUP()
            X, y = soup.fit_transform(np.copy(dataset_values.data), np.copy(dataset_values.target))
        elif resample=='global':
            global_cs = GlobalCS()
            X, y = global_cs.fit_transform(np.copy(dataset_values.data), np.copy(dataset_values.target))
        elif resample=='smote':
            try:
                smote = SMOTE()
                X, y = smote.fit_sample(np.copy(dataset_values.data), np.copy(dataset_values.target))
            except Exception as e:
                error_flag = True
                print(resample, dataset_name, e)
                X, y = dataset_values.data, dataset_values.target
        elif resample=='mdo':
            try:
                mdo = MDO()
                X,y = mdo.fit_transform(np.copy(dataset_values.data), np.copy(dataset_values.target))
            except Exception as e:
                error_flag = True
                print(resample, dataset_name, e)
                X, y = dataset_values.data, dataset_values.target

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
        clf_tree.fit(X_train, y_train)
        y_pred = clf_tree.predict(X_test)
        score = str(round(geometric_mean_score(y_test, y_pred),3))
        result = '*' + score if error_flag else score
        results[dataset_name][resample]=result

df = pd.DataFrame(results).T
df



/home/plutasnyy/git/multi-imbalance/benchmarks
mdo cleveland Input must be 1- or 2-d.
smote ecoli Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 6
smote yeast Expected n_neighbors <= n_samples,  but n_samples = 5, n_neighbors = 6


  explained_variance_ = (S ** 2) / (n_samples - 1)
  variables_variance = np.diag(np.cov(uncorrelated_samples, rowvar=False))
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  "recall")
  "recall")
  "recall")


Unnamed: 0,base,global,smote,soup,mdo
balance_scale,0.0,0.87,0.823,0.838,0.0
cleveland,0.0,0.936,0.673,0.744,*0.0
cmc,0.413,0.659,0.567,0.648,0.524
dermatology,0.831,0.986,0.985,0.952,0.962
ecoli,0.0,0.99,*0.0,0.945,0.0
glass,0.502,0.909,0.805,0.868,0.0
hayes_roth,0.935,0.776,0.725,0.874,0.741
new_thyroid,0.988,0.964,0.929,1.0,0.923
winequailty_red,0.0,0.871,0.759,0.794,0.0
yeast,0.0,0.861,*0.0,0.86,0.0
