In [23]:
import numpy as np
import pandas as pd
from IPython.core.display import display
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from multi_imbalance.datasets import load_datasets
from multi_imbalance.resampling.SOUP import SOUP
from multi_imbalance.resampling.MDO import MDO
from multi_imbalance.resampling.GlobalCS import GlobalCS

from imblearn.metrics import geometric_mean_score
from imblearn.over_sampling import SMOTE

np.random.seed(0)

datasets = load_datasets('/home/plutasnyy/git/multi-imbalance/data/')
results_g_mean = dict()
results_acc = dict()

for dataset_name, dataset_values in datasets.items():
    print(dataset_name)
    X, y = dataset_values.data, dataset_values.target
    X_train, X_test, y_train, y_test = train_test_split(X, y,shuffle=True, stratify=y, test_size=0.7, random_state=0)
    results_g_mean[dataset_name]=dict()
    results_acc[dataset_name]=dict()
    for resample in ['base','global','smote','soup','mdo']:
        error_flag = False
        clf_tree = DecisionTreeClassifier(random_state=0)
        
        if resample == 'base':
            X_train_resampled, y_train_resampled = X_train, y_train
        elif resample=='soup':
            soup = SOUP()
            X_train_resampled, y_train_resampled = soup.fit_transform(np.copy(X_train), np.copy(y_train))
        elif resample=='global':
            global_cs = GlobalCS()
            X_train_resampled, y_train_resampled = global_cs.fit_transform(np.copy(X_train), np.copy(y_train))
        elif resample=='smote':
            try:
                smote = SMOTE()
                X_train_resampled, y_train_resampled = smote.fit_sample(np.copy(X_train), np.copy(y_train))
            except Exception as e:
                error_flag = True
                print(resample, dataset_name, e)
                X_train_resampled, y_train_resampled = X_train, y_train
        elif resample=='mdo':
            mdo = MDO(k=9, k1_frac=0, seed=0)
            X_train_resampled, y_train_resampled = mdo.fit_transform(np.copy(X_train), np.copy(y_train))
            
        clf_tree.fit(X_train_resampled, y_train_resampled)
        y_pred = clf_tree.predict(X_test)
        g_mean = float(str(round(geometric_mean_score(y_test, y_pred, correction=0.001),3)))
        acc = float(str(round(accuracy_score(y_test, y_pred),3)))
        result_g_mean = None if error_flag else g_mean
        result_acc = None if error_flag else acc
        
        results_g_mean[dataset_name][resample]=result_g_mean
        results_acc[dataset_name][resample]=result_acc

display("G-MEAN")
df = pd.DataFrame(results_g_mean).T
display(df)

display("ACC")
df2 = pd.DataFrame(results_acc).T
display(df2)

df.fillna(df.median(), inplace=True)
display(df.mean())

balance_scale
cleveland
smote cleveland Expected n_neighbors <= n_samples,  but n_samples = 4, n_neighbors = 6
cmc
dermatology
ecoli
smote ecoli Expected n_neighbors <= n_samples,  but n_samples = 1, n_neighbors = 6
glass
smote glass Expected n_neighbors <= n_samples,  but n_samples = 5, n_neighbors = 6
hayes_roth
new_thyroid
winequailty_red
smote winequailty_red Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 6
yeast
smote yeast Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 6


'G-MEAN'

Unnamed: 0,base,global,smote,soup,mdo
balance_scale,0.424,0.343,0.483,0.519,0.415
cleveland,0.101,0.09,,0.093,0.076
cmc,0.427,0.433,0.445,0.479,0.441
dermatology,0.946,0.977,0.963,0.947,0.977
ecoli,0.123,0.133,,0.122,0.144
glass,0.585,0.599,,0.649,0.713
hayes_roth,0.548,0.552,0.622,0.585,0.606
new_thyroid,0.84,0.84,0.84,0.904,0.932
winequailty_red,0.26,0.083,,0.281,0.111
yeast,0.108,0.197,,0.193,0.283


'ACC'

Unnamed: 0,base,global,smote,soup,mdo
balance_scale,0.751,0.767,0.751,0.66,0.728
cleveland,0.516,0.479,,0.455,0.493
cmc,0.467,0.478,0.472,0.483,0.48
dermatology,0.961,0.977,0.961,0.926,0.977
ecoli,0.784,0.788,,0.657,0.767
glass,0.627,0.687,,0.573,0.627
hayes_roth,0.548,0.57,0.624,0.591,0.613
new_thyroid,0.901,0.901,0.901,0.947,0.934
winequailty_red,0.517,0.529,,0.482,0.453
yeast,0.452,0.472,,0.413,0.483


base      0.4362
global    0.4247
smote     0.6463
soup      0.4772
mdo       0.4698
dtype: float64