In [1]:
import os
import numpy as np
import pickle
import pandas as pd 

from tqdm import tqdm

In [2]:
# Experiment folders under ../experiments (one per dataset), excluding 'logs'.
# - sorted: os.listdir returns entries in arbitrary order, so sorting keeps the
#   result rows (and the exported CSV) in a stable order between runs/machines.
# - isdir: stray files (e.g. editor/OS artefacts) are not datasets and would
#   make generate_test fail when it tries to open paths underneath them.
# NOTE: this name shadows the builtin dir(); it is kept because generate_test
# reads it as a module-level global.
dir = sorted(
    d for d in os.listdir('../experiments')
    if d != 'logs' and os.path.isdir(os.path.join('../experiments', d))
)

In [3]:
# Sub-folder names (one per resampling strategy) looked up inside each
# experiment directory by generate_test. The strings must match the folder
# names on disk exactly, so they are kept verbatim.
methods = [
    'Upsampling_logistic',
    'SMOTE_logistic',
    'ADASYN_logistic',
    'BORDELINE_logistic',
]

In [4]:
def _load_pickle(path):
    """Load one pickled object from ``path`` and close the file afterwards."""
    # NOTE(security): pickle can execute arbitrary code while loading; only use
    # this on experiment files produced by a trusted pipeline.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def _balanced_accuracies(metrics, use_default_threshold):
    """Return the balanced accuracy of every entry in ``metrics``.

    Each entry is read as ``entry['matrix'][threshold].ravel()`` and unpacked
    into ``tn, fp, fn, tp`` (presumably a 2x2 confusion matrix -- confirm
    against the code that writes the pickles). The threshold is 0.5 when
    ``use_default_threshold`` is true, otherwise the entry's own ``'thr'``.
    """
    scores = []
    # Index access (not direct iteration) mirrors how the containers were
    # originally read, so list-like and int-keyed containers both work.
    for i in range(len(metrics)):
        entry = metrics[i]
        thr = .5 if use_default_threshold else entry['thr']
        tn, fp, fn, tp = entry['matrix'][thr].ravel()
        scores.append((tp / (tp + fn) + tn / (tn + fp)) / 2)
    return scores


def _null_p_value(pct_gain, n_boot, n_folds, rng):
    """Two-sided Monte-Carlo p-value for the overall mean of ``pct_gain``.

    ``pct_gain`` is a 2-D array (runs x values per run). For each of the
    ``n_boot`` simulations a mean is drawn per run from N(0, sigma), where
    sigma is the std of the per-run means, and then ``n_folds`` values are
    drawn per run from N(run mean, sigma_j), where sigma_j is that run's std.
    The p-value is the share of simulated grand means whose absolute value
    exceeds the absolute observed grand mean.
    """
    sigma_j = pct_gain.std(axis=1)
    sigma = pct_gain.mean(axis=1).std()
    n_runs = len(sigma_j)

    null_means = np.empty(n_boot)
    for b in range(n_boot):
        run_means = rng.normal(0, sigma, n_runs)
        # One broadcast draw replaces the per-run Python loop.
        draws = rng.normal(run_means[:, None], sigma_j[:, None], (n_runs, n_folds))
        null_means[b] = draws.mean()
    return np.mean(np.abs(null_means) > np.abs(pct_gain.mean()))


def generate_test(sample, thr_bas_default, thr_aug_default,
                  n_runs=50, n_boot=1000, n_folds=40, seed=None):
    """Compare each resampling method with the base model on every dataset.

    Reads the module-level ``dir`` (experiment folders) and ``methods``
    (method sub-folders). For every dataset/method pair and every run index in
    ``range(n_runs)`` it loads the base and method metric pickles, computes
    balanced accuracies, and expresses the method's balanced accuracies as a
    percentage gain over the mean base balanced accuracy of the same run.

    Parameters
    ----------
    sample : value interpolated into the pickle file names (used as the sample
        size label by the caller).
    thr_bas_default : bool
        If true, evaluate the base model at threshold 0.5; otherwise at the
        threshold stored under ``'thr'`` in each metrics entry.
    thr_aug_default : bool
        Same choice for the resampled ("augmented") model.
    n_runs : int, default 50
        Number of run files ``..._{sample}_{k}.pkl`` to read per pair.
    n_boot : int, default 1000
        Number of Monte-Carlo simulations for the p-value.
    n_folds : int, default 40
        Values simulated per run. NOTE(review): presumably the number of
        entries in each method pickle -- confirm before changing.
    seed : int or None, default None
        If given, a dedicated ``np.random.RandomState(seed)`` is used so the
        p-values are reproducible; if None, numpy's global random state is
        used exactly as before.

    Returns
    -------
    list of ``[dataset, method, mean pct gain, median pct gain, p_value]``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    results = []
    for d in dir:
        path_main = '../experiments/' + d
        for method in methods:
            pct_gain_list = []
            for run in tqdm(range(n_runs)):
                base_metrics = _load_pickle(path_main + f'/metrics_base_log_{sample}_{run}.pkl')
                method_metrics = _load_pickle(path_main + f'/{method}/target_0.5_{sample}_{run}.pkl')

                base_ba = np.mean(_balanced_accuracies(base_metrics, thr_bas_default))
                method_ba = np.array(_balanced_accuracies(method_metrics, thr_aug_default))
                pct_gain_list.append((method_ba - base_ba) / base_ba * 100)
            pct_gain_list = np.array(pct_gain_list)

            print('Começando bootstrap')
            p_value = _null_p_value(pct_gain_list, n_boot, n_folds, rng)
            results.append([d, method, pct_gain_list.mean(), np.median(pct_gain_list), p_value])
    return results

In [5]:
def build_facet_frame(results, sample, facet_a, alpha=0.01):
    """Turn one ``generate_test`` result list into a heatmap-ready frame.

    Parameters
    ----------
    results : list of ``[dataset, method, mean, median, p_value]`` rows.
    sample : sample size, used for the ``facet_b`` label (``n=<sample>``).
    facet_a : str
        Label describing which classification thresholds were used.
    alpha : float, default 0.01
        Rows with ``p_value < alpha`` are treated as significant.

    Returns
    -------
    pandas.DataFrame with the input columns plus ``mean_p`` (rounded mean for
    significant rows, NaN otherwise), ``mean_text`` (rounded mean as text,
    suffixed with ``*`` for non-significant rows), ``facet_a`` and ``facet_b``.
    """
    df = pd.DataFrame(results, columns=['dataset', 'method', 'mean', 'median', 'p_value'])

    df['mean'] = df['mean'].round()
    # Rounding can leave -0.0; overwrite with 0 so the label reads '0.0'.
    df.loc[df['mean'] == 0, 'mean'] = 0

    significant = df['p_value'] < alpha
    df['mean_p'] = np.where(significant, df['mean'], np.nan)
    df['mean_text'] = np.where(significant, df['mean'].astype(str), df['mean'].apply(lambda x: f'{x}*'))
    df['facet_a'] = facet_a
    df['facet_b'] = f'n={sample}'
    return df


# (thr_bas_default, thr_aug_default, facet label) for each threshold scenario.
THRESHOLD_SCENARIOS = [
    (True, True, 'c=0.5 for both model'),
    (False, True, 'c=0.5 for augmented and \n optimized for base model'),
    (False, False, 'Optimized c for both model'),
]

# One frame per (sample size, scenario), in the same order as before:
# all scenarios for n=500 first, then all scenarios for n=2000.
list_df = []
for sample in (500, 2000):
    for thr_bas_default, thr_aug_default, facet_a in THRESHOLD_SCENARIOS:
        results = generate_test(sample, thr_bas_default, thr_aug_default)
        df = build_facet_frame(results, sample, facet_a)
        list_df.append(df)



100%|██████████| 50/50 [00:03<00:00, 16.33it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 16.88it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 16.44it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 17.13it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.20it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.55it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.89it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.64it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.86it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.06it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.42it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.80it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.67it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.67it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.37it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.90it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.10it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.84it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.89it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.21it/s]


Começando bootstrap


100%|██████████| 50/50 [00:09<00:00,  5.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.88it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.22it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.73it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.53it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.23it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.55it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.07it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.29it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.06it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.30it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.90it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.38it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.61it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.89it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.58it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.34it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.35it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.03it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.26it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.14it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  6.78it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.11it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.01it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.94it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.38it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.18it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.61it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.78it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 16.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.02it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.37it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.09it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.06it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.71it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.09it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.70it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.08it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.85it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.28it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.75it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 13.11it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.26it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.78it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.58it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.53it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.19it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.04it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 13.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.97it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.34it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.89it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.90it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.79it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.26it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.95it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.17it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.02it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.24it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.74it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  6.74it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.96it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:09<00:00,  5.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:11<00:00,  4.34it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  5.67it/s]


Começando bootstrap


100%|██████████| 50/50 [00:10<00:00,  4.58it/s]


Começando bootstrap


100%|██████████| 50/50 [00:09<00:00,  5.28it/s]


Começando bootstrap


100%|██████████| 50/50 [00:09<00:00,  5.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  6.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.23it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.77it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Começando bootstrap


100%|██████████| 50/50 [00:10<00:00,  4.97it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.19it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.09it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  6.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  7.02it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.37it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.22it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.10it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.12it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.18it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.94it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.26it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  6.87it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  7.97it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  7.03it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.67it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.85it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.35it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.19it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  5.79it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.98it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.75it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.30it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.10it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 13.04it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 12.72it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.43it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.10it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.73it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.37it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.46it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.80it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.59it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.50it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.67it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 20.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 13.12it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.19it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.98it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.86it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 15.35it/s]


Começando bootstrap


100%|██████████| 50/50 [00:03<00:00, 14.24it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.29it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.71it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 12.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.98it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.72it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.34it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.64it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.13it/s]


Começando bootstrap


100%|██████████| 50/50 [00:06<00:00,  8.22it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  8.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:07<00:00,  7.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.24it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  5.77it/s]


Começando bootstrap


100%|██████████| 50/50 [00:08<00:00,  6.17it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 11.74it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.56it/s]


Começando bootstrap


100%|██████████| 50/50 [00:04<00:00, 10.36it/s]


Começando bootstrap


100%|██████████| 50/50 [00:05<00:00,  9.62it/s]


Começando bootstrap


In [6]:
# Stack the per-scenario frames. ignore_index=True gives the combined frame a
# unique 0..N-1 index instead of six repeated 0..n-1 ranges, so label-based
# lookups are unambiguous and the index column written to the CSV is meaningful.
df_heatmap = pd.concat(list_df, ignore_index=True)

In [8]:
# Drop the classifier suffix so only the resampling method name remains.
# regex=False: the pattern is a plain string; being explicit avoids depending
# on the default of `regex`, which differs between pandas versions.
df_heatmap["method"] = df_heatmap["method"].str.replace('_logistic', '', regex=False)

In [9]:
# Persist the heatmap table next to the notebook. The index is written as the
# first (unnamed) column, which is pandas' default behaviour made explicit.
df_heatmap.to_csv('heatmap_ba.csv', index=True)