In [1]:
import os
import numpy as np
import pickle
import pandas as pd 

from tqdm import tqdm

In [2]:
# Experiment folders to analyse: every entry of ../experiments except 'logs'.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the results reproducible between runs and machines.
# NOTE: the name shadows the builtin `dir`, but it is kept because
# generate_test() reads this module-level variable by that name.
dir = sorted(d for d in os.listdir('../experiments') if d != 'logs')

In [3]:
# Names of the augmentation methods to evaluate. Each name is interpolated
# verbatim into a file path by generate_test(), so the spelling (including
# 'BORDELINE') presumably mirrors the on-disk folder names -- confirm before
# "correcting" it.
methods = [
    'Upsampling',
    'SMOTE',
    'ADASYN',
    'BORDELINE',
]

In [4]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(security): unpickling can execute arbitrary code. Only point this at
    # experiment files produced by this project, never at untrusted input.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _sensitivities(metrics, use_default_thr):
    """Return the sensitivity ``tp / (tp + fn)`` of every entry in ``metrics``.

    Entries are accessed as ``metrics[0] .. metrics[len(metrics) - 1]``. The
    confusion matrix of an entry is read from ``entry['matrix'][0.5]`` when
    ``use_default_thr`` is truthy, otherwise from
    ``entry['matrix'][entry['thr']]``; its ``ravel()`` is unpacked as
    ``tn, fp, fn, tp``.
    """
    values = []
    for idx in range(len(metrics)):
        entry = metrics[idx]
        cutoff = .5 if use_default_thr else entry['thr']
        _, _, fn, tp = entry['matrix'][cutoff].ravel()
        # Not guarded: an entry with tp + fn == 0 yields a non-finite value.
        values.append(tp / (tp + fn))
    return values


def generate_test(sample, thr_bas_default, thr_aug_default,
                  n_runs=50, n_boot=1000, n_draws=40, seed=None):
    """Measure the sensitivity gain of each augmentation method over the base model.

    For every experiment folder in the module-level ``dir`` list and every
    name in the module-level ``methods`` list, this loads ``n_runs`` pairs of
    pickled metric collections, computes the percentage gain in sensitivity of
    each augmented entry over the mean base sensitivity of the same run, and
    estimates a two-sided p-value by simulating the overall mean gain under a
    zero-centred normal model (run-level means drawn from ``N(0, sigma)``,
    then per-run values drawn around them with that run's observed spread).

    Args:
        sample: Value interpolated into the pickle file names
            (``metrics_base_{sample}_{run}.pkl`` and
            ``{method}/target_0.5_{sample}_{run}.pkl``).
        thr_bas_default: If truthy, base confusion matrices are read at key
            0.5; otherwise at each entry's own ``'thr'`` value.
        thr_aug_default: Same choice for the augmented-model matrices.
        n_runs: Number of run indices (``0 .. n_runs - 1``) loaded per
            dataset/method pair; also the number of simulated run-level means.
        n_boot: Number of simulated replicates used for the p-value.
        n_draws: Number of values simulated per run in each replicate.
            NOTE(review): the default 40 presumably mirrors the number of
            entries in each augmented metrics file -- confirm.
        seed: Optional seed. When given, the simulation draws from a private
            ``numpy`` generator seeded with it; when ``None`` the global
            ``np.random`` state is used, exactly as before.

    Returns:
        A list with one ``[dataset, method, mean_gain, median_gain, p_value]``
        row per dataset/method pair.
    """
    rng = np.random if seed is None else np.random.default_rng(seed)
    results = []
    for d in dir:
        path_main = '../experiments/' + d
        for method in methods:
            pct_gain_list = []
            for run in tqdm(range(n_runs)):
                base_metrics = _load_pickle(path_main + f'/metrics_base_{sample}_{run}.pkl')
                method_metrics = _load_pickle(path_main + f'/{method}/target_0.5_{sample}_{run}.pkl')
                base_mean = np.mean(_sensitivities(base_metrics, thr_bas_default))
                sen_model = np.array(_sensitivities(method_metrics, thr_aug_default))
                # Relative gain (in percent) of every augmented entry over the
                # average base sensitivity of this run. A zero base mean is
                # not guarded against.
                pct_gain_list.append((sen_model - base_mean) / base_mean * 100)
            pct_gain_list = np.array(pct_gain_list)
            sigma_j = pct_gain_list.std(axis=1)           # spread inside each run
            sigma = pct_gain_list.mean(axis=1).std()      # spread of the run means
            observed_mean = pct_gain_list.mean()

            sample_pct_mean = []
            print('Começando bootstrap')
            for _ in range(n_boot):
                sample_pct = []
                # Run-level means under the null hypothesis of zero average gain.
                sample_mu = rng.normal(0, sigma, n_runs)
                for j in range(n_runs):
                    sample_pct.extend(rng.normal(sample_mu[j], sigma_j[j], n_draws).tolist())
                sample_pct_mean.append(np.mean(sample_pct))
            # Two-sided p-value: share of simulated means at least as extreme
            # (in absolute value) as the observed one.
            p_value = np.mean(np.abs(sample_pct_mean) > np.abs(observed_mean))
            results.append([d, method, observed_mean, np.median(pct_gain_list), p_value])
    return results

In [5]:
def _build_facet_frame(results, facet_a, sample):
    """Turn the rows returned by generate_test() into a labelled DataFrame.

    The mean gain is rounded to a whole number; ``mean_p`` keeps it only where
    ``p_value < 0.01`` (NaN elsewhere); ``mean_text`` is the integer as text,
    with a trailing ``*`` on rows where ``p_value`` is NOT below 0.01.
    ``facet_a`` and ``facet_b`` are constant label columns for the frame.
    """
    frame = pd.DataFrame(results, columns=['dataset', 'method', 'mean', 'median', 'p_value'])
    frame['mean'] = frame['mean'].round()
    # Rewrites zeros (including a rounded -0.0) as a plain 0.
    frame.loc[frame['mean'] == 0, 'mean'] = 0
    significant = frame['p_value'] < 0.01
    as_int = frame['mean'].astype(int)
    frame['mean_p'] = np.where(significant, frame['mean'], np.nan)
    frame['mean_text'] = np.where(significant, as_int.astype(str), as_int.apply(lambda x: f'{x}*'))
    frame['facet_a'] = facet_a
    frame['facet_b'] = f'n={sample}'
    return frame


# (thr_bas_default, thr_aug_default, facet label) for each threshold setup,
# in the order the frames are appended to list_df.
THRESHOLD_SETUPS = [
    (True, True, 'c=0.5 for both model'),
    (False, True, 'c=0.5 for augmented and \n optimized for base model'),
    (False, False, 'Optimized c for both model'),
]

# One frame per (sample size, threshold setup): all setups for n=500 first,
# then all setups for n=2000.
list_df = []
for sample in (500, 2000):
    for thr_bas_default, thr_aug_default, facet_a in THRESHOLD_SETUPS:
        results = generate_test(sample, thr_bas_default, thr_aug_default)
        df = _build_facet_frame(results, facet_a, sample)
        list_df.append(df)



  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 50/50 [00:01<00:00, 47.46it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 72.68it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.47it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 74.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 41.96it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 40.21it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 40.98it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 42.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 43.03it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 41.47it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 41.23it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 37.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 49.88it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 48.23it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 50.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 50.44it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 46.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 46.10it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 43.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 43.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.29it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 19.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 18.92it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 53.57it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 47.58it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 48.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 44.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 51.50it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 53.32it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 50.50it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 50.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 65.06it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 62.91it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 37.91it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 35.99it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 35.43it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 33.33it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 34.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 37.02it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 35.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 35.23it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 46.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 60.86it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 85.90it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 90.91it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 80.18it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 75.71it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.64it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 72.95it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 33.80it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 31.44it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 32.35it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 32.92it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 100.74it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 87.00it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 95.63it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 85.31it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 106.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 93.66it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 90.66it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 79.03it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 124.91it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 118.22it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 115.93it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 131.59it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 75.37it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 68.12it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.20it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 65.68it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.75it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 67.71it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 76.97it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.71it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 97.06it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 89.27it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 104.75it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 94.23it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 85.48it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.51it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.46it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.59it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 34.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 32.36it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 33.21it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 31.69it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 100.20it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 79.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 90.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 80.97it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 96.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 76.53it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 87.65it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 101.39it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 91.65it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 86.04it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 91.46it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 91.96it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 67.48it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 62.65it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.56it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 57.57it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 61.04it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.27it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.41it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 68.70it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.94it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 68.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 81.96it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 64.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 60.65it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 58.38it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 22.94it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 20.68it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 21.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 20.73it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 85.08it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 86.28it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 87.88it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 102.08it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 81.94it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 92.57it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 73.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 93.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 88.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 90.59it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 90.36it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 65.86it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 64.26it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 65.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.25it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 67.27it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.30it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.12it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.54it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 70.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 77.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 77.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 73.15it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 79.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 60.60it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 56.07it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 65.21it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 21.53it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 21.76it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 22.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 22.04it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 82.55it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 82.49it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.05it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 87.49it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 71.99it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 83.19it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.07it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 84.99it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 92.34it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 98.73it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 80.89it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 98.22it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 62.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 58.73it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 62.17it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 58.99it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 63.66it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 59.83it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 66.03it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 58.93it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 61.53it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 60.62it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 69.86it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 59.66it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 72.68it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 57.29it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 59.50it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 52.90it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 23.91it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 24.77it/s]


Começando bootstrap


100%|██████████| 50/50 [00:02<00:00, 24.82it/s]


Começando bootstrap


100%|██████████| 50/50 [00:01<00:00, 25.16it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 103.27it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 92.43it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 76.45it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 105.77it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 104.92it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 104.99it/s]


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 97.65it/s] 


Começando bootstrap


100%|██████████| 50/50 [00:00<00:00, 85.89it/s] 


Começando bootstrap


In [6]:
# Stack the per-configuration frames collected in list_df into one long
# table; each frame keeps its own row labels (no index reset).
df_heatmap = pd.concat(objs=list_df)

In [7]:
# Write the combined table to a CSV file, resolved against the current
# working directory; the row index is written as the first column.
df_heatmap.to_csv(path_or_buf='heatmap_esp.csv')