In [1]:
!python -m pip install multiple-hypothesis-testing



In [2]:
# On Google Colab: mount Google Drive and append the project folder to sys.path.
# Anywhere else `google.colab` cannot be imported and this cell does nothing.
try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: there is no Drive to mount.
    pass
else:
    import sys
    try:
        drive.mount('/content/gdrive/')
    except Exception as mount_error:
        # Stay best-effort (do not abort the notebook), but report the failure
        # instead of hiding it; KeyboardInterrupt is deliberately not caught.
        print(f'Google Drive mount failed; project path was not added: {mount_error!r}')
    else:
        # Debug aid: list the project folder to verify the mount.
        # import glob; print(glob.glob('/content/gdrive/Othercomputers/My Laptop/projects/RUNI/Thesis/*'))
        sys.path.append('/content/gdrive/Othercomputers/My Laptop/projects/RUNI/Thesis')

In [3]:
from Synthetic_Data_Generators import Multi_Class_Normal_Population as Data_Generator
from Synthetic_Data_Generators import Two_Lists_Tuple, Data_Generator_Base
from Higher_Criticism import Higher_Criticism


In [4]:
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

# Render all figure text in a monospace font.
plt.rcParams.update({'font.family': 'monospace'})


In [12]:
def asymptotic_analysis(N_range: list[int], beta_range: list[float], r_range: list[float], hc_models: list, monte_carlo: int = 1000) -> None:
    """Plot, for each (beta, r) pair, every model's AUC as a function of sample size.

    For each sample size in ``N_range`` a noise baseline is computed once with
    ``Higher_Criticism.monte_carlo_best_objectives`` and reused for all
    (beta, r) pairs; the per-pair values come from
    ``Higher_Criticism.monte_carlo_statistics_HC``. Afterwards one figure is
    shown per (beta, r) pair, with one curve per key returned by that helper.

    Args:
        N_range: Sample sizes to evaluate; also the x-values of every curve.
        beta_range: beta values passed to ``Data_Generator.params_from_N_r_beta``.
        r_range: r values passed to ``Data_Generator.params_from_N_r_beta``.
        hc_models: Models handed to both Monte Carlo helpers.
        monte_carlo: Number of Monte Carlo runs passed to the noise-baseline
            helper; also quoted in each figure title.

    Returns:
        None. Each figure is displayed with ``plt.show()`` and then closed.
    """
    # Pairs of (index, beta) and (index, r); how the two lists are combined is
    # defined by Two_Lists_Tuple (presumably every combination - TODO confirm).
    params_list = Two_Lists_Tuple(list(enumerate(beta_range)), list(enumerate(r_range)))
    collect_results = {}
    # With several parameter pairs, show one outer progress bar and silence the inner ones.
    many_params = len(params_list) > 1
    for ind_N, N in enumerate(N_range):
        print(f'Working on sample size: {N}')
        # The noise baseline depends only on N, so compute it once per sample size.
        noise_generator = Data_Generator_Base(N)
        noise_values = Higher_Criticism.monte_carlo_best_objectives(hc_models=hc_models, data_generator=noise_generator, monte_carlo=monte_carlo, disable_tqdm=many_params)
        for (ind_beta, beta), (ind_r, r) in tqdm(params_list, disable=not many_params):
            signal_generator = Data_Generator(**Data_Generator.params_from_N_r_beta(N=N, r=r, beta=beta))
            hc_monte_carlo = Higher_Criticism.monte_carlo_statistics_HC(hc_models=hc_models, noise_values=noise_values, data_generator=signal_generator, disable_tqdm=many_params)
            for key, auc in hc_monte_carlo.items():
                if key not in collect_results:
                    # One (N, r, beta) grid per reported key, allocated on first sight.
                    collect_results[key] = np.empty(shape=(len(N_range), len(r_range), len(beta_range)), dtype=np.float32)
                collect_results[key][ind_N, ind_r, ind_beta] = auc

    for (ind_beta, beta), (ind_r, r) in params_list:
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))
        max_auc = 0
        for key, results in collect_results.items():
            auc = results[:, ind_r, ind_beta].reshape(-1)
            # Keys containing 'power' are dashed; keys containing 'B' are drawn thicker.
            line_params = {'linestyle': 'dashed' if 'power' in key else 'solid'}
            if 'B' in key:
                line_params['linewidth'] = 3
            ax.plot(N_range, auc, label=key, **line_params)
            max_auc = max(max_auc, auc.max())
        if max_auc >= 0.9:
            # Pin the top of the axis at 1.0 once the curves get close to it.
            ax.set_ylim(top=1.0)
        # Label both axes so the figure can be read on its own.
        ax.set_xlabel('Number of samples (N)')
        ax.set_ylabel('AUC')
        ax.set_title(f'AUC values as function of number of samples using {monte_carlo} monte carlo runs.\n' + f'r={r:.2f} beta={beta:.2f}')
        # Anchor the legend outside the axes, to the right of the plot.
        ax.legend(loc='center right', bbox_to_anchor=(1.7, 0.5))
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


In [14]:

# Sample sizes evaluated for every scenario (x-axis of each figure).
# NOTE(review): the recorded run of this cell was interrupted (KeyboardInterrupt)
# while processing N=1000000, which ran at about 2 it/s - confirm this size is intended.
N_range = [1000, 2000, 3000, 5000, 6000, 7000, 8000, 10000, 20000, 30000, 1000000]

# Models under comparison, in order: Bonferroni, BH, then HC with
# gamma = -0.1 ... -0.9 (9 values) and HC with gamma = 0.1 ... 0.4 (4 values).
hc_models = [
    Higher_Criticism(work_mode='bonferroni'),
    Higher_Criticism(work_mode='bh'),
    *(Higher_Criticism(work_mode='hc', global_max=True, gamma=-gamma_power) for gamma_power in np.linspace(0.1, 0.9, 9)),
    *(Higher_Criticism(work_mode='hc', global_max=True, gamma=gamma) for gamma in np.linspace(0.1, 0.4, 4)),
]

# Run the same analysis for each (beta, r) scenario, in the original order.
for scenario in (
    {'beta_range': [0.7], 'r_range': [0.5]},
    {'beta_range': [0.5], 'r_range': [0.05]},
):
    asymptotic_analysis(N_range=N_range, hc_models=hc_models, monte_carlo=1000, **scenario)


Working on sample size: 1000


100%|██████████| 1000/1000 [00:00<00:00, 2292.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1590.08it/s]


Working on sample size: 2000


100%|██████████| 1000/1000 [00:00<00:00, 1876.20it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1176.48it/s]


Working on sample size: 3000


100%|██████████| 1000/1000 [00:00<00:00, 1395.35it/s]
100%|██████████| 1000/1000 [00:01<00:00, 907.63it/s]


Working on sample size: 5000


100%|██████████| 1000/1000 [00:00<00:00, 1053.39it/s]
100%|██████████| 1000/1000 [00:01<00:00, 689.42it/s]


Working on sample size: 6000


100%|██████████| 1000/1000 [00:01<00:00, 908.76it/s]
100%|██████████| 1000/1000 [00:01<00:00, 606.27it/s]


Working on sample size: 7000


100%|██████████| 1000/1000 [00:01<00:00, 822.18it/s]
100%|██████████| 1000/1000 [00:01<00:00, 537.63it/s]


Working on sample size: 8000


100%|██████████| 1000/1000 [00:01<00:00, 741.01it/s]
100%|██████████| 1000/1000 [00:02<00:00, 488.05it/s]


Working on sample size: 10000


100%|██████████| 1000/1000 [00:01<00:00, 618.73it/s]
100%|██████████| 1000/1000 [00:02<00:00, 395.53it/s]


Working on sample size: 20000


100%|██████████| 1000/1000 [00:03<00:00, 305.32it/s]
100%|██████████| 1000/1000 [00:04<00:00, 216.57it/s]


Working on sample size: 30000


100%|██████████| 1000/1000 [00:04<00:00, 231.74it/s]
100%|██████████| 1000/1000 [00:06<00:00, 147.52it/s]


Working on sample size: 1000000


100%|██████████| 1000/1000 [07:21<00:00,  2.26it/s]
 12%|█▏        | 120/1000 [01:05<07:58,  1.84it/s]


KeyboardInterrupt: 