### Porting to Google Colab
The following cell enables this notebook to run from Google Colab as well as from your local machine IDE.<br>
You can change `all_projects_path` and/or `project_path` to point to the directory in your Google account that contains this notebook, together with the `imgs` and `datasets` sub-directories and the rest of the files.<br>

In [1]:
import sys
import os
# Detect Google Colab by attempting the Colab-only import. Only ImportError is
# caught, so unrelated failures (e.g. KeyboardInterrupt) are not silently hidden.
try:
    from google.colab import drive as google_drive # type: ignore
except ImportError:
    # no Google Colab --> fall back to local machine
    google_drive = None

if google_drive is not None:
    # Running on Colab: mount Google Drive and locate the projects folder in it.
    google_drive_directory = os.path.join('/','content','gdrive')
    google_drive.mount(google_drive_directory)
    all_projects_path = os.path.join(google_drive_directory, 'Othercomputers','My Laptop', 'projects')
else:
    # Running locally: projects live on the d: drive.
    all_projects_path = os.path.join('d:\\', 'projects')

project_path = os.path.join(all_projects_path,'RUNI','Thesis')
assert os.path.exists(project_path), f'Project path {project_path} not found!'
# enable import python files from this notebook's path
# (guarded so that re-running this cell does not add duplicate entries)
if project_path not in sys.path:
    sys.path.append(project_path)
# enable reading images and data files from this notebook's path
os.chdir(project_path)

# The datasets directory must already exist.
datasets_path = os.path.join(project_path, 'datasets')
assert os.path.exists(datasets_path), f'Datasets path {datasets_path} not found!'

# The output directory is created on demand.
output_path = os.path.join(project_path, 'output')
os.makedirs(output_path, exist_ok=True)
assert os.path.exists(output_path), f'Output path {output_path} not found!'

print(f'Current working directory: {os.getcwd()}')
print(f'Datasets path: {datasets_path}')
print(f'Output path: {output_path}')

Current working directory: d:\projects\RUNI\Thesis
Datasets path: d:\projects\RUNI\Thesis\datasets
Output path: d:\projects\RUNI\Thesis\output


In [2]:
from python.hpc import HybridArray

Numba version: 0.60.0
numba.njit is available.
CUDA is available and will be used for GPU operations.
Printing CUDA active device attributes:
    Name:                               NVIDIA GeForce GTX 1650
    Free Memory:                        3367680 [KB]
    Total Memory:                       4193984 [KB]
    Compute capability:                 7.5
    Clock rate:                         1560.00 [MHz]
    Memory clock rate:                  4001.00 [MHz]
    Memory bus width:                   128 bits
    Memory band width (theoretical)     128.03 [GByte/Sec]
    Number of multiprocessors:          16
    Minimal grid size:                  128
    Maximum grid size:                  (2147483647, 65535, 65535)
    Maximum block dimensions:           (1024, 1024, 64)
    Maximum threads per block:          1024
    Warp size:                          32
    Maximum shared memory per block:    49152 [bytes]
    Maximum registers per block:        65536
    Total constant memory:   

In [3]:
import numpy as np
from python.rare_weak_model.rare_weak_model import rare_weak_model
from python.error_controlling_methods.error_controlling_methods import topk, bonferroni, benjamini_hochberg

def simulation(shape: tuple, gpu: bool, njit: bool, method: str,
               num_steps: int=3, epsilon: float = 0.01, mu: float = 1.0) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Run the rare-weak model once and apply one error-controlling method.

    Args:
        shape: Shape of the p-values array to allocate. `shape[1]` is
            multiplied by `epsilon` to derive `n1`.
        gpu: Passed as `use_gpu` when allocating the p-values array.
        njit: Forwarded as `use_njit` to the model and to the method.
        method: One of 'topk', 'bonferroni' or 'benjamini_hochberg'.
        num_steps: Forwarded to `rare_weak_model`.
        epsilon: Fraction of `shape[1]` used for `n1` (truncated to an
            unsigned integer, with a minimum of 1).
        mu: Forwarded to `rare_weak_model`.

    Returns:
        A tuple `(sorted_p_values, counts, num_discoveries)` holding the
        results of calling `.numpy()` on each of the three arrays.

    Raises:
        ValueError: If `method` is not one of the supported names.
    """
    # Validate first, with a real exception: `assert` is removed under
    # `python -O`, and failing here avoids allocating and running the model
    # for a request that cannot be completed.
    supported_methods = ('topk', 'bonferroni', 'benjamini_hochberg')
    if method not in supported_methods:
        raise ValueError(f'Unknown method {method!r}; expected one of {supported_methods}')

    with (
        HybridArray() as sorted_p_values,
        HybridArray() as counts,
        HybridArray() as num_discoveries
    ):
        sorted_p_values.realloc(shape=shape, dtype=np.float64, use_gpu=gpu)
        # At least one element, even when epsilon*shape[1] truncates to zero.
        n1 = max(np.uint32(1),np.uint32(epsilon*shape[1]))
        rare_weak_model(sorted_p_values_output=sorted_p_values, cumulative_counts_output=counts,
                        mu=mu, n1=n1, num_steps=num_steps, use_njit=njit)
        # Keys must stay in sync with `supported_methods` above.
        controllers = {
            'topk': topk,
            'bonferroni': bonferroni,
            'benjamini_hochberg': benjamini_hochberg,
        }
        controllers[method](sorted_p_values_input=sorted_p_values,
                            num_discoveries_output=num_discoveries, use_njit=njit)
        # Convert while still inside the `with` block, i.e. before the
        # HybridArray context managers exit.
        ret = (sorted_p_values.numpy(), counts.numpy(), num_discoveries.numpy())
    return ret

def simulation3(
    shape: tuple,
    method: str,
    num_steps: int = 3,
    epsilon: float = 0.01,
    mu: float = 1.0,
) -> None:
    """Run `simulation` under three (gpu, njit) settings and print summary means.

    Prints a header line with the parameters, then one line per setting with
    the means of the p-values, counts and number-of-discoveries arrays.
    All arguments are forwarded to `simulation` unchanged.
    """
    print(f'Running on {method=} {shape=} {epsilon=} {mu=} {num_steps=}')
    # (gpu, njit) pairs, tried in this order.
    backends = ((False, False), (False, True), (True, False))
    for gpu, njit in backends:
        p_values, counts, num_discoveries = simulation(
            shape=shape,
            gpu=gpu,
            njit=njit,
            method=method,
            num_steps=num_steps,
            epsilon=epsilon,
            mu=mu,
        )
        print(f'{gpu=} {njit=} --> p_values.mean={p_values.mean():.2f} counts.mean={counts.mean():.2f} num_discoveries.mean={num_discoveries.mean():.2f}')




In [4]:
# Simulation parameters; later cells read these same globals.
shape = (100, 1000)
mu = 1.0
epsilon = 0.1
num_steps = 3

# Run the top-k method with the parameters above.
simulation3(
    shape=shape,
    method='topk',
    num_steps=num_steps,
    epsilon=epsilon,
    mu=mu,
)

Running on method='topk' shape=(100, 1000) epsilon=0.1 mu=1.0 num_steps=3
gpu=False njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=500.50
gpu=False njit=True --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=500.50
gpu=True njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=500.50




In [5]:
# Bonferroni run; reuses the `shape`, `num_steps`, `epsilon` and `mu` globals set in an earlier cell.
simulation3(
    shape=shape,
    method='bonferroni',
    num_steps=num_steps,
    epsilon=epsilon,
    mu=mu,
)

Running on method='bonferroni' shape=(100, 1000) epsilon=0.1 mu=1.0 num_steps=3
gpu=False njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=760.11
gpu=False njit=True --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=760.11
gpu=True njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=760.11




In [6]:
# Benjamini-Hochberg run; reuses the `shape`, `num_steps`, `epsilon` and `mu` globals set in an earlier cell.
simulation3(
    shape=shape,
    method='benjamini_hochberg',
    num_steps=num_steps,
    epsilon=epsilon,
    mu=mu,
)

Running on method='benjamini_hochberg' shape=(100, 1000) epsilon=0.1 mu=1.0 num_steps=3
gpu=False njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=640.74
gpu=False njit=True --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=640.74
gpu=True njit=False --> p_values.mean=0.24 counts.mean=49.52 num_discoveries.mean=640.74


