# Description

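Measures the running time of several correlation coefficients (Pearson, Spearman, Clustermatch, MIC and MIC_e) on pairs of random vectors of increasing size, using a single core. The timings are saved to a pickle file in the results directory.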

# Modules loading

Make sure only one core is used everywhere.

In [1]:
%env CM_N_JOBS=1
%env NUMBA_NUM_THREADS=1
%env MKL_NUM_THREADS=1
%env OPEN_BLAS_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
%env OMP_NUM_THREADS=1

env: CM_N_JOBS=1
env: NUMBA_NUM_THREADS=1
env: MKL_NUM_THREADS=1
env: OPEN_BLAS_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: OMP_NUM_THREADS=1


In [2]:
from time import time

import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr

from clustermatch import conf
from clustermatch.coef import cm
from clustermatch.methods import mic

# Settings

In [3]:
# Name of the pickle file (written inside OUTPUT_DIR) that stores the timing results
OUTPUT_FILENAME = "time_test.pkl"

In [4]:
# Lengths of the random input vectors to benchmark, in increasing order
DATA_SIZES = [100, 500, 1_000, 5_000, 10_000, 50_000, 100_000, 1_000_000]

# Default number of timed repetitions per (method, data size) combination
N_REPS = 10

In [5]:
# Seed NumPy's global random generator so the random input vectors are reproducible
np.random.seed(0)

# Paths

In [6]:
# Directory where the timing results are saved; created (with parents) if missing
OUTPUT_DIR = conf.RESULTS_DIR / "time_test"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
display(OUTPUT_DIR)

PosixPath('/opt/data/results/time_test')

# Functions

In [7]:
# Accumulates one row per timed call: input size, method label, elapsed time
# in seconds and the coefficient value returned by the method
time_results = pd.DataFrame(columns=["data_size", "method", "time", "sim"])

In [8]:
def run_method(func, method_name, size):
    """Time `func` on pairs of random vectors and record each measurement.

    For every repetition, two fresh vectors of length `size` are drawn with
    `np.random.rand`, `func` is called on them, and one row is appended to the
    module-level `time_results` data frame.

    Parameters
    ----------
    func : callable
        Called as `func(d1, d2)` with two 1D NumPy arrays; its return value is
        stored in the "sim" column.
    method_name : str
        Label stored in the "method" column.
    size : int
        Length of each random input vector; stored in the "data_size" column.

    Returns
    -------
    None
        Results are written to `time_results` as a side effect.
    """
    # Imported locally so this cell needs no change to the notebook's import cell.
    from time import perf_counter

    # Inputs shorter than this use a much larger number of repetitions
    # (presumably because single calls are too fast to time reliably -- TODO confirm).
    small_size_threshold = 500
    small_size_reps = 1000

    n_reps = small_size_reps if size < small_size_threshold else N_REPS

    for _ in range(n_reps):
        # Data generation is kept outside the timed region.
        d1 = np.random.rand(size)
        d2 = np.random.rand(size)

        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short durations; time() follows the wall clock and can jump.
        start_time = perf_counter()
        sim = func(d1, d2)
        met_time = perf_counter() - start_time

        # Append in place so the caller's reference to `time_results` stays valid.
        time_results.loc[len(time_results)] = [d1.shape[0], method_name, met_time, sim]

# Run

In [9]:
# Warm-up call on a small input so that any one-time initialization cost of `cm`
# is not included in the timings (presumably JIT compilation -- TODO confirm)
cm(np.random.rand(100), np.random.rand(100))

0.02047038824894002

In [10]:
# Benchmark table: (progress label, method name stored in the results,
# callable to time, largest data size to run it on or None for no limit).
benchmarks = [
    ("p", "p-1", lambda x, y: pearsonr(x, y)[0], None),
    ("s", "s-1", lambda x, y: spearmanr(x, y)[0], None),
    ("cm", "cm-1", lambda x, y: cm(x, y), None),
    ("mic_e", "mic_e-1", lambda x, y: mic(x, y, estimator="mic_e"), 50000),
    ("mic", "mic-1", lambda x, y: mic(x, y), 10000),
]

for s in DATA_SIZES:
    print(f"Size: {s}")

    for label, method_name, func, max_size in benchmarks:
        # Skip methods that are capped below the current data size.
        if max_size is not None and s > max_size:
            continue

        print(f"  {label}")
        run_method(func, method_name, s)

    # Save after every data size so partial results survive an interrupted run.
    print("Saving to pickle")
    time_results.to_pickle(OUTPUT_DIR / OUTPUT_FILENAME)

Size: 100
  p
  s
  cm
  mic_e
  mic
Saving to pickle
Size: 500
  p
  s
  cm
  mic_e
  mic
Saving to pickle
Size: 1000
  p
  s
  cm
  mic_e
  mic
Saving to pickle
Size: 5000
  p
  s
  cm
  mic_e
  mic
Saving to pickle
Size: 10000
  p
  s
  cm
  mic_e
  mic
Saving to pickle
Size: 50000
  p
  s
  cm
  mic_e
Saving to pickle
Size: 100000
  p
  s
  cm
Saving to pickle
Size: 1000000
  p
  s
  cm
Saving to pickle
