# Description

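Multi-threaded version of the profiling code in `09`: it times and profiles `ccc` called with `n_jobs=8` for increasing values of `n_samples`.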

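# Remove pycache dir

Delete every `__pycache__` directory found under `${CODE_DIR}` before running the benchmarks. Note that if `CODE_DIR` is unset or empty (in the recorded run below `echo` prints nothing), `find` searches the current working directory instead, which is why the listed paths start with `./libs/`.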

In [1]:
!echo ${CODE_DIR}




In [2]:
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print

./libs/ccc/__pycache__
./libs/ccc/sklearn/__pycache__
./libs/ccc/scipy/__pycache__
./libs/ccc/coef/__pycache__
./libs/ccc/utils/__pycache__
./libs/ccc/pytorch/__pycache__


In [3]:
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -prune -exec rm -rf {} \;

In [4]:
!find ${CODE_DIR} -regex '^.*\(__pycache__\)$' -print

# Modules

In [5]:
import numpy as np

from ccc.coef import ccc

# Settings

In [6]:
# number of consecutive `ccc` calls per benchmarked/profiled run (used as the default `n_reps` of `func`)
N_REPS = 10

In [7]:
# seed NumPy's global generator so the random inputs drawn in the cells below are reproducible
np.random.seed(0)

# Setup

In [8]:
# Warm-up call: let numba compile all the code before profiling, so that
# compilation time is not part of the measurements taken below.
# Note: this draws 20 values from the seeded global generator, so it also
# affects which random inputs the later cells get.
ccc(np.random.rand(10), np.random.rand(10))

0.15625

# Run with `n_samples` small

## `n_samples=50`

In [9]:
N_SAMPLES = 50

In [10]:
# Random input vectors of length N_SAMPLES for the benchmark.
# `func` reads the notebook-level names `x` and `y` when it is called, so
# re-assigning them in a later cell changes what `func` measures.
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [11]:
def func(n_reps=N_REPS, n_jobs=8):
    """
    Call `ccc` repeatedly on the notebook-level arrays `x` and `y`.

    This is the workload that the `%%timeit` and `%%prun` cells measure.
    `x` and `y` are looked up as globals at call time, so re-assigning them
    in a later cell changes the input without redefining this function.

    Args:
        n_reps: number of consecutive `ccc` calls to perform. The default is
            the value `N_REPS` had when this cell was executed.
        n_jobs: value forwarded to the `n_jobs` argument of `ccc`. Defaults
            to 8, the value previously hard-coded here.

    Returns:
        None. The results of the `ccc` calls are discarded.
    """
    for _ in range(n_reps):
        ccc(x, y, n_jobs=n_jobs)

In [12]:
%%timeit func()
# In cell mode, the `func()` on the magic line above is setup code: it is
# executed but not timed. Only the call below is timed.
func()

14.6 ms ± 40 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [13]:
%%prun -s cumulative -l 20 -T 09-n_samples_small_50.txt
# Profile one `func()` run: sort by cumulative time (-s), show the top 20
# entries (-l) and also save the report to the text file given with -T.
# NOTE(review): the report file keeps the `09-` prefix although this notebook
# is described as a variant of `09` -- confirm it does not overwrite the
# profile output written by that notebook.
func()

 
*** Profile printout saved to text file '09-n_samples_small_50.txt'. 


         8195 function calls in 0.018 seconds

   Ordered by: cumulative time
   List reduced from 114 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.018    0.018 {built-in method builtins.exec}
        1    0.000    0.000    0.018    0.018 <string>:1(<module>)
        1    0.000    0.000    0.018    0.018 158102722.py:1(func)
       10    0.000    0.000    0.018    0.002 impl.py:307(ccc)
       10    0.000    0.000    0.013    0.001 impl.py:492(compute_coef)
       10    0.000    0.000    0.012    0.001 impl.py:485(cdist_func)
       10    0.000    0.000    0.012    0.001 impl.py:192(cdist_parts_parallel)
      659    0.011    0.000    0.011    0.000 {method 'acquire' of '_thread.lock' objects}
      154    0.000    0.000    0.011    0.000 threading.py:280(wait)
       90    0.000    0.000    0.010    0.000 threading.py:563(wait)
       80    0.000    0.000    0.010    0.000 thread.py:161(submit)


## `n_samples=100`

In [14]:
N_SAMPLES = 100

In [15]:
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [16]:
%%timeit func()
func()

24.9 ms ± 190 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
%%prun -s cumulative -l 20 -T 09-n_samples_small_100.txt
func()

 
*** Profile printout saved to text file '09-n_samples_small_100.txt'. 


         10901 function calls in 0.029 seconds

   Ordered by: cumulative time
   List reduced from 120 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.029    0.029 {built-in method builtins.exec}
        1    0.000    0.000    0.029    0.029 <string>:1(<module>)
        1    0.000    0.000    0.029    0.029 158102722.py:1(func)
       10    0.000    0.000    0.028    0.003 impl.py:307(ccc)
       10    0.000    0.000    0.023    0.002 impl.py:492(compute_coef)
       10    0.000    0.000    0.023    0.002 impl.py:485(cdist_func)
       10    0.001    0.000    0.022    0.002 impl.py:192(cdist_parts_parallel)
      887    0.019    0.000    0.019    0.000 {method 'acquire' of '_thread.lock' objects}
      208    0.000    0.000    0.019    0.000 threading.py:280(wait)
      124    0.000    0.000    0.019    0.000 threading.py:563(wait)
      110    0.000    0.000    0.015    0.000 thread.py:161(submit)

## `n_samples=500`

In [18]:
N_SAMPLES = 500

In [19]:
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [20]:
%%timeit func()
func()

29.3 ms ± 233 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
%%prun -s cumulative -l 20 -T 09-n_samples_small_500.txt
func()

 
*** Profile printout saved to text file '09-n_samples_small_500.txt'. 


         11112 function calls in 0.032 seconds

   Ordered by: cumulative time
   List reduced from 114 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.032    0.032 {built-in method builtins.exec}
        1    0.000    0.000    0.032    0.032 <string>:1(<module>)
        1    0.000    0.000    0.032    0.032 158102722.py:1(func)
       10    0.000    0.000    0.031    0.003 impl.py:307(ccc)
       10    0.000    0.000    0.025    0.002 impl.py:492(compute_coef)
       10    0.000    0.000    0.024    0.002 impl.py:485(cdist_func)
       10    0.001    0.000    0.024    0.002 impl.py:192(cdist_parts_parallel)
      935    0.022    0.000    0.022    0.000 {method 'acquire' of '_thread.lock' objects}
      224    0.000    0.000    0.022    0.000 threading.py:280(wait)
      132    0.000    0.000    0.020    0.000 threading.py:563(wait)
      110    0.000    0.000    0.015    0.000 thread.py:161(submit)

## `n_samples=1000`

In [22]:
N_SAMPLES = 1000

In [23]:
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [24]:
%%timeit func()
func()

34.7 ms ± 164 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [25]:
%%prun -s cumulative -l 20 -T 09-n_samples_small_1000.txt
func()

 
*** Profile printout saved to text file '09-n_samples_small_1000.txt'. 


         11853 function calls in 0.038 seconds

   Ordered by: cumulative time
   List reduced from 114 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.038    0.038 {built-in method builtins.exec}
        1    0.000    0.000    0.038    0.038 <string>:1(<module>)
        1    0.000    0.000    0.038    0.038 158102722.py:1(func)
       10    0.001    0.000    0.038    0.004 impl.py:307(ccc)
       10    0.000    0.000    0.028    0.003 impl.py:492(compute_coef)
       10    0.000    0.000    0.028    0.003 impl.py:485(cdist_func)
       10    0.001    0.000    0.028    0.003 impl.py:192(cdist_parts_parallel)
     1051    0.028    0.000    0.028    0.000 {method 'acquire' of '_thread.lock' objects}
      253    0.000    0.000    0.027    0.000 threading.py:280(wait)
      150    0.000    0.000    0.023    0.000 threading.py:563(wait)
      110    0.000    0.000    0.016    0.000 thread.py:161(submit)

# Run with `n_samples` large

## `n_samples=50000`

In [26]:
N_SAMPLES = 50000

In [27]:
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [28]:
%%timeit func()
func()

967 ms ± 5.67 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%%prun -s cumulative -l 20 -T 09-n_samples_large_50000.txt
func()

 
*** Profile printout saved to text file '09-n_samples_large_50000.txt'. 


         12363 function calls in 0.957 seconds

   Ordered by: cumulative time
   List reduced from 114 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.957    0.957 {built-in method builtins.exec}
        1    0.000    0.000    0.957    0.957 <string>:1(<module>)
        1    0.009    0.009    0.957    0.957 158102722.py:1(func)
       10    0.013    0.001    0.949    0.095 impl.py:307(ccc)
     1148    0.917    0.001    0.917    0.001 {method 'acquire' of '_thread.lock' objects}
      274    0.001    0.000    0.914    0.003 threading.py:280(wait)
       10    0.000    0.000    0.473    0.047 impl.py:492(compute_coef)
       10    0.000    0.000    0.472    0.047 impl.py:485(cdist_func)
       10    0.001    0.000    0.472    0.047 impl.py:192(cdist_parts_parallel)
      170    0.000    0.000    0.464    0.003 threading.py:563(wait)
      100    0.001    0.000    0.455    0.005 _base.py:201(as_completed)

## `n_samples=100000`

In [30]:
N_SAMPLES = 100000

In [31]:
x = np.random.rand(N_SAMPLES)
y = np.random.rand(N_SAMPLES)

In [32]:
%%timeit func()
func()

1.96 s ± 3.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
%%prun -s cumulative -l 20 -T 09-n_samples_large_100000.txt
func()

 
*** Profile printout saved to text file '09-n_samples_large_100000.txt'. 


         12320 function calls in 1.962 seconds

   Ordered by: cumulative time
   List reduced from 114 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    1.962    1.962 {built-in method builtins.exec}
        1    0.000    0.000    1.962    1.962 <string>:1(<module>)
        1    0.014    0.014    1.962    1.962 158102722.py:1(func)
       10    0.021    0.002    1.948    0.195 impl.py:307(ccc)
     1142    1.898    0.002    1.898    0.002 {method 'acquire' of '_thread.lock' objects}
      271    0.001    0.000    1.896    0.007 threading.py:280(wait)
       10    0.000    0.000    0.962    0.096 impl.py:492(compute_coef)
       10    0.000    0.000    0.962    0.096 impl.py:485(cdist_func)
       10    0.001    0.000    0.961    0.096 impl.py:192(cdist_parts_parallel)
      170    0.000    0.000    0.952    0.006 threading.py:563(wait)
      110    0.000    0.000    0.945    0.009 _base.py:418(result)
