In [1]:
import zarr
from scipy.spatial.distance import squareform
import anjl
from tqdm.auto import tqdm

%load_ext memory_profiler

In [2]:
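# Optional diagnostics, intentionally left disabled: uncomment to print the
# parallel diagnostics report (level 4) for the two dynamic-NJ kernels.
# NOTE(review): `parallel_diagnostics` looks like the numba dispatcher method — confirm.
# from anjl._dynamic import dynamic_search_parallel, dynamic_update_parallel
# dynamic_search_parallel.parallel_diagnostics(level=4)
# dynamic_update_parallel.parallel_diagnostics(level=4)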

## Small

In [3]:
# Load the small benchmark distances from the zipped zarr store (path is
# relative to the notebook's working directory).
small_dist = zarr.load("../benchmark/small/dist.zarr.zip")
# Expand to a square matrix purely to display its dimensions; small_D is not
# referenced again in the visible cells (the benchmarks take small_dist).
small_D = squareform(small_dist)
small_D.shape

(181, 181)

In [4]:
%%timeit -r1000 -n1
# Time anjl.dynamic_nj on the small dataset with parallel=False.
# -r1000 -n1: 1000 timed runs of one call each, so mean/std are per call.
anjl.dynamic_nj(small_dist, parallel=False)

2.02 ms ± 112 μs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [5]:
%%timeit -r1000 -n1
# Same call as the previous cell but with parallel=True, for a direct
# serial-vs-parallel comparison on the small dataset (1000 runs x 1 call).
anjl.dynamic_nj(small_dist, parallel=True)

The slowest run took 4.73 times longer than the fastest. This could mean that an intermediate result is being cached.
4.53 ms ± 725 μs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [6]:
%%timeit -r1000 -n1
# Time anjl.rapid_nj on the small dataset (1000 runs x 1 call).
# No `parallel` argument is passed to rapid_nj anywhere in this notebook.
anjl.rapid_nj(small_dist)

4.69 ms ± 208 μs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [7]:
%%timeit -r1000 -n1
# Time anjl.canonical_nj on the small dataset with parallel=False
# (1000 runs x 1 call).
anjl.canonical_nj(small_dist, parallel=False)

3.51 ms ± 183 μs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


In [8]:
%%timeit -r1000 -n1
# Time anjl.canonical_nj on the small dataset with parallel=True
# (1000 runs x 1 call); pairs with the parallel=False cell above.
anjl.canonical_nj(small_dist, parallel=True)

The slowest run took 5.19 times longer than the fastest. This could mean that an intermediate result is being cached.
2.29 ms ± 468 μs per loop (mean ± std. dev. of 1000 runs, 1 loop each)


## Medium

In [9]:
# Load the medium benchmark distances from the zipped zarr store.
medium_dist = zarr.load("../benchmark/medium/dist.zarr.zip")
# Square form is built only to show the matrix dimensions; medium_D is not
# used by any later visible cell.
medium_D = squareform(medium_dist)
medium_D.shape

(472, 472)

In [10]:
%%timeit -r500 -n1
# Time anjl.dynamic_nj on the medium dataset with parallel=False.
# -r500 -n1: 500 timed runs of one call each (half the small-dataset count).
anjl.dynamic_nj(medium_dist, parallel=False)

9.12 ms ± 281 μs per loop (mean ± std. dev. of 500 runs, 1 loop each)


In [11]:
%%timeit -r500 -n1
# Time anjl.dynamic_nj on the medium dataset with parallel=True
# (500 runs x 1 call).
anjl.dynamic_nj(medium_dist, parallel=True)

14.3 ms ± 916 μs per loop (mean ± std. dev. of 500 runs, 1 loop each)


In [12]:
%%timeit -r200 -n1
# Time anjl.rapid_nj on the medium dataset.
# -r200 -n1: 200 runs of one call each; presumably reduced because each call
# is slower than the dynamic_nj cells above.
anjl.rapid_nj(medium_dist)

39.9 ms ± 1.06 ms per loop (mean ± std. dev. of 200 runs, 1 loop each)


In [13]:
%%timeit -r200 -n1
# Time anjl.canonical_nj on the medium dataset with parallel=False
# (200 runs x 1 call).
anjl.canonical_nj(medium_dist, parallel=False)

48 ms ± 2.75 ms per loop (mean ± std. dev. of 200 runs, 1 loop each)


In [14]:
%%timeit -r200 -n1
# Time anjl.canonical_nj on the medium dataset with parallel=True
# (200 runs x 1 call).
anjl.canonical_nj(medium_dist, parallel=True)

13.3 ms ± 2.77 ms per loop (mean ± std. dev. of 200 runs, 1 loop each)


## Large

In [15]:
# Load the large benchmark distances from the zipped zarr store.
large_dist = zarr.load("../benchmark/large/dist.zarr.zip")
# Square form is built only to show the matrix dimensions.
# NOTE(review): large_D stays bound for the rest of the session although no
# later visible cell uses it; this may raise the baseline seen by %%memit below.
large_D = squareform(large_dist)
large_D.shape

(3081, 3081)

In [16]:
%%memit
# Measure memory for one anjl.dynamic_nj call on the large dataset, using the
# memory_profiler extension loaded in the first cell. `parallel` is left at its default.
# NOTE(review): "peak memory" presumably covers the whole kernel process, so
# "increment" is the figure attributable to this call — confirm in memory_profiler docs.
anjl.dynamic_nj(large_dist);

peak memory: 338.93 MiB, increment: 17.98 MiB


In [17]:
%%memit
# Measure memory for one anjl.canonical_nj call on the large dataset
# (`parallel` left at its default), for comparison with the dynamic_nj cell above.
anjl.canonical_nj(large_dist);

peak memory: 338.83 MiB, increment: 17.80 MiB


In [18]:
%%timeit -r20 -n1
# Time anjl.dynamic_nj on the large dataset with parallel=False.
# -r20 -n1: 20 timed runs of one call each.
anjl.dynamic_nj(large_dist, parallel=False)

415 ms ± 4.71 ms per loop (mean ± std. dev. of 20 runs, 1 loop each)


In [19]:
%%timeit -r20 -n1
# Time anjl.dynamic_nj on the large dataset with parallel=True
# (20 runs x 1 call).
anjl.dynamic_nj(large_dist, parallel=True)

318 ms ± 5.15 ms per loop (mean ± std. dev. of 20 runs, 1 loop each)


In [20]:
%%timeit -r5 -n1
# Time anjl.rapid_nj on the large dataset.
# -r5 -n1: only 5 runs of one call each; each call takes seconds here.
anjl.rapid_nj(large_dist)

3.69 s ± 11.7 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [21]:
%%timeit -r3 -n1
# Time anjl.canonical_nj on the large dataset with parallel=False.
# -r3 -n1: 3 runs of one call each, so the reported std. dev. rests on 3 samples.
anjl.canonical_nj(large_dist, parallel=False)

13.3 s ± 24 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [22]:
%%timeit -r3 -n1
# Time anjl.canonical_nj on the large dataset with parallel=True
# (3 runs x 1 call).
anjl.canonical_nj(large_dist, parallel=True)

2.19 s ± 14.4 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


## XL

In [23]:
# Load the XL benchmark distances from the zipped zarr store.
xl_dist = zarr.load("../benchmark/xl/dist.zarr.zip")
# Square form is built only to show the matrix dimensions.
# NOTE(review): xl_D is never used again in the visible cells but remains in
# kernel memory; consider not binding it if memory readings matter.
xl_D = squareform(xl_dist)
xl_D.shape

(5868, 5868)

In [24]:
%%memit
# Measure memory for one anjl.dynamic_nj call on the XL dataset
# (`parallel` left at its default).
# NOTE(review): "peak memory" presumably includes arrays already held by the
# kernel; compare cells by "increment" — confirm in memory_profiler docs.
anjl.dynamic_nj(xl_dist);

peak memory: 603.45 MiB, increment: 65.62 MiB


In [25]:
%%memit
# Measure memory for one anjl.canonical_nj call on the XL dataset
# (`parallel` left at its default), for comparison with the dynamic_nj cell above.
anjl.canonical_nj(xl_dist);

peak memory: 603.45 MiB, increment: 65.62 MiB


In [26]:
%%timeit -r3 -n1
# Time anjl.dynamic_nj on the XL dataset with parallel=False
# (3 runs x 1 call).
anjl.dynamic_nj(xl_dist, parallel=False);

2.09 s ± 15.8 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [27]:
%%timeit -r3 -n1
# Time anjl.dynamic_nj on the XL dataset with parallel=True
# (3 runs x 1 call).
anjl.dynamic_nj(xl_dist, parallel=True);

1.42 s ± 5.43 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [28]:
%%time
# Single-shot timing of anjl.dynamic_nj on the XL dataset with a tqdm progress
# bar (tqdm.auto, imported in the first cell); %%time reports CPU and wall time.
# NOTE(review): `parallel` is not passed; the recorded CPU-vs-wall gap suggests
# the default runs multi-threaded — confirm against the anjl docs.
anjl.dynamic_nj(xl_dist, progress=tqdm);

  0%|          | 0/5866 [00:00<?, ?it/s]

CPU times: user 11.1 s, sys: 43.9 ms, total: 11.1 s
Wall time: 1.49 s


In [29]:
%%time
# Single-shot timing of anjl.rapid_nj on the XL dataset with a tqdm progress bar.
anjl.rapid_nj(xl_dist, progress=tqdm);

  0%|          | 0/5867 [00:00<?, ?it/s]

CPU times: user 22.2 s, sys: 260 ms, total: 22.5 s
Wall time: 22.3 s


In [30]:
%%time
# Single-shot timing of anjl.canonical_nj on the XL dataset with
# parallel=False and a tqdm progress bar.
anjl.canonical_nj(xl_dist, progress=tqdm, parallel=False);

  0%|          | 0/5867 [00:00<?, ?it/s]

CPU times: user 1min 40s, sys: 126 ms, total: 1min 40s
Wall time: 1min 40s


In [31]:
%%time
# Single-shot timing of anjl.canonical_nj on the XL dataset with
# parallel=True and a tqdm progress bar; compare wall time (not CPU time)
# with the parallel=False cell above.
anjl.canonical_nj(xl_dist, progress=tqdm, parallel=True);

  0%|          | 0/5867 [00:00<?, ?it/s]

CPU times: user 2min 46s, sys: 182 ms, total: 2min 46s
Wall time: 21 s


## XXL

In [32]:
# Load the XXL benchmark distances from the zipped zarr store.
xxl_dist = zarr.load("../benchmark/xxl/dist.zarr.zip")
# Square form is built only to show the matrix dimensions.
# NOTE(review): xxl_D is the largest square matrix in this notebook and stays
# bound although no later visible cell uses it; it may inflate the "peak
# memory" reported by the %%memit cell below — consider not binding it.
xxl_D = squareform(xxl_dist)
xxl_D.shape

(8825, 8825)

In [33]:
%%memit
# Measure memory for one anjl.dynamic_nj call on the XXL dataset
# (`parallel` left at its default). There is no matching canonical_nj
# memory cell in the XXL section.
anjl.dynamic_nj(xxl_dist);

peak memory: 1152.92 MiB, increment: 148.50 MiB


In [34]:
%%time
# Single-shot timing of anjl.dynamic_nj on the XXL dataset with parallel=False.
# %%time runs the cell once, so there is no mean/std as in the %%timeit cells.
anjl.dynamic_nj(xxl_dist, parallel=False);

CPU times: user 6.83 s, sys: 19 ms, total: 6.85 s
Wall time: 6.85 s


In [35]:
%%time
# Single-shot timing of anjl.dynamic_nj on the XXL dataset with parallel=True.
anjl.dynamic_nj(xxl_dist, parallel=True);

CPU times: user 32.4 s, sys: 57 ms, total: 32.5 s
Wall time: 4.27 s


In [36]:
%%time
# Same as the previous cell plus a tqdm progress bar, which lets the two wall
# times be compared to gauge the cost of progress reporting.
anjl.dynamic_nj(xxl_dist, parallel=True, progress=tqdm);

  0%|          | 0/8823 [00:00<?, ?it/s]

CPU times: user 36.3 s, sys: 93.8 ms, total: 36.4 s
Wall time: 4.86 s


In [37]:
%%time
# Single-shot timing of anjl.rapid_nj on the XXL dataset with a tqdm progress bar.
anjl.rapid_nj(xxl_dist, progress=tqdm);

  0%|          | 0/8824 [00:00<?, ?it/s]

CPU times: user 1min 18s, sys: 650 ms, total: 1min 19s
Wall time: 1min 19s


In [38]:
%%time
# Single-shot timing of anjl.canonical_nj on the XXL dataset with
# parallel=True and a tqdm progress bar. The XXL section has no
# parallel=False run of canonical_nj.
anjl.canonical_nj(xxl_dist, progress=tqdm, parallel=True);

  0%|          | 0/8824 [00:00<?, ?it/s]

CPU times: user 11min 40s, sys: 799 ms, total: 11min 41s
Wall time: 1min 29s


## Epilogue

In [39]:
# Record the CPU details of the machine the timings above were collected on.
# `lscpu` is a shell command, so this cell only works where it is installed.
!lscpu

Architecture:             x86_64
  CPU op-mode(s):         32-bit, 64-bit
  Address sizes:          39 bits physical, 48 bits virtual
  Byte Order:             Little Endian
CPU(s):                   8
  On-line CPU(s) list:    0-7
Vendor ID:                GenuineIntel
  Model name:             Intel(R) Xeon(R) CPU E3-1505M v5 @ 2.80GHz
    CPU family:           6
    Model:                94
    Thread(s) per core:   2
    Core(s) per socket:   4
    Socket(s):            1
    Stepping:             3
    CPU max MHz:          3700.0000
    CPU min MHz:          800.0000
    BogoMIPS:             5599.85
    Flags:                fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge m
                          ca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 s
                          s ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc 
                          art arch_perfmon pebs bts rep_good nopl xtopology nons
                          top_tsc cpuid aperfmperf pni pclmulq