In [17]:
import nuad.vienna_nupack as nv
# Eight pairs of sequences from nv.random_dna_seq; every sequence is requested
# with the same `length`.
length = 20
seqs = [
    tuple(nv.random_dna_seq(length) for _strand in range(2))
    for _pair in range(8)
]

In [26]:
from pprint import pprint
import nuad.vienna_nupack as nv
import nuad.constraints as nc
import pathos
# Two worker processes, matching the two chunks that are mapped over below.
# `nodes` is pathos's keyword for the worker count.
pool = pathos.pools.ProcessPool(nodes=2)
def ev(pairs):
    """Evaluate ``nv.pfunc`` on every element of ``pairs``.

    Prints the input before the evaluation and the computed values after it,
    then returns the values as a list in input order.
    """
    print(f'{pairs=}')
    energies = []
    for pair in pairs:
        energies.append(nv.pfunc(pair))
    print(f'{energies=}')
    return energies

# Split the sequence pairs into two chunks (the pool above has two processes)
# and show how they were divided.
pairs_lists = nc.chunker(seqs, num_chunks=2)
pprint(pairs_lists)
# Run `ev` on each chunk through the pool. As the cell's final expression, the
# list of per-chunk results is what the notebook displays.
pool.map(ev, pairs_lists)

[[('GAGGTGGAAATCGACTTCTT', 'GCCTCACCTTTGCTGCACTT'),
  ('TTACAACGCTCACGATGCGA', 'AGTCAACCAACAATGACGTA'),
  ('TTCCTCAGTATTAGGCCGCT', 'ACTTATCGGTCATGAGCGTG'),
  ('GGGTATAACGAGCACTCGTA', 'ACCTTTTGTCCAACCTTAAG')],
 [('ATTGCGGGCGTACAACTGTA', 'GTATGCCATAGGGTGTCAGT'),
  ('AGACAACCATTCACTTGTTC', 'AGCGCAATGAATCATAATTC'),
  ('AGGGGAAAAGACCCGCTTCC', 'AAGAATTCCCATAATTTTTT'),
  ('GGCTCGAGTGTTTAATATAA', 'CGCCGGTTGTGATGGGAAGG')]]


[[-7.354785159977888,
  -7.617475786837124,
  -7.248002496090672,
  -8.49126940119469],
 [-6.591846792273751,
  -5.910449527000241,
  -8.023192596131457,
  -5.041334552751463]]

In [2]:
import nuad.vienna_nupack as nv

def pfunc_all(seqs):
    """Call ``nv.pfunc`` once for each two-element entry of ``seqs``.

    Each entry is unpacked and passed on as a 2-tuple. The results are
    discarded and the function returns ``None``; it is used in this cell as a
    timing target.
    """
    for first, second in seqs:
        nv.pfunc((first, second))

# Time three ways of processing the same list of pairs: one nv.pfunc call per
# pair (via pfunc_all), and the nv.rna_duplex_multiple / nv.rna_plex_multiple
# functions, which each receive the whole list in a single call.
# NOTE(review): `seqs` is not defined in this cell; it must already exist in
# the kernel from an earlier cell.
%timeit pfunc_all(seqs)
%timeit nv.rna_duplex_multiple(seqs)
%timeit nv.rna_plex_multiple(seqs)

KeyboardInterrupt: 

In [1]:
from multiprocessing.pool import ThreadPool
from pathos.pools import ProcessPool
import psutil

# Size both pools by the number of physical cores. psutil.cpu_count can return
# None when the count cannot be determined, so fall back to the logical count
# and finally to a single worker; this keeps the pool sizes and the printed
# count a concrete number. (Use psutil.cpu_count(logical=True) on its own to
# count logical CPUs instead.)
num_cores = psutil.cpu_count(logical=False) or psutil.cpu_count(logical=True) or 1
print(num_cores)
thread_pool = ThreadPool(processes=num_cores)
# pathos takes the worker count as `nodes`.
process_pool = ProcessPool(nodes=num_cores)

10


In [2]:
# pfunc
import nuad.vienna_nupack as nv
from pprint import pprint
from timeit import timeit
from importlib import reload
# Benchmark: sequential nv.pfunc calls versus nv.pfunc_parallel on
# `process_pool`, for each sequence length and batch size below.
# NOTE(review): `process_pool` is not created in this cell; it must already
# exist in the kernel.
reload(nv)

nv.pfunc('ACGTACGTAGCTA') # call once so overhead of loading Model isn't being measured in any call below

num_trials = 5
print(f'using {process_pool.ncpus} processes in ProcessPool')
print(f'averaging each run over {num_trials} trials')
# Alternative sweeps over sequence length are kept as commented-out `for` lines.
# for seq_length in [10, 15, 20, 30, 40, 50, 75, 100, 125, 150]:
for seq_length in [10, 100, 125, 150, 175, 200]:
# for seq_length in [200]:
    print('#'*80)
    print(f'#  seq length = {seq_length}')
    # Alternative sweeps over batch size are kept as commented-out `for` lines.
    #for num_seqs in [1, 2, 4, 8, 10, 15, 20, 30, 40, 50, 75, 100, 200, 300, 400, 500]: # for lower seq_length
    # for num_seqs in [1, 2, 4, 8, 10, 15, 20]:
    for num_seqs in [100]:
        # Two independently generated batches: one for the parallel run and one
        # for the sequential run.
        # NOTE(review): presumably separate so that neither run can benefit from
        # work done for the other -- confirm.
        parallel_seqs = [nv.random_dna_seq(seq_length) for _ in range(num_seqs)]
        sequential_seqs = [nv.random_dna_seq(seq_length) for _ in range(num_seqs)]
        print(f'#  {"*"*60}')
        print(f'#  * num seqs = {num_seqs}')

        def pfunc_sequential():
            """Call nv.pfunc on each sequence in `sequential_seqs`; return the results as a tuple."""
            energies = tuple(nv.pfunc(seq) for seq in sequential_seqs)
            return energies

        # timeit returns the total time for `number` executions, so divide by
        # num_trials to get the per-run average.
        time_s = timeit(pfunc_sequential, number=num_trials)
        time_s_ave = time_s / num_trials
        print(f'#  * ave seq time = {time_s_ave:.2f} sec')

        time_p = timeit(lambda: nv.pfunc_parallel(process_pool, parallel_seqs), number=num_trials)
        time_p_ave = time_p / num_trials
        print(f'#  * ave par time = {time_p_ave:.2f} sec')

        # Ratio above 1 means the parallel run was faster than the sequential one.
        print(f'#  * seq/par      = {time_s_ave/time_p_ave:.2f}')
        print('#')
    print()

using 10 processes in ProcessPool
averaging each run over 5 trials
################################################################################
#  seq length = 10
#  ************************************************************
#  * num seqs = 100
#  * ave seq time = 0.08 sec
#  * ave par time = 0.28 sec
#  * seq/par      = 0.30
#

################################################################################
#  seq length = 100
#  ************************************************************
#  * num seqs = 100
#  * ave seq time = 0.25 sec
#  * ave par time = 0.08 sec
#  * seq/par      = 2.95
#

################################################################################
#  seq length = 125
#  ************************************************************
#  * num seqs = 100
#  * ave seq time = 0.37 sec
#  * ave par time = 0.09 sec
#  * seq/par      = 3.91
#

################################################################################
#  seq length = 150
#  *****************

In [19]:
# rna_duplex_multiple
import nuad.vienna_nupack as nv
from pprint import pprint
from timeit import timeit
from importlib import reload
# Re-execute nuad.vienna_nupack so the calls below use its current source.
reload(nv)

num_trials = 3
# ThreadPool exposes no public attribute for its worker count, so report
# `num_cores`, the value `thread_pool` was constructed with.
print(f'using {num_cores} threads in ThreadPool')
print(f'averaging each run over {num_trials} trials')
# Benchmark: nv.rna_duplex_multiple versus nv.rna_duplex_multiple_parallel on
# `thread_pool`, for every combination of sequence length and number of pairs.
# Both runs are timed on the same `pairs` list.
# NOTE(review): `thread_pool` is not created in this cell; it must already
# exist in the kernel.
for seq_length in [10, 15, 20, 30, 40, 50, 75, 100]:
    print('#'*80)
    print(f'#  seq length = {seq_length}')
    for num_pairs in [100, 200, 300, 400, 500, 1000, 2000, 5000, 10000, 20000, 50000]:
        pairs = [(nv.random_dna_seq(seq_length), nv.random_dna_seq(seq_length)) for _ in range(num_pairs)]
        print(f'#  {"*"*60}')
        print(f'#  * num_pairs = {num_pairs}')

        # timeit returns the total time for `number` executions, so divide by
        # num_trials to get the per-run average.
        time_s = timeit(lambda: nv.rna_duplex_multiple(pairs), number=num_trials)
        time_s_ave = time_s / num_trials
        print(f'#  * ave seq time = {time_s_ave:.2f} sec')

        time_p = timeit(lambda: nv.rna_duplex_multiple_parallel(thread_pool, pairs), number=num_trials)
        time_p_ave = time_p / num_trials
        print(f'#  * ave par time = {time_p_ave:.2f} sec')

        # Ratio above 1 means the parallel run was faster than the sequential one.
        print(f'#  * seq/par      = {time_s_ave/time_p_ave:.2f}')
        print('#')
    print()

using 10 processes in ProcessPool
averaging each run over 3 trials
################################################################################
#  seq length = 10
#  ************************************************************
#  * num_pairs = 100
#  * ave seq time = 0.48 sec
#  * ave par time = 0.42 sec
#  * seq/par      = 1.15
#
#  ************************************************************
#  * num_pairs = 200
#  * ave seq time = 0.40 sec
#  * ave par time = 0.32 sec
#  * seq/par      = 1.25
#
#  ************************************************************
#  * num_pairs = 300
#  * ave seq time = 0.29 sec
#  * ave par time = 0.35 sec
#  * seq/par      = 0.84
#
#  ************************************************************
#  * num_pairs = 400
#  * ave seq time = 0.38 sec
#  * ave par time = 0.36 sec
#  * seq/par      = 1.04
#
#  ************************************************************
#  * num_pairs = 500
#  * ave seq time = 0.39 sec
#  * ave par time = 0.33 sec
#  * seq/p

KeyboardInterrupt: 

Process ForkPoolWorker-33:
Process ForkPoolWorker-31:
Process ForkPoolWorker-32:
Process ForkPoolWorker-46:
Process ForkPoolWorker-43:
Process ForkPoolWorker-41:
Process ForkPoolWorker-42:
Process ForkPoolWorker-36:
Process ForkPoolWorker-38:
Process ForkPoolWorker-45:
Process ForkPoolWorker-35:
Process ForkPoolWorker-37:
Process ForkPoolWorker-39:
Process ForkPoolWorker-34:
Process ForkPoolWorker-44:
Traceback (most recent call last):
Process ForkPoolWorker-40:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ddoty/.local/lib/python3.8/site-packages/multiprocess/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ddoty/.local/l

(-4.4, -4.3, -4.6, -2.0, -5.4, -4.1, -3.2, -6.1, -3.6, -4.9, -7.3, -5.9, -2.8, -4.5, -6.6, -9.4, -8.6, -12.4, -3.0, -3.6, -5.2, -9.8, -3.7, -4.6, -5.3, -6.5, -6.8, -4.7, -7.9, -6.9, -2.8, -6.1, -5.4, -3.3, -3.3, -5.2, -5.3, -4.7, -7.5, -4.3, -8.2, -4.1, -4.4, -5.2, -5.0, -5.6, -2.9, -3.1, -3.5, -7.5, -5.1, -9.8, -6.8, -7.9, -3.7, -10.0, -7.6, -3.2, -4.2, -4.0, -4.5, -8.1, -6.3, -6.5, -5.3, -3.3, -10.7, -5.3, -7.1, -4.2, -4.8, -2.0, -2.0, -3.3, -7.1, -3.3, -3.5, -5.4, -2.1, -5.4, -4.9, -5.6, -9.0, -7.8, -3.7, -5.7, -3.8, -6.0, -1.9, -5.5, -4.7, -4.7, -10.3, -4.6, -5.0, -6.2, -3.8, -7.3, -9.5, -3.2, -5.0, -3.6, -3.0, -2.6, -5.0, -7.4, -4.1, -7.9, -3.3, -5.4, -8.4, -6.2, -6.3, -3.8, -4.5, -3.8, -5.4, -5.9, -5.7, -3.5, -6.6, -6.4, -4.8, -2.1, -3.7, -3.8, -6.5, -3.2, -3.0, -5.0, -4.4, -5.2, -10.7, -4.0, -5.6, -6.6, -5.8, -4.6, -4.7, -5.6, -5.7, -3.1, -5.8, -5.5, -6.8, -6.5, -7.0, -6.2, -3.9, -4.6, -3.5, -2.9, -2.7, -4.9, -6.6, -8.6, -8.4, -4.7, -6.3, -11.6, -3.0, -2.7, -4.3, -4.2, -4.3, -5.

In [6]:
import pathos.helpers as ph
import psutil as psutil
from inspect import getsource

# Compare the CPU counts reported by pathos and by psutil.
print(f'{ph.cpu_count()=}')
# psutil is imported under its own name in this cell; no `ps` alias is bound
# here, so call psutil directly.
print(f'{psutil.cpu_count()=}')
print(f'{psutil.cpu_count(logical=True)=}')
print(f'{psutil.cpu_count(logical=False)=}')

ph.cpu_count()=8
ps.cpu_count()=8
psutil.cpu_count(logical=True)=8
psutil.cpu_count(logical=False)=4


AttributeError: 'function' object has no attribute 'cache_clear'

In [15]:
import nuad.constraints as nc
from importlib import reload
# Re-execute nuad.constraints so the calls below use its current source.
reload(nc)

# A notebook cell displays only its final expression, so print the first
# result explicitly; otherwise it is computed and silently thrown away.
print(nc.chunker(('A', 'B', 'C', 'D', 'E', 'F', 'G'), num_chunks=4))
num = 9
# Final expression: shown as the cell's output.
nc.chunker(list(range(1,num+1)), num_chunks=8)

[[1, 2], [3, 4], [5, 6], [7, 8], [9]]