Skip to content

Commit

Permalink
Merge pull request #146 from indralab/remove_multiproc
Browse files Browse the repository at this point in the history
Remove multiprocessing when calling Preassembler
  • Loading branch information
pagreene committed Nov 16, 2020
2 parents 10cd7db + 4befdd7 commit f71db9c
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 21 deletions.
18 changes: 3 additions & 15 deletions indra_db/preassembly/preassemble_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,13 @@ class DbPreassembler:
Parameters
----------
n_proc : int
Select the number of processes that will be used when performing
preassembly. Default is 1.
batch_size : int
Select the maximum number of statements you wish to be handled at a
time. In general, a larger batch size will be somewhat faster, but
will require much more memory.
"""
def __init__(self, n_proc=None, batch_size=10000, s3_cache=None,
def __init__(self, batch_size=10000, s3_cache=None,
print_logs=False, stmt_type=None, yes_all=False):
self.n_proc = n_proc
self.batch_size = batch_size
if s3_cache is not None:
# Make the cache specific to stmt type. This guards against
Expand Down Expand Up @@ -668,8 +664,7 @@ def _clean_statements(self, stmts):
@clockit
def _get_support_links(self, unique_stmts, split_idx=None):
"""Find the links of refinement/support between statements."""
id_maps = self.pa._generate_id_maps(unique_stmts, poolsize=self.n_proc,
split_idx=split_idx)
id_maps = self.pa._generate_id_maps(unique_stmts, split_idx=split_idx)
ret = set()
for ix_pair in id_maps:
if ix_pair[0] == ix_pair[1]:
Expand Down Expand Up @@ -732,13 +727,6 @@ def _make_parser():
action='store_true',
help='Continue uploading or updating, picking up where you left off.'
)
parser.add_argument(
'-n', '--num-procs',
type=int,
default=None,
help=('Select the number of processors to use during this operation. '
'Default is 1.')
)
parser.add_argument(
'-b', '--batch',
type=int,
Expand Down Expand Up @@ -793,7 +781,7 @@ def _main():
assert db is not None
db.grab_session()
s3_cache = S3Path.from_string(args.cache)
pa = DbPreassembler(args.num_procs, args.batch, s3_cache,
pa = DbPreassembler(args.batch, s3_cache,
stmt_type=args.stmt_type, yes_all=args.yes_all)

desc = 'Continuing' if args.continuing else 'Beginning'
Expand Down
12 changes: 6 additions & 6 deletions indra_db/tests/test_preassembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def _check_statement_distillation(num_stmts):


@needs_py3
def _check_preassembly_with_database(num_stmts, batch_size, n_proc=1):
def _check_preassembly_with_database(num_stmts, batch_size):
db = get_pa_loaded_db(num_stmts)

# Now test the set of preassembled (pa) statements from the database
Expand All @@ -372,7 +372,7 @@ def _check_preassembly_with_database(num_stmts, batch_size, n_proc=1):

# Run the preassembly initialization.
start = datetime.now()
pa_manager = pm.PreassemblyManager(batch_size=batch_size, n_proc=n_proc,
pa_manager = pm.PreassemblyManager(batch_size=batch_size,
print_logs=True)
pa_manager.create_corpus(db)
end = datetime.now()
Expand Down Expand Up @@ -416,8 +416,8 @@ def _check_preassembly_with_database(num_stmts, batch_size, n_proc=1):


@needs_py3
def _check_db_pa_supplement(num_stmts, batch_size, split=0.8, n_proc=1):
pa_manager = pm.PreassemblyManager(batch_size=batch_size, n_proc=n_proc,
def _check_db_pa_supplement(num_stmts, batch_size, split=0.8):
pa_manager = pm.PreassemblyManager(batch_size=batch_size,
print_logs=True)
db = get_pa_loaded_db(num_stmts, split=split, pam=pa_manager)
opa_inp_stmts = _get_opa_input_stmts(db)
Expand Down Expand Up @@ -638,9 +638,9 @@ def test_db_incremental_preassembly_small():

# @attr('nonpublic', 'slow')
# def test_db_incremental_preassembly_very_large():
# _check_db_pa_supplement(100000, 20000, n_proc=2)
# _check_db_pa_supplement(100000, 20000)


# @attr('nonpublic', 'slow')
# def test_db_incremental_preassembly_1M():
# _check_db_pa_supplement(1000000, 200000, n_proc=6)
# _check_db_pa_supplement(1000000, 200000)

0 comments on commit f71db9c

Please sign in to comment.