Merge pull request #288 from biothings/mongodb_no_cursor_timeout_fix
turn off some debugging logs in `doc_feeder`
Yao Yao committed May 11, 2023
2 parents 31e50d8 + 873092c commit e6ab13c
Showing 1 changed file with 7 additions and 7 deletions.
biothings/utils/mongo.py (14 changes: 7 additions & 7 deletions)
@@ -18,7 +18,7 @@
 
 from biothings.utils.backend import DocESBackend, DocMongoBackend
 from biothings.utils.common import (
-    timesofar,
+    # timesofar,
     dotdict,
     get_compressed_outfile,
     get_random_string,
@@ -350,7 +350,7 @@ def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None,
     logger.debug("Retrieving documents from collection '%s'. start = %d, end = %d, total = %d.", collection.name, s, e, n)
 
     cursor_index = s  # the integer index in the collection that the cursor is pointing to
-    job_start_time = time.time()
+    # job_start_time = time.time()
     batch_start_time = time.time()
 
     try:
@@ -363,10 +363,10 @@ def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None,
         # logger.debug("Querying '%s' from collection '%s' in session '%s'.", query, collection.name, session_uuid)
         if s:
             cur.skip(s)
-            logger.debug("Skipped %d documents from collection '%s'.", s, collection.name)
+            # logger.debug("Skipped %d documents from collection '%s'.", s, collection.name)
         if e:
             cur.limit(e - s)  # specify the maximum number of documents the cursor will return
-            logger.debug("Limited the cursor to fetch only %d documents (%d ~ %d) from collection '%s'.", e - s, s, e, collection.name)
+            # logger.debug("Limited the cursor to fetch only %d documents (%d ~ %d) from collection '%s'.", e - s, s, e, collection.name)
         cur.batch_size(step)  # specify the number of documents the cursor returns per batch (transparent to cursor iterators)
 
         if inbatch:  # which specifies this `doc_feeder` function to return docs in batch. Not related to `cursor.batch_size()`
@@ -394,8 +394,8 @@ def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None,
                     yield doc_batch
                     doc_batch = []
 
-                logger.debug("Done.[%.1f%%,%s]", cursor_index * 100. / n, timesofar(batch_start_time))
-                logger.debug("Processing %d-%d documents...", cursor_index + 1, min(cursor_index + step, e))
+                # logger.debug("Done.[%.1f%%,%s]", cursor_index * 100. / n, timesofar(batch_start_time))
+                # logger.debug("Processing %d-%d documents...", cursor_index + 1, min(cursor_index + step, e))
                 if batch_callback:
                     batch_callback(cursor_index, time.time() - batch_start_time)
                 if cursor_index < e:
@@ -405,7 +405,7 @@ def doc_feeder(collection, step=1000, s=None, e=None, inbatch=False, query=None,
             # Important: need to yield the last batch here
             yield doc_batch
 
-        logger.debug("Finished.[total time: %s]", timesofar(job_start_time))
+        # logger.debug("Finished.[total time: %s]", timesofar(job_start_time))
     finally:
         cur.close()
         logger.debug("Session '%s' to be ended.", session_uuid)
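For context on what these logs were guarding: `doc_feeder` walks a MongoDB collection with a server-side cursor, using `skip`/`limit` to bound the range and `batch_size(step)` to control round trips, and the silenced calls fired once per skip/limit setup plus twice per batch, so they dominated the log output on large collections. A minimal usage sketch, assuming a local MongoDB and hypothetical database/collection names (`biothings_db`/`mygene_data`) that do not come from this repo:

from pymongo import MongoClient

from biothings.utils.mongo import doc_feeder

# Hypothetical connection details; substitute your deployment's values.
client = MongoClient("mongodb://localhost:27017")
collection = client["biothings_db"]["mygene_data"]

# Stream documents 0..5000 in batches of 1000. With inbatch=True the
# generator yields lists of up to `step` docs; with the default
# inbatch=False it yields individual documents.
for batch in doc_feeder(collection, step=1000, s=0, e=5000, inbatch=True):
    for doc in batch:
        print(doc["_id"])  # placeholder for per-document work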
