Add option to use API key for NCBI requests
haydenm committed Jan 29, 2020
1 parent cd0bfa2 commit dfc35b1
Showing 2 changed files with 33 additions and 9 deletions.
8 changes: 8 additions & 0 deletions bin/design.py
@@ -31,6 +31,10 @@
def main(args):
logger = logging.getLogger(__name__)

# Set NCBI API key
if args.ncbi_api_key:
ncbi_neighbors.ncbi_api_key = args.ncbi_api_key

# Read the genomes from FASTA sequences
genomes_grouped = []
genomes_grouped_names = []
@@ -817,6 +821,10 @@ def check_max_num_processes(val):
"this may result in substantial memory usage; but it may provide "
"an improvement in runtime when there are relatively few "
"candidate probes and a very large blacklisted input"))
parser.add_argument('--ncbi-api-key',
help=("API key to use for NCBI e-utils. Using this increases the "
"limit on requests/second and may prevent an IP address "
"from being block due to too many requests"))

# Log levels and version
parser.add_argument('--debug',
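For context, a minimal sketch of how the new flag reaches the downloader module at runtime, assuming the catch package is importable; the parser setup below is illustrative rather than the full design.py, and the key value is a placeholder:

import argparse

from catch.utils import ncbi_neighbors

parser = argparse.ArgumentParser()
parser.add_argument('--ncbi-api-key',
    help="API key to use for NCBI e-utils")
args = parser.parse_args(['--ncbi-api-key', 'MY_PLACEHOLDER_KEY'])

# Mirror of the change in main(): copy the CLI value into the module-level
# variable that the download helpers consult when building URLs
if args.ncbi_api_key:
    ncbi_neighbors.ncbi_api_key = args.ncbi_api_key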
34 changes: 25 additions & 9 deletions catch/utils/ncbi_neighbors.py
@@ -17,6 +17,10 @@
logger = logging.getLogger(__name__)


# Global variable for API key
ncbi_api_key = None


def urlopen_with_tries(url, initial_wait=5, rand_wait_range=(1, 60),
max_num_tries=5):
"""
@@ -48,20 +52,22 @@ def urlopen_with_tries(url, initial_wait=5, rand_wait_range=(1, 60),
logger.debug(("Making request to open url: %s"), url)
r = urllib.request.urlopen(url)
return r
except urllib.error.HTTPError:
except urllib.error.HTTPError as e:
if num_tries == max_num_tries:
# This was the last allowed try
logger.critical(("Encountered HTTPError %d times (the maximum "
"allowed) when opening url: %s"), num_tries, url)
"allowed) when opening url: %s; error: %s"), num_tries,
url, e)
raise
else:
# Pause for a bit and retry
wait = initial_wait * 2**(num_tries - 1)
rand_wait = random.randint(*rand_wait_range)
total_wait = wait + rand_wait
logger.info(("Encountered HTTPError when opening url; "
"sleeping for %d seconds, and then trying again"),
total_wait)
"sleeping for %d seconds, and then trying again "
"(try %d of %d); error: %s"),
total_wait, num_tries, max_num_tries, e)
time.sleep(total_wait)
except:
logger.critical(("Encountered unexpected error while opening "
@@ -78,8 +84,11 @@ def ncbi_neighbors_url(taxid):
Returns:
str representing download URL
"""
params = urllib.parse.urlencode({'taxid': taxid, 'cmd': 'download2'})
url = 'https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?%s' % params
params = {'taxid': taxid, 'cmd': 'download2'}
if ncbi_api_key is not None:
params['api_key'] = ncbi_api_key
params_url = urllib.parse.urlencode(params)
url = 'https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?%s' % params_url
return url
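With the module-level key set, the query string simply gains an api_key parameter; a standalone reproduction of the URL construction in this hunk (the taxid and key below are example values):

import urllib.parse

ncbi_api_key = 'EXAMPLE_KEY'    # example value; None leaves the URL unchanged

params = {'taxid': 64320, 'cmd': 'download2'}
if ncbi_api_key is not None:
    params['api_key'] = ncbi_api_key
params_url = urllib.parse.urlencode(params)
print('https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?%s' % params_url)
# https://www.ncbi.nlm.nih.gov/genomes/GenomesGroup.cgi?taxid=64320&cmd=download2&api_key=EXAMPLE_KEY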


@@ -149,10 +158,13 @@ def ncbi_fasta_download_url(accessions):
str representing download URL
"""
ids = ','.join(accessions)
params = {'id': ids, 'db': 'nuccore', 'rettype': 'fasta',
'retmode': 'text'}
if ncbi_api_key is not None:
params['api_key'] = ncbi_api_key
# Use safe=',' to not encode ',' as '%2C'
params = urllib.parse.urlencode({'id': ids, 'db': 'nuccore',
'rettype': 'fasta', 'retmode': 'text'}, safe=',')
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?%s' % params
params_url = urllib.parse.urlencode(params, safe=',')
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?%s' % params_url
return url
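The safe=',' argument matters because urlencode would otherwise percent-encode the commas that join the accession list into a single id parameter; a short illustration (the accessions are example values):

import urllib.parse

accessions = ['KY785484.1', 'KY785481.1']    # example accessions
params = {'id': ','.join(accessions), 'db': 'nuccore',
          'rettype': 'fasta', 'retmode': 'text'}

print(urllib.parse.urlencode(params))
# id=KY785484.1%2CKY785481.1&db=nuccore&rettype=fasta&retmode=text

print(urllib.parse.urlencode(params, safe=','))
# id=KY785484.1,KY785481.1&db=nuccore&rettype=fasta&retmode=text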


@@ -174,6 +186,10 @@ def fetch_fastas(accessions, batch_size=100, reqs_per_sec=2):
"""
logger.debug(("Fetching fasta files for %d accessions") % len(accessions))

if ncbi_api_key is not None:
# Using an API key allows more requests per second (up to 10)
reqs_per_sec = 7

# Make temp file
fp = tempfile.NamedTemporaryFile()

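NCBI's e-utils guidelines allow roughly 3 requests per second without an API key and up to 10 with one, which is why the code raises reqs_per_sec to a conservative 7 when a key is set. A minimal, self-contained sketch of pacing requests to stay under such a limit (fetch_batch is a hypothetical stand-in for the real per-batch download, not the actual fetch_fastas logic):

import time

def fetch_batch(batch):
    # Hypothetical stand-in for the real per-batch download call
    print("fetching", batch)

def paced_fetch(batches, reqs_per_sec=2):
    # Space successive requests so that no more than reqs_per_sec are
    # issued per second
    min_interval = 1.0 / reqs_per_sec
    last = float('-inf')
    for batch in batches:
        wait = min_interval - (time.monotonic() - last)
        if wait > 0:
            time.sleep(wait)
        last = time.monotonic()
        fetch_batch(batch)

# With an API key set, a rate like 7 requests/second stays within NCBI's
# documented 10 requests/second limit
paced_fetch([['KY785484.1'], ['KY785481.1']], reqs_per_sec=7)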
