Skip to content

Commit

Permalink
Use urllib2 instead of urllib so we can catch HTTP errors directly without having to go through an UndoHandle
Browse files Browse the repository at this point in the history
  • Loading branch information
mdehoon committed Aug 3, 2010
1 parent adcfd49 commit 2c4d8b9
Showing 1 changed file with 13 additions and 64 deletions.
77 changes: 13 additions & 64 deletions Bio/Entrez/__init__.py
Expand Up @@ -47,9 +47,8 @@
_open Internally used function.
"""
import urllib, time, warnings
import urllib, urllib2, time, warnings
import os.path
from Bio import File


email = None
Expand All @@ -73,7 +72,7 @@ def epost(db, **keywds):
cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
variables = {'db' : db}
variables.update(keywds)
return _open(cgi, variables, post=True)
return _open(cgi, variables, post=False)

def efetch(db, **keywds):
"""Fetches Entrez results which are returned as a handle.
Expand Down Expand Up @@ -316,67 +315,17 @@ def _open(cgi, params={}, post=False):
E-utilities.""", UserWarning)
# Open a handle to Entrez.
options = urllib.urlencode(params, doseq=True)
if post:
#HTTP POST
handle = urllib.urlopen(cgi, data=options)
else:
#HTTP GET
cgi += "?" + options
handle = urllib.urlopen(cgi)

# Wrap the handle inside an UndoHandle.
uhandle = File.UndoHandle(handle)

# Check for errors in the first 7 lines.
# This is kind of ugly.
lines = []
for i in range(7):
lines.append(uhandle.readline())
for i in range(6, -1, -1):
uhandle.saveline(lines[i])

try:
data = ''.join(lines)
except TypeError:
#On Python 3 the lines will be bytes not unicode strings...
data = ''.join(x.decode() for x in lines)
#Note that this doesn't alter the nature of the UndoHandle this
#function will return to the caller (it will still be using bytes)

if "500 Proxy Error" in data:
# Sometimes Entrez returns a Proxy Error instead of results
raise IOError("500 Proxy Error (NCBI busy?)")
elif "502 Proxy Error" in data:
raise IOError("502 Proxy Error (NCBI busy?)")
elif "WWW Error 500 Diagnostic" in data:
raise IOError("WWW Error 500 Diagnostic (NCBI busy?)")
elif "<title>Service unavailable!</title>" in data:
#Probably later in the file it will say "Error 503"
raise IOError("Service unavailable!")
elif "<title>Bad Gateway!</title>" in data:
#Probably later in the file it will say:
# "The proxy server received an invalid
# response from an upstream server."
raise IOError("Bad Gateway!")
elif "<title>414 Request-URI Too Large</title>" in data \
or "<h1>Request-URI Too Large</h1>" in data:
raise IOError("Requested URL too long (try using EPost?)")
elif data.startswith("Error:"):
#e.g. 'Error: Your session has expired. Please repeat your search.\n'
raise IOError(data.strip())
elif data.startswith("The resource is temporarily unavailable"):
#This can occur with an invalid query_key
#Perhaps this should be a ValueError?
raise IOError("The resource is temporarily unavailable")
elif data.startswith("download dataset is empty"):
#This can occur when omit the identifier, or the WebEnv and query_key
#Perhaps this should be a ValueError?
raise IOError("download dataset is empty")
elif data[:5] == "ERROR":
# XXX Possible bug here, because I don't know whether this really
# occurs on the first line. I need to check this!
raise IOError("ERROR, possibly because id not available?")
# Should I check for 404? timeout? etc?
return uhandle
if post:
#HTTP POST
handle = urllib2.urlopen(cgi, data=options)
else:
#HTTP GET
cgi += "?" + options
handle = urllib2.urlopen(cgi)
except urllib2.HTTPError, exception:
raise exception

return handle

_open.previous = 0

1 comment on commit 2c4d8b9

@peterjc
Copy link
Member

@peterjc peterjc commented on 2c4d8b9 Aug 3, 2010

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why keep the try/except since all you do is raise the error?

[I'm not 100% sure we should remove all the previously implemented tests]

Please sign in to comment.