Permalink
Browse files

Turn urllib's bytes handle into a unicode handle for Python 3

  • Loading branch information...
peterjc committed Dec 6, 2011
1 parent 793ab98 commit a7f3857d012c0077760e38ea8c1914c2323057e4
Showing with 50 additions and 24 deletions.
  1. +4 −4 Bio/TogoWS/__init__.py
  2. +28 −0 Bio/_py3k.py
  3. +18 −20 Tests/test_TogoWS.py
View
@@ -33,7 +33,7 @@
import urllib
import urllib2
import time
-from Bio._py3k import _as_string
+from Bio._py3k import _binary_to_string_handle
#Caches:
_search_db_names = None
@@ -45,7 +45,7 @@
def _get_fields(url):
    """Queries a TogoWS URL for a plain text list of values (PRIVATE).

    Opens ``url`` via ``_open``, reads the whole response and returns it
    split on whitespace as a list of strings. ``_open`` already hands back
    a text-mode handle, so no bytes-to-string conversion is done here.
    """
    handle = _open(url)
    try:
        fields = handle.read().strip().split()
    finally:
        #Always release the connection, even if the read fails part way.
        handle.close()
    return fields
@@ -179,7 +179,7 @@ def search_iter(db, query, limit=None, batch=100):
while remain:
batch = min(batch, remain)
#print "%r left, asking for %r" % (remain, batch)
- ids = _as_string(search(db, query, offset, batch).read()).strip().split()
+ ids = search(db, query, offset, batch).read().strip().split()
assert len(ids)==batch, "Got %i, expected %i" % (len(ids), batch)
#print "offset %i, %s ... %s" % (offset, ids[0], ids[-1])
if ids == prev_ids:
@@ -312,7 +312,7 @@ def _open(url, post=None):
#We now trust TogoWS to have set an HTTP error code, that
#suffices for my current unit tests. Previously we would
#examine the start of the data returned back.
- return handle
+ return _binary_to_string_handle(handle)
_open.previous = 0
View
@@ -43,6 +43,30 @@ def _is_int_or_long(i):
"""
return isinstance(i, int)
+ import io
+ def _binary_to_string_handle(handle):
+ """Treat a binary (bytes) handle like a text (unicode) handle."""
+ #See also http://bugs.python.org/issue5628
+ #and http://bugs.python.org/issue13541
+ #return io.TextIOWrapper(io.BufferedReader(handle))
+ class EvilHandleHack(object):
+ def __init__(self, handle):
+ self._handle = handle
+ def read(self, length=None):
+ return _as_string(self._handle.read(length))
+ def readline(self):
+ return _as_string(self._handle.readline())
+ def __iter__(self):
+ for line in self._handle:
+ yield _as_string(line)
+ def close(self):
+ return self._handle.close()
+ def seek(self, pos):
+ return self._handle.seek(pos)
+ def tell(self):
+ return self._handle.tell(pos)
+ return EvilHandleHack(handle)
+
else:
#Python 2 code
@@ -68,3 +92,7 @@ def _is_int_or_long(i):
#will be changed to "isinstance(i, int) or isinstance(i, int)"
#but that doesn't matter.
return isinstance(i, int) or isinstance(i, long)
+
+ def _binary_to_string_handle(handle):
+ """Treat a binary handle like a text handle."""
+ return handle
View
@@ -13,8 +13,6 @@
import requires_internet
requires_internet.check()
-from Bio._py3k import _as_string
-
#We want to test these:
from Bio import TogoWS
@@ -112,15 +110,15 @@ def test_pubmed_16381885(self):
def test_pubmed_16381885_ti(self):
"""Bio.TogoWS.entry("pubmed", "16381885", field="ti")"""
handle = TogoWS.entry("pubmed", "16381885", field="ti")
- data = _as_string(handle.read()).strip()
+ data = handle.read().strip()
handle.close()
self.assertEqual(data,
'From genomics to chemical genomics: new developments in KEGG.')
def test_pubmed_16381885_title(self):
"""Bio.TogoWS.entry("pubmed", "16381885", field="title")"""
handle = TogoWS.entry("pubmed", "16381885", field="title")
- data = _as_string(handle.read()).strip()
+ data = handle.read().strip()
handle.close()
self.assertEqual(data,
'From genomics to chemical genomics: new developments in KEGG.')
@@ -129,7 +127,7 @@ def test_pubmed_16381885_au(self):
"""Bio.TogoWS.entry("pubmed", "16381885", field="au")"""
#Gives one name per line (i.e. \n separated), no dots
handle = TogoWS.entry("pubmed", "16381885", field="au")
- data = _as_string(handle.read()).strip().split("\n")
+ data = handle.read().strip().split("\n")
handle.close()
self.assertEqual(data, ['Kanehisa M', 'Goto S', 'Hattori M',
'Aoki-Kinoshita KF', 'Itoh M',
@@ -140,7 +138,7 @@ def test_pubmed_16381885_authors(self):
"""Bio.TogoWS.entry("pubmed", "16381885", field="authors")"""
#Gives names tab separated (i.e. \t separated)
handle = TogoWS.entry("pubmed", "16381885", field="authors")
- data = _as_string(handle.read()).strip().split("\t")
+ data = handle.read().strip().split("\t")
handle.close()
self.assertEqual(data, ['Kanehisa, M.', 'Goto, S.', 'Hattori, M.',
'Aoki-Kinoshita, K. F.', 'Itoh, M.',
@@ -188,7 +186,7 @@ def test_pubmed_16381885_and_19850725_authors(self):
handle = TogoWS.entry("pubmed", "16381885,19850725", field="authors")
#Little hack to remove blank lines...
#names = handle.read().replace("\n\n", "\n").strip().split("\n")
- names = _as_string(handle.read()).strip().split("\n")
+ names = handle.read().strip().split("\n")
handle.close()
self.assertEqual(2, len(names))
names1, names2 = names
@@ -210,7 +208,7 @@ def test_invalid_db(self):
def test_ddbj_genbank_length(self):
"""Bio.TogoWS.entry("genbank", "NC_000913.2", field="length")"""
handle = TogoWS.entry("genbank", "NC_000913.2", field="length")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "4639675")
@@ -227,49 +225,49 @@ def test_ddbj_genbank(self):
def test_ddbj_genbank_length(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="length")"""
handle = TogoWS.entry("ddbj", "X52960", field="length")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "248")
def test_ddbj_genbank_seq(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="seq")"""
handle = TogoWS.entry("ddbj", "X52960", field="seq")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(seguid(data), "Ktxz0HgMlhQmrKTuZpOxPZJ6zGU")
def test_ddbj_genbank_definition(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="definition")"""
handle = TogoWS.entry("ddbj", "X52960", field="definition")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "Coleus blumei viroid 1 (CbVd) RNA.")
def test_ddbj_genbank_accession(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="accession")"""
handle = TogoWS.entry("ddbj", "X52960", field="accession")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "X52960")
def test_ddbj_genbank_accession(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="version")"""
handle = TogoWS.entry("ddbj", "X52960", field="version")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "1")
def test_ddbj_genbank_acc_version(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="acc_version")"""
handle = TogoWS.entry("ddbj", "X52960", field="acc_version")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "X52960.1")
def test_ddbj_genbank_organism(self):
"""Bio.TogoWS.entry("ddbj", "X52960", field="organism")"""
handle = TogoWS.entry("ddbj", "X52960", field="organism")
- data = _as_string(handle.read()).strip() #ignore trailing \n
+ data = handle.read().strip() #ignore trailing \n
handle.close()
self.assertEqual(data, "Coleus blumei viroid 1")
@@ -286,36 +284,36 @@ def test_ddbj_invalid_format(self):
def test_ddbj_gff3(self):
"""Bio.TogoWS.entry("ddbj", "X52960", format="gff")"""
handle = TogoWS.entry("ddbj", "X52960", format="gff")
- data = _as_string(handle.read())
+ data = handle.read()
handle.close()
self.assert_(data.startswith("##gff-version 3\nX52960\tDDBJ\t"), data)
def test_genbank_gff3(self):
"""Bio.TogoWS.entry("nucleotide", "X52960", format="gff")"""
#Note - Using manual URL with genbank instead of nucleotide works
handle = TogoWS.entry("nucleotide", "X52960", format="gff")
- data = _as_string(handle.read())
+ data = handle.read()
handle.close()
self.assert_(data.startswith("##gff-version 3\nX52960\tGenbank\t"), data)
def test_embl_AM905444_gff3(self):
"""Bio.TogoWS.entry("embl", "AM905444", format="gff")"""
handle = TogoWS.entry("embl", "AM905444", format="gff")
- data = _as_string(handle.read())
+ data = handle.read()
handle.close()
self.assert_(data.startswith("##gff-version 3\nAM905444\tembl\t"), data)
def test_embl_AM905444_seq(self):
"""Bio.TogoWS.entry("embl", "AM905444", field="seq")"""
handle = TogoWS.entry("embl", "AM905444", field="seq")
- data = _as_string(handle.read()).strip() #ignore any trailing \n
+ data = handle.read().strip() #ignore any trailing \n
handle.close()
self.assertEqual(seguid(data), "G0HtLpwF7i4FXUaUjDUPTjok79c")
def test_embl_AM905444_definition(self):
"""Bio.TogoWS.entry("embl", "AM905444", field="definition")"""
handle = TogoWS.entry("embl", "AM905444", field="definition")
- data = _as_string(handle.read()).strip() #ignore any trailing \n
+ data = handle.read().strip() #ignore any trailing \n
handle.close()
self.assertEqual(data, "Herbaspirillum seropedicae locus tag HS193.0074 for porin")

0 comments on commit a7f3857

Please sign in to comment.