Skip to content

Commit 995ec93

Browse files
committed
Fixes for universal newline support by genfromtxt (where supported)
Changed genfromtxt's file openers to text mode on Python 3.3+ so that Python's automatic (universal) newline recognition is used; added support for LZMA-compressed ('.xz') files.
1 parent 2aafae5 commit 995ec93

File tree

3 files changed

+98
-11
lines changed

3 files changed

+98
-11
lines changed

numpy/lib/_datasource.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ class _FileOpeners(object):
5353
`_FileOpeners` contains a dictionary that holds one method for each
5454
supported file format. Attribute lookup is implemented in such a way that
5555
an instance of `_FileOpeners` itself can be indexed with the keys of that
56-
dictionary. Currently uncompressed files as well as files
57-
compressed with ``gzip`` or ``bz2`` compression are supported.
56+
dictionary. Currently uncompressed files as well as files compressed with
57+
``gzip``, ``bz2`` or (from Python 3.3 on) ``xz`` compression are supported.
5858
5959
Notes
6060
-----
@@ -64,7 +64,7 @@ class _FileOpeners(object):
6464
Examples
6565
--------
6666
>>> np.lib._datasource._file_openers.keys()
67-
[None, '.bz2', '.gz']
67+
[None, '.bz2', '.xz', '.gz']
6868
>>> np.lib._datasource._file_openers['.gz'] is gzip.open
6969
True
7070
@@ -75,9 +75,17 @@ def __init__(self):
7575
def _load(self):
7676
if self._loaded:
7777
return
78+
try:
79+
import lzma
80+
self._file_openers[".xz"] = lzma.open
81+
except ImportError:
82+
pass
7883
try:
7984
import bz2
80-
self._file_openers[".bz2"] = bz2.BZ2File
85+
if sys.version_info[:2] < (3,3):
86+
self._file_openers[".bz2"] = bz2.BZ2File
87+
else:
88+
self._file_openers[".bz2"] = bz2.open
8189
except ImportError:
8290
pass
8391
try:

numpy/lib/npyio.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
11911191
----------
11921192
fname : file or str
11931193
File, filename, or generator to read. If the filename extension is
1194-
`.gz` or `.bz2`, the file is first decompressed. Note that
1194+
`.gz`, `.bz2` or `.xz`, the file is first decompressed. Note that
11951195
generators must return byte strings in Python 3k.
11961196
dtype : dtype, optional
11971197
Data type of the resulting array.
@@ -1354,9 +1354,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
13541354
try:
13551355
if isinstance(fname, basestring):
13561356
if sys.version_info[0] == 2:
1357-
fhd = iter(np.lib._datasource.open(fname, 'rbU'))
1357+
fhd = iter(np.lib._datasource.open(fname, 'rU'))
1358+
elif sys.version_info[1] < 3:
1359+
fhd = iter(np.lib._datasource.open(fname, 'r'))
13581360
else:
1359-
fhd = iter(np.lib._datasource.open(fname, 'rb'))
1361+
fhd = iter(np.lib._datasource.open(fname, 'rt'))
13601362
own_fhd = True
13611363
else:
13621364
fhd = iter(fname)
@@ -1391,7 +1393,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
13911393
if names is True:
13921394
if comments in first_line:
13931395
first_line = asbytes('').join(first_line.split(comments)[1:])
1394-
first_values = split_line(first_line)
1396+
first_values = split_line(asbytes(first_line))
13951397
except StopIteration:
13961398
# return an empty array if the datafile is empty
13971399
first_line = asbytes('')
@@ -1611,7 +1613,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
16111613

16121614
# Parse each line
16131615
for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
1614-
values = split_line(line)
1616+
values = split_line(asbytes(line))
16151617
nbvalues = len(values)
16161618
# Skip an empty line
16171619
if nbvalues == 0:

numpy/lib/tests/test_io.py

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import sys
44
import gzip
5+
import bz2
6+
if sys.version_info[:2] > (3,2):
7+
import lzma
58
import os
69
import threading
710
import shutil
@@ -1608,8 +1611,54 @@ def test_recfromcsv(self):
16081611
def test_gft_using_filename(self):
16091612
# Test that we can load data from a filename as well as a file object
16101613
wanted = np.arange(6).reshape((2, 3))
1611-
if sys.version_info[0] >= 3:
1612-
# python 3k is known to fail for '\r'
1614+
linesep = ('\n', '\r\n', '\r')
1615+
1616+
for sep in linesep:
1617+
data = '0 1 2' + sep + '3 4 5'
1618+
f, name = mkstemp()
1619+
# We can't use NamedTemporaryFile on windows, because we cannot
1620+
# reopen the file.
1621+
try:
1622+
os.write(f, asbytes(data))
1623+
assert_array_equal(np.genfromtxt(name), wanted)
1624+
finally:
1625+
os.close(f)
1626+
os.unlink(name)
1627+
1628+
def test_gft_from_gzip(self):
1629+
# Test that we can load data from a gzipped file
1630+
wanted = np.arange(6).reshape((2, 3))
1631+
if sys.version_info[:2] < (3,3):
1632+
# universal newline conversion does not work in 2.7 (reason unknown),
1633+
# and text mode (mode='rt') is not supported in 3.2
1634+
linesep = ('\n', '\r\n')
1635+
else:
1636+
linesep = ('\n', '\r\n', '\r')
1637+
1638+
for sep in linesep:
1639+
data = '0 1 2' + sep + '3 4 5'
1640+
s = BytesIO()
1641+
g = gzip.GzipFile(fileobj=s, mode='w')
1642+
g.write(asbytes(data))
1643+
g.close()
1644+
s.seek(0)
1645+
1646+
f, name = mkstemp(suffix='.gz')
1647+
# We can't use NamedTemporaryFile on windows, because we cannot
1648+
# reopen the file.
1649+
try:
1650+
os.write(f, s.read())
1651+
s.close()
1652+
assert_array_equal(np.genfromtxt(name), wanted)
1653+
finally:
1654+
os.close(f)
1655+
os.unlink(name)
1656+
1657+
def test_gft_from_bzip2(self):
1658+
# Test that we can load data from a bzip2 file
1659+
wanted = np.arange(6).reshape((2, 3))
1660+
if sys.version_info[0] == 3 and sys.version_info[1] < 3:
1661+
# universal newline conversion not supported (mode='rt') in 3.2
16131662
linesep = ('\n', '\r\n')
16141663
else:
16151664
linesep = ('\n', '\r\n', '\r')
@@ -1621,6 +1670,34 @@ def test_gft_using_filename(self):
16211670
# reopen the file.
16221671
try:
16231672
os.write(f, asbytes(data))
1673+
os.system('bzip2 %s' % name)
1674+
assert_array_equal(np.genfromtxt(name+'.bz2'), wanted)
1675+
finally:
1676+
os.system('bzip2 -d %s.bz2' % name)
1677+
os.close(f)
1678+
os.unlink(name)
1679+
1680+
@np.testing.dec.knownfailureif(sys.version_info[:2] < (3,3),
1681+
"LZMA not supported in Python<3.3")
1682+
def test_gft_from_xz(self):
1683+
# Test that we can load data from an xz file
1684+
wanted = np.arange(6).reshape((2, 3))
1685+
linesep = ('\n', '\r\n', '\r')
1686+
1687+
for sep in linesep:
1688+
data = '0 1 2' + sep + '3 4 5'
1689+
s = BytesIO()
1690+
g = lzma.LZMAFile(s, mode='w')
1691+
g.write(asbytes(data))
1692+
g.close()
1693+
s.seek(0)
1694+
1695+
f, name = mkstemp(suffix='.xz')
1696+
# We can't use NamedTemporaryFile on windows, because we cannot
1697+
# reopen the file.
1698+
try:
1699+
os.write(f, s.read())
1700+
s.close()
16241701
assert_array_equal(np.genfromtxt(name), wanted)
16251702
finally:
16261703
os.close(f)

0 commit comments

Comments
 (0)