Skip to content

Commit

Permalink
petlx.array done
Browse files (browse the repository at this point in the history)
  • Loading branch information
alimanfoo committed Jan 14, 2015
1 parent b211c91 commit 7910624
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 148 deletions.
14 changes: 9 additions & 5 deletions TODO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@ DONE update version identifier scheme

DONE update requirements to need petl >= 1.0

TODO remove ipython package, update documentation
DONE remove ipython package, update documentation

TODO migrate push package
DONE migrate push package

TODO create remote branch v1.0 and sync
DONE create remote branch v1.0 and sync

TODO try installing dependencies under py34
TODO try installing dependencies under py27 and py34

TODO setup for testing under py26, py27, py34
TODO setup tox for testing under py27 and py34

TODO rewrite interval module using intervaltree

TODO rewrite xls module to remove dependency on xlutils

TODO change all docstring examples to use new import style

Expand Down
7 changes: 2 additions & 5 deletions petlx/all.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
__author__ = 'Alistair Miles <alimanfoo@googlemail.com>'


"""
Meta-module to import all petlx modules and thereby cause them to be integrated
with petl.fluent and petl.interactive.
Meta-module to import all petlx modules and thereby cause them to be activated.
"""


Expand Down
172 changes: 72 additions & 100 deletions petlx/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,21 @@
"""


import sys
from petl.util import columns, iterpeek, RowContainer, ValuesContainer
from petlx.util import UnsatisfiedDependency


dep_message = """
The package numpy is required. Instructions for installation can be found
at http://docs.scipy.org/doc/numpy/user/install.html or try apt-get install
python-numpy.
"""
import petl as etl
from petl.compat import next, string_types
from petl.util.base import iterpeek, ValuesView, Table


def guessdtype(table):
try:
import numpy as np
except ImportError as e:
raise UnsatisfiedDependency(e, dep_message)
else:
# get numpy to infer dtype
it = iter(table)
fields = it.next()
rows = tuple(it)
dtype = np.rec.array(rows).dtype
dtype.names = fields
return dtype
import numpy as np
# get numpy to infer dtype
it = iter(table)
hdr = next(it)
flds = list(map(str, hdr))
rows = tuple(it)
dtype = np.rec.array(rows).dtype
dtype.names = flds
return dtype


def toarray(table, dtype=None, count=-1, sample=1000):
Expand Down Expand Up @@ -89,80 +79,69 @@ def toarray(table, dtype=None, count=-1, sample=1000):
"""

try:
import numpy as np
except ImportError as e:
raise UnsatisfiedDependency(e, dep_message)
import numpy as np
it = iter(table)
peek, it = iterpeek(it, sample)
hdr = next(it)
flds = list(map(str, hdr))

if dtype is None:
dtype = guessdtype(peek)

elif isinstance(dtype, string_types):
# insert field names from source table
typestrings = [s.strip() for s in dtype.split(',')]
dtype = [(f, t) for f, t in zip(flds, typestrings)]

elif (isinstance(dtype, dict)
and ('names' not in dtype or 'formats' not in dtype)):
# allow for partial specification of dtype
cols = etl.columns(peek)
newdtype = {'names': [], 'formats': []}
for f in flds:
newdtype['names'].append(f)
if f in dtype and isinstance(dtype[f], tuple):
# assume fully specified
newdtype['formats'].append(dtype[f][0])
elif f not in dtype:
# not specified at all
a = np.array(cols[f])
newdtype['formats'].append(a.dtype)
else:
# assume directly specified, just need to add offset
newdtype['formats'].append(dtype[f])
dtype = newdtype

else:
pass # leave dtype as-is

it = iter(table)
peek, it = iterpeek(it, sample)
fields = it.next()

if dtype is None:
dtype = guessdtype(peek)

elif isinstance(dtype, basestring):
# insert field names from source table
typestrings = [s.strip() for s in dtype.split(',')]
dtype = [(f, t) for f, t in zip(fields, typestrings)]

elif (isinstance(dtype, dict)
and ('names' not in dtype or 'formats' not in dtype)):
# allow for partial specification of dtype
cols = columns(peek)
newdtype = {'names': [], 'formats': []}
for f in fields:
newdtype['names'].append(f)
if f in dtype and isinstance(dtype[f], tuple):
# assume fully specified
newdtype['formats'].append(dtype[f][0])
elif f not in dtype:
# not specified at all
a = np.array(cols[f])
newdtype['formats'].append(a.dtype)
else:
# assume directly specified, just need to add offset
newdtype['formats'].append(dtype[f])
dtype = newdtype

else:
pass # leave dtype as-is

# numpy is fussy about having tuples, need to make sure
it = (tuple(row) for row in it)
sa = np.fromiter(it, dtype=dtype, count=count)

return sa
# numpy is fussy about having tuples, need to make sure
it = (tuple(row) for row in it)
sa = np.fromiter(it, dtype=dtype, count=count)

return sa


def torecarray(*args, **kwargs):
"""
Convenient shorthand for ``toarray(...).view(np.recarray)``.
.. versionadded:: 0.5.1
"""
try:
import numpy as np
except ImportError as e:
raise UnsatisfiedDependency(e, dep_message)
else:
return toarray(*args, **kwargs).view(np.recarray)

import numpy as np
return toarray(*args, **kwargs).view(np.recarray)


def fromarray(a):
"""
Extract a table from a numpy structured array.
.. versionadded:: 0.4
"""

return ArrayContainer(a)
return ArrayView(a)


class ArrayContainer(RowContainer):
class ArrayView(Table):

def __init__(self, a):
self.a = a
Expand All @@ -174,26 +153,19 @@ def __iter__(self):


def valuestoarray(vals, dtype=None, count=-1, sample=1000):

try:
import numpy as np
except ImportError as e:
raise UnsatisfiedDependency(e, dep_message)
else:

it = iter(vals)

if dtype is None:
peek, it = iterpeek(it, sample)
dtype = np.array(peek).dtype

a = np.fromiter(it, dtype=dtype, count=count)
return a


ValuesContainer.array = valuestoarray


from petlx.integration import integrate
integrate(sys.modules[__name__])

import numpy as np
it = iter(vals)
if dtype is None:
peek, it = iterpeek(it, sample)
dtype = np.array(peek).dtype
a = np.fromiter(it, dtype=dtype, count=count)
return a


# integrate extension with petl
etl.toarray = toarray
Table.toarray = toarray
etl.torecarray = torecarray
Table.torecarray = torecarray
etl.fromarray = fromarray
ValuesView.array = valuestoarray
36 changes: 6 additions & 30 deletions petlx/test/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
"""

import math

import numpy as np

from nose.tools import eq_
from petl.testutils import ieq

import petl as etl
from petl.test.helpers import ieq, eq_
from petlx.array import toarray, fromarray, torecarray
from petlx.testutils import assertclose
import petl.fluent as etl


def test_toarray_nodtype():
Expand Down Expand Up @@ -90,7 +89,7 @@ def test_toarray_stringdtype():
('oranges', 3, 4.4),
('pears', 7, .1)]

a = toarray(t, dtype='a4, i2, f4')
a = toarray(t, dtype='U4, i2, f4')
assert isinstance(a, np.ndarray)
assert isinstance(a['foo'], np.ndarray)
assert isinstance(a['bar'], np.ndarray)
Expand All @@ -113,7 +112,7 @@ def test_toarray_dictdtype():
('oranges', 3, 4.4),
('pears', 7, .1)]

a = toarray(t, dtype={'foo': 'a4'}) # specify partial dtype
a = toarray(t, dtype={'foo': 'U4'}) # specify partial dtype
assert isinstance(a, np.ndarray)
assert isinstance(a['foo'], np.ndarray)
assert isinstance(a['bar'], np.ndarray)
Expand All @@ -136,7 +135,7 @@ def test_toarray_explicitdtype():
('oranges', 3, 4.4),
('pears', 7, .1)]

a = toarray(t, dtype=[('A', 'a4'), ('B', 'i2'), ('C', 'f4')])
a = toarray(t, dtype=[('A', 'U4'), ('B', 'i2'), ('C', 'f4')])
assert isinstance(a, np.ndarray)
assert isinstance(a['A'], np.ndarray)
assert isinstance(a['B'], np.ndarray)
Expand All @@ -152,29 +151,6 @@ def test_toarray_explicitdtype():
assertclose(.1, a['C'][2])


def test_toarray_lists():

t = [['foo', 'bar', 'baz'],
['apples', 1, 2.5],
['oranges', 3, 4.4],
['pears', 7, .1]]

a = toarray(t)
assert isinstance(a, np.ndarray)
assert isinstance(a['foo'], np.ndarray)
assert isinstance(a['bar'], np.ndarray)
assert isinstance(a['baz'], np.ndarray)
eq_('apples', a['foo'][0])
eq_('oranges', a['foo'][1])
eq_('pears', a['foo'][2])
eq_(1, a['bar'][0])
eq_(3, a['bar'][1])
eq_(7, a['bar'][2])
assert math.fabs(2.5 - a['baz'][0]) < 0.001
assert math.fabs(4.4 - a['baz'][1]) < 0.001
assert math.fabs(.1 - a['baz'][2]) < 0.001


def test_fromarray():
t = [('foo', 'bar', 'baz'),
('apples', 1, 2.5),
Expand Down
2 changes: 1 addition & 1 deletion rtfd_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
petl>=0.25.1
petl>=1.0
numpydoc==0.5
13 changes: 6 additions & 7 deletions test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
nose
petl>=0.25.1
#petl>=1.0
Cython
ipython
numpy
numexpr
tables
openpyxl
xlrd
xlutils
xlwt
pysam
bx-python
xlwt-future
intervaltree
numpydoc
psycopg2
mysql-python
pymysql
sqlalchemy
whoosh

pandas
tox

0 comments on commit 7910624

Please sign in to comment.