Skip to content

Commit

Permalink
math/py-pandas: Update to 0.12.0
Browse files Browse the repository at this point in the history
- Update to 0.12.0
- Two BUILD_DEPENDS are only RUN_DEPENDS, move them
- Patch some files out of MANIFEST.in
- Cherry pick upstream issue/commit [1]
- Define regression-test target and TEST_DEPENDS
- Document USES=display in support of unit tests

[1] pandas-dev/pandas#4353

PR:		ports/180889
Submitted by:	John W. O'Brien <john@saltant.com> (maintainer)
  • Loading branch information
koobs committed Sep 20, 2013
1 parent d6c1701 commit b63a7f3
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 16 deletions.
28 changes: 24 additions & 4 deletions math/py-pandas/Makefile
@@ -1,7 +1,7 @@
# $FreeBSD$

PORTNAME= pandas
PORTVERSION= 0.11.0
PORTVERSION= 0.12.0
CATEGORIES= math devel python
MASTER_SITES= CHEESESHOP
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
Expand All @@ -11,18 +11,38 @@ COMMENT= Flexible, high-performance data analysis in Python

LICENSE= BSD

BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}dateutil>0:${PORTSDIR}/devel/py-dateutil \
${PYNUMPY} \
${PYTHON_PKGNAMEPREFIX}pytz>0:${PORTSDIR}/devel/py-pytz
BUILD_DEPENDS= ${PYNUMPY}
RUN_DEPENDS:= ${BUILD_DEPENDS} \
${PYTHON_PKGNAMEPREFIX}dateutil>0:${PORTSDIR}/devel/py-dateutil \
${PYTHON_PKGNAMEPREFIX}pytz>0:${PORTSDIR}/devel/py-pytz \
${PYTHON_PKGNAMEPREFIX}sqlite3>0:${PORTSDIR}/databases/py-sqlite3
TEST_DEPENDS= nosetests:${PORTSDIR}/devel/py-nose \
${PYTHON_PKGNAMEPREFIX}numexpr>0:${PORTSDIR}/math/py-numexpr \
${PYTHON_PKGNAMEPREFIX}tables>0:${PORTSDIR}/devel/py-tables \
${PYTHON_PKGNAMEPREFIX}scipy>0:${PORTSDIR}/science/py-scipy \
${PYTHON_PKGNAMEPREFIX}matplotlib>0:${PORTSDIR}/math/py-matplotlib \
${PYTHON_PKGNAMEPREFIX}xlrd>0:${PORTSDIR}/textproc/py-xlrd \
${PYTHON_PKGNAMEPREFIX}xlwt>0:${PORTSDIR}/textproc/py-xlwt \
${PYTHON_PKGNAMEPREFIX}beautifulsoup>0:${PORTSDIR}/www/py-beautifulsoup \
${PYTHON_PKGNAMEPREFIX}html5lib>0:${PORTSDIR}/www/py-html5lib \
${PYTHON_PKGNAMEPREFIX}lxml>0:${PORTSDIR}/devel/py-lxml

SUB_FILES= pkg-message

USE_PYTHON= yes
USE_PYDISTUTILS= easy_install
PYEASYINSTALL_ARCHDEP= yes

# Uncomment the following line to enable regression-test
# on a headless (X11-less) host (e.g. RedPorts)
#USES= display:regression-test

regression-test: build
@(cd ${WRKSRC} && \
${PYTHON_CMD} ${PYSETUP} build_ext --inplace && \
${PYTHON_CMD} ${PYSETUP} nosetests \
)

post-install:
@${CAT} ${PKGMESSAGE}

Expand Down
4 changes: 2 additions & 2 deletions math/py-pandas/distinfo
@@ -1,2 +1,2 @@
SHA256 (pandas-0.11.0.tar.gz) = 4d74d4d408494ccc690f6e61d11cb266bb55b142d22bf9b8a7063206073a90d7
SIZE (pandas-0.11.0.tar.gz) = 2783590
SHA256 (pandas-0.12.0.tar.gz) = be9b4c3611801b366873531bc87a87dd16b19e7d78fa84c21898cd007931c86d
SIZE (pandas-0.12.0.tar.gz) = 3155059
12 changes: 12 additions & 0 deletions math/py-pandas/files/patch-MANIFEST.in
@@ -0,0 +1,12 @@
--- ./MANIFEST.in.orig 2013-07-27 06:55:55.000000000 -0400
+++ ./MANIFEST.in 2013-07-27 06:56:02.000000000 -0400
@@ -2,9 +2,7 @@
include LICENSE
include RELEASE.md
include README.rst
-include TODO.rst
include setup.py
-include setupegg.py

graft doc
prune doc/build
162 changes: 162 additions & 0 deletions math/py-pandas/files/patch-pandas-issue4353-pullrequest4356
@@ -0,0 +1,162 @@
This patch is adapted from the following upstream commit.

[PATCH] TST/BUG/CLN: make stata IO tests use temporary files for writing
c98e09951f74bf3445f8144363afaa769be69940

--- pandas/io/tests/test_stata.py.orig
+++ pandas/io/tests/test_stata.py
@@ -10,9 +10,8 @@ import numpy as np

from pandas.core.frame import DataFrame, Series
from pandas.io.parsers import read_csv
-from pandas.io.stata import read_stata, StataReader, StataWriter
+from pandas.io.stata import read_stata, StataReader
import pandas.util.testing as tm
-from pandas.util.testing import ensure_clean
from pandas.util.misc import is_little_endian


@@ -27,15 +26,12 @@ class StataTests(unittest.TestCase):
self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
- self.dta5 = os.path.join(self.dirpath, 'stata5.dta')
- self.dta6 = os.path.join(self.dirpath, 'stata6.dta')
self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
- self.dta10 = os.path.join(self.dirpath, 'stata10.dta')

def read_dta(self, file):
return read_stata(file, convert_dates=True)
@@ -46,9 +42,11 @@ class StataTests(unittest.TestCase):
def test_read_dta1(self):
reader = StataReader(self.dta1)
parsed = reader.data()
- # Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
+ # Pandas uses np.nan as missing value.
+ # Thus, all columns will be of type float, regardless of their name.
expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
- columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
+ columns=['float_miss', 'double_miss', 'byte_miss',
+ 'int_miss', 'long_miss'])

for i, col in enumerate(parsed.columns):
np.testing.assert_almost_equal(
@@ -90,7 +88,9 @@ class StataTests(unittest.TestCase):
np.datetime64('NaT')
)
],
- columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date', 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date']
+ columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
+ 'monthly_date', 'quarterly_date', 'half_yearly_date',
+ 'yearly_date']
)

with warnings.catch_warnings(record=True) as w:
@@ -125,34 +125,40 @@ class StataTests(unittest.TestCase):
["nine", "two", 9, np.nan, "nine"],
["ten", "one", "ten", np.nan, "ten"]
],
- columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled'])
+ columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled',
+ 'labeled_with_missings', 'float_labelled'])

tm.assert_frame_equal(parsed, expected)

- def test_write_dta5(self):
+ def test_read_write_dta5(self):
if not is_little_endian():
- raise nose.SkipTest("known failure of test_write_dta5 on non-little endian")
+ raise nose.SkipTest("known failure of test_write_dta5 on "
+ "non-little endian")

original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
- columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
+ columns=['float_miss', 'double_miss', 'byte_miss',
+ 'int_miss', 'long_miss'])
original.index.name = 'index'

- with ensure_clean(self.dta5) as path:
+ with tm.ensure_clean() as path:
original.to_stata(path, None, False)
written_and_read_again = self.read_dta(path)
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
+ original)

def test_write_dta6(self):
if not is_little_endian():
- raise nose.SkipTest("known failure of test_write_dta6 on non-little endian")
+ raise nose.SkipTest("known failure of test_write_dta6 on "
+ "non-little endian")

original = self.read_csv(self.csv3)
original.index.name = 'index'

- with ensure_clean(self.dta6) as path:
+ with tm.ensure_clean() as path:
original.to_stata(path, None, False)
written_and_read_again = self.read_dta(path)
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
+ original)

@nose.tools.nottest
def test_read_dta7(self):
@@ -190,29 +196,30 @@ class StataTests(unittest.TestCase):
decimal=3
)

- def test_read_dta10(self):
+ def test_read_write_dta10(self):
if not is_little_endian():
- raise nose.SkipTest("known failure of test_write_dta10 on non-little endian")
+ raise nose.SkipTest("known failure of test_write_dta10 on "
+ "non-little endian")

- original = DataFrame(
- data=
- [
- ["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
- ],
- columns=['string', 'object', 'integer', 'float', 'datetime'])
+ original = DataFrame(data=[["string", "object", 1, 1.1,
+ np.datetime64('2003-12-25')]],
+ columns=['string', 'object', 'integer', 'float',
+ 'datetime'])
original["object"] = Series(original["object"], dtype=object)
original.index.name = 'index'

- with ensure_clean(self.dta10) as path:
+ with tm.ensure_clean() as path:
original.to_stata(path, {'datetime': 'tc'}, False)
written_and_read_again = self.read_dta(path)
- tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+ tm.assert_frame_equal(written_and_read_again.set_index('index'),
+ original)

def test_stata_doc_examples(self):
- with ensure_clean(self.dta5) as path:
+ with tm.ensure_clean() as path:
df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
df.to_stata(path)

+
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
--- pandas/util/testing.py.orig
+++ pandas/util/testing.py
@@ -86,7 +86,7 @@ def set_trace():
#------------------------------------------------------------------------------
# contextmanager to ensure the file cleanup
@contextmanager
-def ensure_clean(filename = None):
+def ensure_clean(filename=None):
# if we are not passed a filename, generate a temporary
if filename is None:
filename = tempfile.mkstemp()[1]
14 changes: 14 additions & 0 deletions math/py-pandas/files/pkg-message.in
Expand Up @@ -9,6 +9,20 @@ matplotlib math/py-matplotlib
xlrd textproc/py-xlrd
xlwt textproc/py-xlwt

To use the HTML table parsing capabilities that are new in 0.12.0, you
will need to install

beautifulsoup www/py-beautifulsoup

and at least one of

html5lib www/py-html5lib
lxml devel/py-lxml

Please read the pandas documentation before deciding:

http://pandas.pydata.org/pandas-docs/stable/gotchas.html#html-gotchas

Also recommended, but not yet available in ports, are:

bottleneck http://berkeleyanalytics.com/bottleneck/
Expand Down

0 comments on commit b63a7f3

Please sign in to comment.