math/py-pandas: Update to 0.12.0

- Update to 0.12.0
- Two BUILD_DEPENDS are only RUN_DEPENDS, move them
- Patch some files out of MANIFEST.in
- Cherry pick upstream issue/commit [1]
- Define regression-test target and TEST_DEPENDS
- Document USES=display in support of unit tests

[1] pandas-dev/pandas#4353

PR: ports/180889
Submitted by: John W. O'Brien <john@saltant.com> (maintainer)
truenas · Sep 20, 2013 · b63a7f3 · b63a7f3
1 parent d6c1701
commit b63a7f3
Show file tree

Hide file tree

Showing 6 changed files with 288 additions and 16 deletions.
diff --git a/math/py-pandas/Makefile b/math/py-pandas/Makefile
@@ -1,7 +1,7 @@
 # $FreeBSD$
 
 PORTNAME=	pandas
-PORTVERSION=	0.11.0
+PORTVERSION=	0.12.0
 CATEGORIES=	math devel python
 MASTER_SITES=	CHEESESHOP
 PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
@@ -11,18 +11,38 @@ COMMENT=	Flexible, high-performance data analysis in Python
 
 LICENSE=	BSD
 
-BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}dateutil>0:${PORTSDIR}/devel/py-dateutil \
-		${PYNUMPY} \
-		${PYTHON_PKGNAMEPREFIX}pytz>0:${PORTSDIR}/devel/py-pytz
+BUILD_DEPENDS=	${PYNUMPY}
 RUN_DEPENDS:=	${BUILD_DEPENDS} \
+		${PYTHON_PKGNAMEPREFIX}dateutil>0:${PORTSDIR}/devel/py-dateutil \
+		${PYTHON_PKGNAMEPREFIX}pytz>0:${PORTSDIR}/devel/py-pytz \
 		${PYTHON_PKGNAMEPREFIX}sqlite3>0:${PORTSDIR}/databases/py-sqlite3
+TEST_DEPENDS=	nosetests:${PORTSDIR}/devel/py-nose \
+		${PYTHON_PKGNAMEPREFIX}numexpr>0:${PORTSDIR}/math/py-numexpr \
+		${PYTHON_PKGNAMEPREFIX}tables>0:${PORTSDIR}/devel/py-tables \
+		${PYTHON_PKGNAMEPREFIX}scipy>0:${PORTSDIR}/science/py-scipy \
+		${PYTHON_PKGNAMEPREFIX}matplotlib>0:${PORTSDIR}/math/py-matplotlib \
+		${PYTHON_PKGNAMEPREFIX}xlrd>0:${PORTSDIR}/textproc/py-xlrd \
+		${PYTHON_PKGNAMEPREFIX}xlwt>0:${PORTSDIR}/textproc/py-xlwt \
+		${PYTHON_PKGNAMEPREFIX}beautifulsoup>0:${PORTSDIR}/www/py-beautifulsoup \
+		${PYTHON_PKGNAMEPREFIX}html5lib>0:${PORTSDIR}/www/py-html5lib \
+		${PYTHON_PKGNAMEPREFIX}lxml>0:${PORTSDIR}/devel/py-lxml
 
 SUB_FILES=	pkg-message
 
 USE_PYTHON=		yes
 USE_PYDISTUTILS=	easy_install
 PYEASYINSTALL_ARCHDEP=	yes
 
+# Uncomment the following line to enable regression-test
+# on a headless (X11-less) host (eg: RedPorts)
+#USES=		display:regression-test
+
+regression-test: build
+	@(cd ${WRKSRC} && \
+		${PYTHON_CMD} ${PYSETUP} build_ext --inplace && \
+		${PYTHON_CMD} ${PYSETUP} nosetests \
+	)
+
 post-install:
 	@${CAT} ${PKGMESSAGE}
 

diff --git a/math/py-pandas/distinfo b/math/py-pandas/distinfo
@@ -1,2 +1,2 @@
-SHA256 (pandas-0.11.0.tar.gz) = 4d74d4d408494ccc690f6e61d11cb266bb55b142d22bf9b8a7063206073a90d7
-SIZE (pandas-0.11.0.tar.gz) = 2783590
+SHA256 (pandas-0.12.0.tar.gz) = be9b4c3611801b366873531bc87a87dd16b19e7d78fa84c21898cd007931c86d
+SIZE (pandas-0.12.0.tar.gz) = 3155059
diff --git a/math/py-pandas/files/patch-MANIFEST.in b/math/py-pandas/files/patch-MANIFEST.in
@@ -0,0 +1,12 @@
+--- ./MANIFEST.in.orig	2013-07-27 06:55:55.000000000 -0400
++++ ./MANIFEST.in	2013-07-27 06:56:02.000000000 -0400
+@@ -2,9 +2,7 @@
+ include LICENSE
+ include RELEASE.md
+ include README.rst
+-include TODO.rst
+ include setup.py
+-include setupegg.py
+
+ graft doc
+ prune doc/build
diff --git a/math/py-pandas/files/patch-pandas-issue4353-pullrequest4356 b/math/py-pandas/files/patch-pandas-issue4353-pullrequest4356
@@ -0,0 +1,162 @@
+This patch is adapted from the following upstream commit.
+
+[PATCH] TST/BUG/CLN: make stata IO tests use temporary files for writing
+c98e09951f74bf3445f8144363afaa769be69940
+
+--- pandas/io/tests/test_stata.py.orig
++++ pandas/io/tests/test_stata.py
+@@ -10,9 +10,8 @@ import numpy as np
+
+ from pandas.core.frame import DataFrame, Series
+ from pandas.io.parsers import read_csv
+-from pandas.io.stata import read_stata, StataReader, StataWriter
++from pandas.io.stata import read_stata, StataReader
+ import pandas.util.testing as tm
+-from pandas.util.testing import ensure_clean
+ from pandas.util.misc import is_little_endian
+
+
+@@ -27,15 +26,12 @@ class StataTests(unittest.TestCase):
+         self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
+         self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
+         self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
+-        self.dta5 = os.path.join(self.dirpath, 'stata5.dta')
+-        self.dta6 = os.path.join(self.dirpath, 'stata6.dta')
+         self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
+         self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
+         self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
+         self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
+         self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
+         self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
+-        self.dta10 = os.path.join(self.dirpath, 'stata10.dta')
+
+     def read_dta(self, file):
+         return read_stata(file, convert_dates=True)
+@@ -46,9 +42,11 @@ class StataTests(unittest.TestCase):
+     def test_read_dta1(self):
+         reader = StataReader(self.dta1)
+         parsed = reader.data()
+-        # Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
++        # Pandas uses np.nan as missing value.
++        # Thus, all columns will be of type float, regardless of their name.
+         expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
+-                             columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
++                             columns=['float_miss', 'double_miss', 'byte_miss',
++                                      'int_miss', 'long_miss'])
+
+         for i, col in enumerate(parsed.columns):
+             np.testing.assert_almost_equal(
+@@ -90,7 +88,9 @@ class StataTests(unittest.TestCase):
+                     np.datetime64('NaT')
+                 )
+             ],
+-            columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date', 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date']
++            columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
++                     'monthly_date', 'quarterly_date', 'half_yearly_date',
++                     'yearly_date']
+         )
+
+         with warnings.catch_warnings(record=True) as w:
+@@ -125,34 +125,40 @@ class StataTests(unittest.TestCase):
+                 ["nine", "two", 9, np.nan, "nine"],
+                 ["ten", "one", "ten", np.nan, "ten"]
+             ],
+-            columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled'])
++            columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled',
++                     'labeled_with_missings', 'float_labelled'])
+
+         tm.assert_frame_equal(parsed, expected)
+
+-    def test_write_dta5(self):
++    def test_read_write_dta5(self):
+         if not is_little_endian():
+-            raise nose.SkipTest("known failure of test_write_dta5 on non-little endian")
++            raise nose.SkipTest("known failure of test_write_dta5 on "
++                                "non-little endian")
+
+         original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
+-                             columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
++                             columns=['float_miss', 'double_miss', 'byte_miss',
++                                      'int_miss', 'long_miss'])
+         original.index.name = 'index'
+
+-        with ensure_clean(self.dta5) as path:
++        with tm.ensure_clean() as path:
+             original.to_stata(path, None, False)
+             written_and_read_again = self.read_dta(path)
+-            tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
++            tm.assert_frame_equal(written_and_read_again.set_index('index'),
++                                  original)
+
+     def test_write_dta6(self):
+         if not is_little_endian():
+-            raise nose.SkipTest("known failure of test_write_dta6 on non-little endian")
++            raise nose.SkipTest("known failure of test_write_dta6 on "
++                                "non-little endian")
+
+         original = self.read_csv(self.csv3)
+         original.index.name = 'index'
+
+-        with ensure_clean(self.dta6) as path:
++        with tm.ensure_clean() as path:
+             original.to_stata(path, None, False)
+             written_and_read_again = self.read_dta(path)
+-            tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
++            tm.assert_frame_equal(written_and_read_again.set_index('index'),
++                                  original)
+
+     @nose.tools.nottest
+     def test_read_dta7(self):
+@@ -190,29 +196,30 @@ class StataTests(unittest.TestCase):
+                 decimal=3
+             )
+
+-    def test_read_dta10(self):
++    def test_read_write_dta10(self):
+         if not is_little_endian():
+-            raise nose.SkipTest("known failure of test_write_dta10 on non-little endian")
++            raise nose.SkipTest("known failure of test_write_dta10 on "
++                                "non-little endian")
+
+-        original = DataFrame(
+-            data=
+-            [
+-                ["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
+-            ],
+-            columns=['string', 'object', 'integer', 'float', 'datetime'])
++        original = DataFrame(data=[["string", "object", 1, 1.1,
++                                    np.datetime64('2003-12-25')]],
++                             columns=['string', 'object', 'integer', 'float',
++                                      'datetime'])
+         original["object"] = Series(original["object"], dtype=object)
+         original.index.name = 'index'
+
+-        with ensure_clean(self.dta10) as path:
++        with tm.ensure_clean() as path:
+             original.to_stata(path, {'datetime': 'tc'}, False)
+             written_and_read_again = self.read_dta(path)
+-            tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
++            tm.assert_frame_equal(written_and_read_again.set_index('index'),
++                                  original)
+
+     def test_stata_doc_examples(self):
+-        with ensure_clean(self.dta5) as path:
++        with tm.ensure_clean() as path:
+             df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
+             df.to_stata(path)
+
++
+ if __name__ == '__main__':
+     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                    exit=False)
+--- pandas/util/testing.py.orig
++++ pandas/util/testing.py
+@@ -86,7 +86,7 @@ def set_trace():
+ #------------------------------------------------------------------------------
+ # contextmanager to ensure the file cleanup
+ @contextmanager
+-def ensure_clean(filename = None):
++def ensure_clean(filename=None):
+     # if we are not passed a filename, generate a temporary
+     if filename is None:
+         filename = tempfile.mkstemp()[1]
diff --git a/math/py-pandas/files/pkg-message.in b/math/py-pandas/files/pkg-message.in
@@ -9,6 +9,20 @@ matplotlib	math/py-matplotlib
 xlrd		textproc/py-xlrd
 xlwt		textproc/py-xlwt
 
+To use the HTML table parsing capabilities that are new to 0.12.0, you
+will need to install
+
+beautifulsoup	www/py-beautifulsoup
+
+and one or the other or both of
+
+html5lib	www/py-html5lib
+lxml		devel/py-lxml
+
+Please read the pandas documentation before deciding:
+
+http://pandas.pydata.org/pandas-docs/stable/gotchas.html#html-gotchas
+
 Also recommended, but not yet available in ports are:
 
 bottleneck		http://berkeleyanalytics.com/bottleneck/