From c40c50a7ccfb940ef01f2ffd23fd698dd797d4ea Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 13:27:42 -0700
Subject: [PATCH 1/6] Fix the Makefile

---
 python/docs/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/docs/Makefile b/python/docs/Makefile
index 903009790ba3b..905e0215c20c2 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -2,10 +2,10 @@
 #
 # You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-PAPER         =
-BUILDDIR      = _build
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+PAPER         ?=
+BUILDDIR      ?= _build
 
 export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.9.2-src.zip)

From d5b148d0999c73d269b046f7a8b50571ccb05943 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 13:33:28 -0700
Subject: [PATCH 2/6] Fix __all__ list in ml/regression.py

---
 python/pyspark/ml/regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index bc88f88b7f1e3..57cb52e382dee 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -28,7 +28,7 @@
 __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
            'DecisionTreeRegressor', 'DecisionTreeRegressionModel',
            'GBTRegressor', 'GBTRegressionModel',
-           'GeneralizedLinearRegression', 'GeneralizedLinearRegressionModel'
+           'GeneralizedLinearRegression', 'GeneralizedLinearRegressionModel',
            'IsotonicRegression', 'IsotonicRegressionModel',
            'LinearRegression', 'LinearRegressionModel',
            'LinearRegressionSummary', 'LinearRegressionTrainingSummary',

From 3f1930917c9535e027b5882db542a0a285ddf5d5 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 14:16:53 -0700
Subject: [PATCH 3/6] Fix highlighting issues in context.py

---
 python/pyspark/sql/context.py   | 1 -
 python/pyspark/sql/dataframe.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 4008332c84d0a..effc5b51820ae 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -405,7 +405,6 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         >>> sqlContext.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
-        Py4JJavaError:...
         """
         if isinstance(data, DataFrame):
             raise TypeError("data is already a DataFrame")
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index d473d6b534647..b4fa8368936a4 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -60,7 +60,7 @@ class DataFrame(object):
         people = sqlContext.read.parquet("...")
         department = sqlContext.read.parquet("...")
 
-        people.filter(people.age > 30).join(department, people.deptId == department.id)) \
+        people.filter(people.age > 30).join(department, people.deptId == department.id)\
           .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"})
 
     .. note:: Experimental

From 3b85ed0173b037ffd7fbb7c58b4190a7529336ca Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 14:52:54 -0700
Subject: [PATCH 4/6] Include error type in doctest

---
 python/pyspark/sql/context.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index effc5b51820ae..e9a26fef1474d 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -405,6 +405,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         >>> sqlContext.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
+        Py4JJavaError
         """
         if isinstance(data, DataFrame):
             raise TypeError("data is already a DataFrame")

From a35b811621c44e7155a6f2d2d7a38b88c822f08a Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 16:05:36 -0700
Subject: [PATCH 5/6] Now it's a TypeError

---
 python/pyspark/sql/context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index e9a26fef1474d..53248a822e17d 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -405,7 +405,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         >>> sqlContext.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
-        Py4JJavaError
+        TypeError
         """
         if isinstance(data, DataFrame):
             raise TypeError("data is already a DataFrame")

From d4f9687e5f17dd7e76ba3b5ccf1f25c56f5aadd2 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Tue, 12 Apr 2016 16:14:19 -0700
Subject: [PATCH 6/6] More closely match error

---
 python/pyspark/sql/context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 53248a822e17d..11dfcfe13ee0d 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -405,7 +405,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         >>> sqlContext.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
         Traceback (most recent call last):
             ...
-        TypeError
+        Py4JJavaError: ...
         """
         if isinstance(data, DataFrame):
             raise TypeError("data is already a DataFrame")
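
A note on the doctest churn in patches 3 through 6: with the "# doctest: +IGNORE_EXCEPTION_DETAIL" directive, doctest ignores any module prefix on the exception name and everything after the colon, but the exception class named in the expected output still has to match the one actually raised, which appears to be why the expected line above moves between Py4JJavaError and TypeError. Below is a minimal, self-contained sketch of that behaviour, using a hypothetical parse_flag helper rather than any pyspark code:

    import doctest

    def parse_flag(value):
        """Convert a string flag to a bool, rejecting non-strings.

        The second example relies on IGNORE_EXCEPTION_DETAIL: the expected
        output only has to name the exception class; the message after the
        colon is ignored, but a different class would still fail the test.

        >>> parse_flag("yes")
        True
        >>> parse_flag(42)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        ValueError: ...
        """
        if not isinstance(value, str):
            raise ValueError("expected a string, got %r" % (value,))
        return value.lower() in ("yes", "true", "1")

    if __name__ == "__main__":
        # Reports a failure if the expected exception class does not match.
        print(doctest.testmod())

Running the sketch prints TestResults(failed=0, attempted=2); changing the expected line to, say, "TypeError: ..." makes the second example fail even with the directive in place.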