From 7ae5a1cf53a68a05f7b36e06d4f90f48485c0db4 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Thu, 13 Nov 2014 10:24:54 -0800 Subject: [PATCH 1/2] [SPARK-4348] [PySpark] [MLlib] rename random.py to rand.py This PR renames random.py to rand.py to avoid the side effects of a conflict with the random module, but still keeps the same interface as before. ``` >>> from pyspark.mllib.random import RandomRDDs ``` ``` $ pydoc pyspark.mllib.random Help on module random in pyspark.mllib: NAME random - Python package for random data generation. FILE /Users/davies/work/spark/python/pyspark/mllib/rand.py CLASSES __builtin__.object pyspark.mllib.random.RandomRDDs class RandomRDDs(__builtin__.object) | Generator methods for creating RDDs comprised of i.i.d samples from | some distribution. | | Static methods defined here: | | normalRDD(sc, size, numPartitions=None, seed=None) ``` cc mengxr reference link: http://xion.org.pl/2012/05/06/hacking-python-imports/ Author: Davies Liu Closes #3216 from davies/random and squashes the following commits: 7ac4e8b [Davies Liu] rename random.py to rand.py (cherry picked from commit ce0333f9a008348692bb9a200449d2d992e7825e) Signed-off-by: Josh Rosen Conflicts: python/pyspark/mllib/feature.py python/run-tests --- python/pyspark/__init__.py | 10 ------ python/pyspark/mllib/__init__.py | 34 +++++++++++++++++++++ python/pyspark/mllib/linalg.py | 4 --- python/pyspark/mllib/{random.py => rand.py} | 0 python/run-tests | 2 +- 5 files changed, 35 insertions(+), 15 deletions(-) rename python/pyspark/mllib/{random.py => rand.py} (100%) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index c58555fc9d2c5..312c75d112cbf 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -49,16 +49,6 @@ Main entry point for accessing data stored in Apache Hive.. 
""" -# The following block allows us to import python's random instead of mllib.random for scripts in -# mllib that depend on top level pyspark packages, which transitively depend on python's random. -# Since Python's import logic looks for modules in the current package first, we eliminate -# mllib.random as a candidate for C{import random} by removing the first search path, the script's -# location, in order to force the loader to look in Python's top-level modules for C{random}. -import sys -s = sys.path.pop(0) -import random -sys.path.insert(0, s) - from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.sql import SQLContext diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 4149f54931d1f..8bff2668e2e16 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -24,3 +24,37 @@ import numpy if numpy.version.version < '1.4': raise Exception("MLlib requires NumPy 1.4+") + +__all__ = ['classification', 'clustering', 'linalg', 'random', + 'recommendation', 'regression', 'stat', 'tree', 'util'] + +import sys +import rand as random +random.__name__ = 'random' +random.RandomRDDs.__module__ = __name__ + '.random' + + +class RandomModuleHook(object): + """ + Hook to import pyspark.mllib.random + """ + fullname = __name__ + '.random' + + def find_module(self, name, path=None): + # skip all other modules + if not name.startswith(self.fullname): + return + return self + + def load_module(self, name): + if name == self.fullname: + return random + + cname = name.rsplit('.', 1)[-1] + try: + return getattr(random, cname) + except AttributeError: + raise ImportError + + +sys.meta_path.append(RandomModuleHook()) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index f485a69db1fa2..21d46249a73b5 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -267,8 +267,4 @@ def _test(): exit(-1) if __name__ == "__main__": - # remove 
current path from list of search paths to avoid importing mllib.random - # for C{import random}, which is done in an external dependency of pyspark during doctests. - import sys - sys.path.pop(0) _test() diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/rand.py similarity index 100% rename from python/pyspark/mllib/random.py rename to python/pyspark/mllib/rand.py diff --git a/python/run-tests b/python/run-tests index d671da40031c8..51df52dbde0f7 100755 --- a/python/run-tests +++ b/python/run-tests @@ -73,7 +73,7 @@ run_test "pyspark/mllib/_common.py" run_test "pyspark/mllib/classification.py" run_test "pyspark/mllib/clustering.py" run_test "pyspark/mllib/linalg.py" -run_test "pyspark/mllib/random.py" +run_test "pyspark/mllib/rand.py" run_test "pyspark/mllib/recommendation.py" run_test "pyspark/mllib/regression.py" run_test "pyspark/mllib/stat.py" From ace4cb614588e7f8423d854ace6bf2b1c61fd1dc Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Wed, 17 Dec 2014 14:12:46 -0800 Subject: [PATCH 2/2] [SPARK-4821] [mllib] [python] [docs] Fix for pyspark.mllib.rand doc + small doc edit + include edit to make IntelliJ happy CC: davies mengxr Note to davies -- this does not fix the "WARNING: Literal block expected; none found." warnings since that seems to involve spacing which IntelliJ does not like. (Those warnings occur when generating the Python docs.) Author: Joseph K. Bradley Closes #3669 from jkbradley/python-warnings and squashes the following commits: 4587868 [Joseph K. Bradley] fixed warning 8cb073c [Joseph K. Bradley] Updated based on davies recommendation c51eca4 [Joseph K. Bradley] Updated rst file for pyspark.mllib.rand doc. Small doc edit. Small include edit to make IntelliJ happy. 
Conflicts: python/docs/pyspark.streaming.rst python/pyspark/mllib/feature.py --- python/pyspark/mllib/__init__.py | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 8bff2668e2e16..0c7f4910747d2 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -32,29 +32,4 @@ import rand as random random.__name__ = 'random' random.RandomRDDs.__module__ = __name__ + '.random' - - -class RandomModuleHook(object): - """ - Hook to import pyspark.mllib.random - """ - fullname = __name__ + '.random' - - def find_module(self, name, path=None): - # skip all other modules - if not name.startswith(self.fullname): - return - return self - - def load_module(self, name): - if name == self.fullname: - return random - - cname = name.rsplit('.', 1)[-1] - try: - return getattr(random, cname) - except AttributeError: - raise ImportError - - -sys.meta_path.append(RandomModuleHook()) +sys.modules[__name__ + '.random'] = random