From 7ae5a1cf53a68a05f7b36e06d4f90f48485c0db4 Mon Sep 17 00:00:00 2001
From: Davies Liu <davies@databricks.com>
Date: Thu, 13 Nov 2014 10:24:54 -0800
Subject: [PATCH 1/2] [SPARK-4348] [PySpark] [MLlib] rename random.py to
 rand.py

This PR rename random.py to rand.py to avoid the side affects of conflict with random module, but still keep the same interface as before.

```
>>> from pyspark.mllib.random import RandomRDDs
```

```
$ pydoc pyspark.mllib.random
Help on module random in pyspark.mllib:
NAME
    random - Python package for random data generation.

FILE
    /Users/davies/work/spark/python/pyspark/mllib/rand.py

CLASSES
    __builtin__.object
        pyspark.mllib.random.RandomRDDs

    class RandomRDDs(__builtin__.object)
     |  Generator methods for creating RDDs comprised of i.i.d samples from
     |  some distribution.
     |
     |  Static methods defined here:
     |
     |  normalRDD(sc, size, numPartitions=None, seed=None)
```

cc mengxr

reference link: http://xion.org.pl/2012/05/06/hacking-python-imports/

Author: Davies Liu <davies@databricks.com>

Closes #3216 from davies/random and squashes the following commits:

7ac4e8b [Davies Liu] rename random.py to rand.py

(cherry picked from commit ce0333f9a008348692bb9a200449d2d992e7825e)
Signed-off-by: Josh Rosen <joshrosen@databricks.com>

Conflicts:
	python/pyspark/mllib/feature.py
	python/run-tests
---
 python/pyspark/__init__.py                  | 10 ------
 python/pyspark/mllib/__init__.py            | 34 +++++++++++++++++++++
 python/pyspark/mllib/linalg.py              |  4 ---
 python/pyspark/mllib/{random.py => rand.py} |  0
 python/run-tests                            |  2 +-
 5 files changed, 35 insertions(+), 15 deletions(-)
 rename python/pyspark/mllib/{random.py => rand.py} (100%)

diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index c58555fc9d2c5..312c75d112cbf 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -49,16 +49,6 @@
       Main entry point for accessing data stored in Apache Hive..
 """
 
-# The following block allows us to import python's random instead of mllib.random for scripts in
-# mllib that depend on top level pyspark packages, which transitively depend on python's random.
-# Since Python's import logic looks for modules in the current package first, we eliminate
-# mllib.random as a candidate for C{import random} by removing the first search path, the script's
-# location, in order to force the loader to look in Python's top-level modules for C{random}.
-import sys
-s = sys.path.pop(0)
-import random
-sys.path.insert(0, s)
-
 from pyspark.conf import SparkConf
 from pyspark.context import SparkContext
 from pyspark.sql import SQLContext
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index 4149f54931d1f..8bff2668e2e16 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -24,3 +24,37 @@
 import numpy
 if numpy.version.version < '1.4':
     raise Exception("MLlib requires NumPy 1.4+")
+
+__all__ = ['classification', 'clustering', 'linalg', 'random',
+           'recommendation', 'regression', 'stat', 'tree', 'util']
+
+import sys
+import rand as random
+random.__name__ = 'random'
+random.RandomRDDs.__module__ = __name__ + '.random'
+
+
+class RandomModuleHook(object):
+    """
+    Hook to import pyspark.mllib.random
+    """
+    fullname = __name__ + '.random'
+
+    def find_module(self, name, path=None):
+        # skip all other modules
+        if not name.startswith(self.fullname):
+            return
+        return self
+
+    def load_module(self, name):
+        if name == self.fullname:
+            return random
+
+        cname = name.rsplit('.', 1)[-1]
+        try:
+            return getattr(random, cname)
+        except AttributeError:
+            raise ImportError
+
+
+sys.meta_path.append(RandomModuleHook())
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index f485a69db1fa2..21d46249a73b5 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -267,8 +267,4 @@ def _test():
         exit(-1)
 
 if __name__ == "__main__":
-    # remove current path from list of search paths to avoid importing mllib.random
-    # for C{import random}, which is done in an external dependency of pyspark during doctests.
-    import sys
-    sys.path.pop(0)
     _test()
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/rand.py
similarity index 100%
rename from python/pyspark/mllib/random.py
rename to python/pyspark/mllib/rand.py
diff --git a/python/run-tests b/python/run-tests
index d671da40031c8..51df52dbde0f7 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -73,7 +73,7 @@ run_test "pyspark/mllib/_common.py"
 run_test "pyspark/mllib/classification.py"
 run_test "pyspark/mllib/clustering.py"
 run_test "pyspark/mllib/linalg.py"
-run_test "pyspark/mllib/random.py"
+run_test "pyspark/mllib/rand.py"
 run_test "pyspark/mllib/recommendation.py"
 run_test "pyspark/mllib/regression.py"
 run_test "pyspark/mllib/stat.py"

From ace4cb614588e7f8423d854ace6bf2b1c61fd1dc Mon Sep 17 00:00:00 2001
From: "Joseph K. Bradley" <joseph@databricks.com>
Date: Wed, 17 Dec 2014 14:12:46 -0800
Subject: [PATCH 2/2] [SPARK-4821] [mllib] [python] [docs] Fix for
 pyspark.mllib.rand doc

+ small doc edit
+ include edit to make IntelliJ happy

CC: davies  mengxr

Note to davies  -- this does not fix the "WARNING: Literal block expected; none found." warnings since that seems to involve spacing which IntelliJ does not like.  (Those warnings occur when generating the Python docs.)

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #3669 from jkbradley/python-warnings and squashes the following commits:

4587868 [Joseph K. Bradley] fixed warning
8cb073c [Joseph K. Bradley] Updated based on davies recommendation
c51eca4 [Joseph K. Bradley] Updated rst file for pyspark.mllib.rand doc.  Small doc edit.  Small include edit to make IntelliJ happy.

Conflicts:
	python/docs/pyspark.streaming.rst
	python/pyspark/mllib/feature.py
---
 python/pyspark/mllib/__init__.py | 27 +--------------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index 8bff2668e2e16..0c7f4910747d2 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -32,29 +32,4 @@
 import rand as random
 random.__name__ = 'random'
 random.RandomRDDs.__module__ = __name__ + '.random'
-
-
-class RandomModuleHook(object):
-    """
-    Hook to import pyspark.mllib.random
-    """
-    fullname = __name__ + '.random'
-
-    def find_module(self, name, path=None):
-        # skip all other modules
-        if not name.startswith(self.fullname):
-            return
-        return self
-
-    def load_module(self, name):
-        if name == self.fullname:
-            return random
-
-        cname = name.rsplit('.', 1)[-1]
-        try:
-            return getattr(random, cname)
-        except AttributeError:
-            raise ImportError
-
-
-sys.meta_path.append(RandomModuleHook())
+sys.modules[__name__ + '.random'] = random