From cc5bf45308825c95a044027697369f5a56ad45c8 Mon Sep 17 00:00:00 2001
From: Kan Zhang
Date: Mon, 16 Jun 2014 13:51:32 -0700
Subject: [PATCH 1/2] [SPARK-2130] End-user friendly String representation for StorageLevel in Python

---
 python/pyspark/rdd.py          | 2 ++
 python/pyspark/storagelevel.py | 9 +++++++++
 2 files changed, 11 insertions(+)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index ddd22850a819c..4eba80d31e6fd 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1451,6 +1451,8 @@ def getStorageLevel(self):
         >>> rdd1 = sc.parallelize([1,2])
         >>> rdd1.getStorageLevel()
         StorageLevel(False, False, False, False, 1)
+        >>> print(rdd1.getStorageLevel())
+        Serialized 1x Replicated
         """
         java_storage_level = self._jrdd.getStorageLevel()
         storage_level = StorageLevel(java_storage_level.useDisk(),
diff --git a/python/pyspark/storagelevel.py b/python/pyspark/storagelevel.py
index 7b6660eab231b..3a18ea54eae4c 100644
--- a/python/pyspark/storagelevel.py
+++ b/python/pyspark/storagelevel.py
@@ -36,6 +36,15 @@ def __repr__(self):
         return "StorageLevel(%s, %s, %s, %s, %s)" % (
             self.useDisk, self.useMemory, self.useOffHeap, self.deserialized, self.replication)
 
+    def __str__(self):
+        result = ""
+        result += "Disk " if self.useDisk else ""
+        result += "Memory " if self.useMemory else ""
+        result += "Tachyon " if self.useOffHeap else ""
+        result += "Deserialized " if self.deserialized else "Serialized "
+        result += "%sx Replicated" % self.replication
+        return result
+
 StorageLevel.DISK_ONLY = StorageLevel(True, False, False, False)
 StorageLevel.DISK_ONLY_2 = StorageLevel(True, False, False, False, 2)
 StorageLevel.MEMORY_ONLY = StorageLevel(False, True, False, True)

From 7c8b98bf45d203277ec8ecd197e24f6675a7096b Mon Sep 17 00:00:00 2001
From: Kan Zhang
Date: Mon, 16 Jun 2014 17:15:36 -0700
Subject: [PATCH 2/2] [SPARK-2130] Prettier epydoc output

---
 python/pyspark/rdd.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 4eba80d31e6fd..bb4d035edcdeb 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1448,6 +1448,7 @@ def toDebugString(self):
     def getStorageLevel(self):
         """
         Get the RDD's current storage level.
+
         >>> rdd1 = sc.parallelize([1,2])
         >>> rdd1.getStorageLevel()
         StorageLevel(False, False, False, False, 1)