Permalink
Browse files

Added missing Python APIs

  • Loading branch information...
1 parent 181ec50 commit cc3c6cd8e552bbf5e3fa34c529f4dd72c50f716c @prabinb prabinb committed Jan 23, 2014
Showing with 38 additions and 0 deletions.
  1. +31 −0 python/pyspark/context.py
  2. +7 −0 python/pyspark/rdd.py
View
@@ -372,6 +372,37 @@ def _getJavaStorageLevel(self, storageLevel):
return newStorageLevel(storageLevel.useDisk, storageLevel.useMemory,
storageLevel.deserialized, storageLevel.replication)
+ def setJobGroup(self, groupId, description):
+ """
+ Assign a group ID to all jobs started by this thread until the group ID is set to a
+ different value or cleared. Forwards C{groupId} and C{description} to C{self._jsc}.
+
+ Often, a unit of execution in an application consists of multiple Spark actions or jobs.
+ Application programmers can use this method to group all those jobs together and give a
+ group description. Once set, the Spark web UI will associate such jobs with this group.
+ """
+ self._jsc.setJobGroup(groupId, description)
+
+ def setLocalProperty(self, key, value):
+ """
+ Set a local property that affects jobs submitted from this thread, such as the
+ Spark fair scheduler pool. Forwards C{key} and C{value} to C{self._jsc}; returns nothing.
+ """
+ self._jsc.setLocalProperty(key, value)
+
+ def getLocalProperty(self, key):
+ """
+ Return the local property set in this thread for C{key}, or None (a Java null on the
+ JVM side) if it is missing. See L{setLocalProperty}.
+ """
+ return self._jsc.getLocalProperty(key)
+
+ def sparkUser(self):
+ """
+ Return the SPARK_USER of the user running this SparkContext, via C{self._jsc.sc()}.
+ """
+ return self._jsc.sc().sparkUser()
+
def _test():
import atexit
import doctest
View
@@ -95,6 +95,13 @@ def __init__(self, jrdd, ctx, jrdd_deserializer):
self.is_checkpointed = False
self.ctx = ctx
self._jrdd_deserializer = jrdd_deserializer
+ self._id = jrdd.id()
+
+ def id(self):
+ """
+ Return this RDD's unique ID (within its SparkContext), cached from C{jrdd.id()} at init.
+ """
+ return self._id
def __repr__(self):
return self._jrdd.toString()

0 comments on commit cc3c6cd

Please sign in to comment.