Skip to content

Commit

Permalink
add range() in Python
Browse files Browse the repository at this point in the history
  • Loading branch information
Davies Liu committed May 18, 2015
1 parent 4590208 commit 789eda5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
16 changes: 16 additions & 0 deletions python/pyspark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,22 @@ def stop(self):
with SparkContext._lock:
SparkContext._active_spark_context = None

def range(self, start, end=None, step=1, numSlices=None):
    """
    Create a new RDD of int containing elements from `start` to `end`
    (exclusive), increased by `step` every element. Can be called the
    same way as Python's built-in range() function: if only one argument
    is given, it is interpreted as `end`, and `start` is set to 0.

    :param start: the start value (or the end value, if `end` is None)
    :param end: the end value (exclusive)
    :param step: the incremental step (default: 1)
    :param numSlices: the number of partitions of the new RDD
        (None lets `parallelize` pick the default)
    :return: An RDD of int

    >>> sc.range(5).collect()
    [0, 1, 2, 3, 4]
    >>> sc.range(2, 4).collect()
    [2, 3]
    >>> sc.range(1, 7, 2).collect()
    [1, 3, 5]
    """
    # Mirror the built-in range(): range(n) means range(0, n).
    if end is None:
        end = start
        start = 0
    return self.parallelize(xrange(start, end, step), numSlices)

def parallelize(self, c, numSlices=None):
"""
Distribute a local Python collection to form an RDD. Using xrange
Expand Down
20 changes: 20 additions & 0 deletions python/pyspark/sql/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,26 @@ def udf(self):
"""Returns a :class:`UDFRegistration` for UDF registration."""
return UDFRegistration(self)

def range(self, start, end=None, step=1, numPartitions=None):
    """
    Create a :class:`DataFrame` with single LongType column named `id`,
    containing elements in a range from `start` to `end` (exclusive) with
    step value `step`. Like Python's built-in range() function, if only
    one argument is given, it is interpreted as `end` and `start` is 0.

    :param start: the start value (or the end value, if `end` is None)
    :param end: the end value (exclusive)
    :param step: the incremental step (default: 1)
    :param numPartitions: the number of partitions of the DataFrame
        (default: ``self._sc.defaultParallelism``)
    :return: A new DataFrame

    >>> sqlContext.range(1, 7, 2).collect()
    [Row(id=1), Row(id=3), Row(id=5)]
    >>> sqlContext.range(3).collect()
    [Row(id=0), Row(id=1), Row(id=2)]
    """
    if numPartitions is None:
        numPartitions = self._sc.defaultParallelism
    # Mirror the built-in range(): range(n) means range(0, n).
    if end is None:
        end = start
        start = 0
    # Delegate to the JVM-side SQLContext.range; coerce to plain ints so
    # the Py4J call receives primitive longs/ints.
    jdf = self._ssql_ctx.range(int(start), int(end), int(step), int(numPartitions))
    return DataFrame(jdf, self)

@ignore_unicode_prefix
def registerFunction(self, name, f, returnType=StringType()):
"""Registers a lambda function as a UDF so it can be used in SQL statements.
Expand Down

0 comments on commit 789eda5

Please sign in to comment.