Skip to content

Commit

Permalink
typos
Browse files Browse the repository at this point in the history
  • Loading branch information
dorx committed Jul 29, 2014
1 parent 07ddff2 commit bd2df13
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,8 @@ class PythonMLLibAPI extends Serializable {
/**
* Java stub for Python mllib RandomRDDGenerators.poissonVectorRDD()
*/
def poissonVectorRDD(jsc: JavaSparkContext,mean: Double,
def poissonVectorRDD(jsc: JavaSparkContext,
mean: Double,
numRows: Long,
numCols: Int,
numPartitions: Int,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ import org.apache.spark.util.Utils

/**
* :: Experimental ::
* Generator methods for creating RDDs comprised of i.i.d samples from some distribution.
* Generator methods for creating RDDs comprised of i.i.d. samples from some distribution.
*/
@Experimental
object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the uniform distribution on [0.0, 1.0].
* Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
*
* To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
* `RandomRDDGenerators.uniformRDD(sc, n, p, seed).map(v => (b - a) * v)`
* `RandomRDDGenerators.uniformRDD(sc, n, p, seed).map(v => a + (b - a) * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -52,10 +52,10 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the uniform distribution on [0.0, 1.0].
* Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
*
* To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
* `RandomRDDGenerators.uniformRDD(sc, n, p).map(v => (b - a) * v)`
* `RandomRDDGenerators.uniformRDD(sc, n, p).map(v => a + (b - a) * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -69,11 +69,11 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the uniform distribution on [0.0, 1.0].
* Generates an RDD comprised of i.i.d. samples from the uniform distribution on [0.0, 1.0].
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* To transform the distribution in the generated RDD from U[0.0, 1.0] to U[a, b], use
* `RandomRDDGenerators.uniformRDD(sc, n).map(v => (b - a) * v)`
* `RandomRDDGenerators.uniformRDD(sc, n).map(v => a + (b - a) * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -86,10 +86,10 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the standard normal distribution.
* Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
*
* To transform the distribution in the generated RDD from standard normal to some other normal
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p, seed).map(v => mean + sigma * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -105,10 +105,10 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the standard normal distribution.
* Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
*
* To transform the distribution in the generated RDD from standard normal to some other normal
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p).map(v => mean + sigma * v)`
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n, p).map(v => mean + sigma * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -122,11 +122,11 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the standard normal distribution.
* Generates an RDD comprised of i.i.d. samples from the standard normal distribution.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* To transform the distribution in the generated RDD from standard normal to some other normal
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n).map(v => mean + sigma * v)`
* N(mean, sigma), use `RandomRDDGenerators.normalRDD(sc, n).map(v => mean + sigma * v)`.
*
* @param sc SparkContext used to create the RDD.
* @param size Size of the RDD.
Expand All @@ -139,7 +139,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the Poisson distribution with the input mean.
* Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
*
* @param sc SparkContext used to create the RDD.
* @param mean Mean, or lambda, for the Poisson distribution.
Expand All @@ -160,7 +160,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the Poisson distribution with the input mean.
* Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
*
* @param sc SparkContext used to create the RDD.
* @param mean Mean, or lambda, for the Poisson distribution.
Expand All @@ -175,7 +175,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples from the Poisson distribution with the input mean.
* Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
Expand All @@ -190,7 +190,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples produced by the input DistributionGenerator.
* Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
*
* @param sc SparkContext used to create the RDD.
* @param generator DistributionGenerator used to populate the RDD.
Expand All @@ -210,7 +210,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples produced by the input DistributionGenerator.
* Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
*
* @param sc SparkContext used to create the RDD.
* @param generator DistributionGenerator used to populate the RDD.
Expand All @@ -228,7 +228,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD comprised of i.i.d samples produced by the input DistributionGenerator.
* Generates an RDD comprised of i.i.d. samples produced by the input DistributionGenerator.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
Expand All @@ -247,7 +247,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* uniform distribution on [0.0, 1.0].
*
* @param sc SparkContext used to create the RDD.
Expand All @@ -269,14 +269,14 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* uniform distribution on [0.0, 1.0].
*
* @param sc SparkContext used to create the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @return RDD[Vector] with vectors containing i.i.d samples ~ U[0.0, 1.0].
* @return RDD[Vector] with vectors containing i.i.d. samples ~ U[0.0, 1.0].
*/
@Experimental
def uniformVectorRDD(sc: SparkContext,
Expand All @@ -288,14 +288,14 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* uniform distribution on [0.0, 1.0].
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @return RDD[Vector] with vectors containing i.i.d samples ~ U[0.0, 1.0].
* @return RDD[Vector] with vectors containing i.i.d. samples ~ U[0.0, 1.0].
*/
@Experimental
def uniformVectorRDD(sc: SparkContext, numRows: Long, numCols: Int): RDD[Vector] = {
Expand All @@ -304,15 +304,15 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* standard normal distribution.
*
* @param sc SparkContext used to create the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @param seed Seed for the RNG that generates the seed for the generator in each partition.
* @return RDD[Vector] with vectors containing i.i.d samples ~ N(0.0, 1.0).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
*/
@Experimental
def normalVectorRDD(sc: SparkContext,
Expand All @@ -326,14 +326,14 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* standard normal distribution.
*
* @param sc SparkContext used to create the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @return RDD[Vector] with vectors containing i.i.d samples ~ N(0.0, 1.0).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
*/
@Experimental
def normalVectorRDD(sc: SparkContext,
Expand All @@ -345,14 +345,14 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* standard normal distribution.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @return RDD[Vector] with vectors containing i.i.d samples ~ N(0.0, 1.0).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ N(0.0, 1.0).
*/
@Experimental
def normalVectorRDD(sc: SparkContext, numRows: Long, numCols: Int): RDD[Vector] = {
Expand All @@ -361,7 +361,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* Poisson distribution with the input mean.
*
* @param sc SparkContext used to create the RDD.
Expand All @@ -370,7 +370,7 @@ object RandomRDDGenerators {
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @param seed Seed for the RNG that generates the seed for the generator in each partition.
* @return RDD[Vector] with vectors containing i.i.d samples ~ Pois(mean).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
*/
@Experimental
def poissonVectorRDD(sc: SparkContext,
Expand All @@ -385,15 +385,15 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* Poisson distribution with the input mean.
*
* @param sc SparkContext used to create the RDD.
* @param mean Mean, or lambda, for the Poisson distribution.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @return RDD[Vector] with vectors containing i.i.d samples ~ Pois(mean).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
*/
@Experimental
def poissonVectorRDD(sc: SparkContext,
Expand All @@ -406,15 +406,15 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples drawn from the
* Generates an RDD[Vector] with vectors containing i.i.d. samples drawn from the
* Poisson distribution with the input mean.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
* @param mean Mean, or lambda, for the Poisson distribution.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @return RDD[Vector] with vectors containing i.i.d samples ~ Pois(mean).
* @return RDD[Vector] with vectors containing i.i.d. samples ~ Pois(mean).
*/
@Experimental
def poissonVectorRDD(sc: SparkContext,
Expand All @@ -426,7 +426,7 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples produced by the
* Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
* input DistributionGenerator.
*
* @param sc SparkContext used to create the RDD.
Expand All @@ -435,7 +435,7 @@ object RandomRDDGenerators {
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @param seed Seed for the RNG that generates the seed for the generator in each partition.
* @return RDD[Vector] with vectors containing i.i.d samples produced by generator.
* @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
*/
@Experimental
def randomVectorRDD(sc: SparkContext,
Expand All @@ -449,15 +449,15 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples produced by the
* Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
* input DistributionGenerator.
*
* @param sc SparkContext used to create the RDD.
* @param generator DistributionGenerator used to populate the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @param numPartitions Number of partitions in the RDD.
* @return RDD[Vector] with vectors containing i.i.d samples produced by generator.
* @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
*/
@Experimental
def randomVectorRDD(sc: SparkContext,
Expand All @@ -470,15 +470,15 @@ object RandomRDDGenerators {

/**
* :: Experimental ::
* Generates an RDD[Vector] with vectors containing i.i.d samples produced by the
* Generates an RDD[Vector] with vectors containing i.i.d. samples produced by the
* input DistributionGenerator.
* sc.defaultParallelism used for the number of partitions in the RDD.
*
* @param sc SparkContext used to create the RDD.
* @param generator DistributionGenerator used to populate the RDD.
* @param numRows Number of Vectors in the RDD.
* @param numCols Number of elements in each Vector.
* @return RDD[Vector] with vectors containing i.i.d samples produced by generator.
* @return RDD[Vector] with vectors containing i.i.d. samples produced by generator.
*/
@Experimental
def randomVectorRDD(sc: SparkContext,
Expand Down
7 changes: 4 additions & 3 deletions python/pyspark/mllib/randomRDD.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from pyspark.rdd import RDD
from pyspark.mllib._common import _deserialize_double, _deserialize_double_vector
from pyspark.serializers import NoOpSerializer
from pyspark.statcounter import StatCounter

class RandomRDDGenerators:
"""
Expand All @@ -31,12 +30,12 @@ class RandomRDDGenerators:
@staticmethod
def uniformRDD(sc, size, numPartitions=None, seed=None):
"""
Generates an RDD comprised of i.i.d samples from the
Generates an RDD comprised of i.i.d. samples from the
uniform distribution on [0.0, 1.0].
To transform the distribution in the generated RDD from U[0.0, 1.0]
to U[a, b], use
C{RandomRDDGenerators.uniformRDD(sc, n, p, seed).map(lambda v: (b - a) * v)}
C{RandomRDDGenerators.uniformRDD(sc, n, p, seed).map(lambda v: a + (b - a) * v)}
>>> x = RandomRDDGenerators.uniformRDD(sc, 100).collect()
>>> len(x)
Expand Down Expand Up @@ -65,6 +64,7 @@ def normalRDD(sc, size, numPartitions=None, seed=None):
C{RandomRDDGenerators.normalRDD(sc, n, p, seed).map(lambda v: mean + sigma * v)}
>>> x = RandomRDDGenerators.normalRDD(sc, 1000, seed=1L).collect()
>>> from pyspark.statcounter import StatCounter
>>> stats = StatCounter(x)
>>> stats.count()
1000L
Expand All @@ -86,6 +86,7 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
>>> mean = 100.0
>>> x = RandomRDDGenerators.poissonRDD(sc, mean, 1000, seed=1L).collect()
>>> from pyspark.statcounter import StatCounter
>>> stats = StatCounter(x)
>>> stats.count()
1000L
Expand Down

0 comments on commit bd2df13

Please sign in to comment.