From 991f26f4ca51d8e7a214c0da51cabde3ced9169d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Thu, 11 Jun 2015 18:49:29 -0700 Subject: [PATCH] fix seed --- python/pyspark/sql/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index a4a375f0a0000..68e33f89f28c8 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -459,7 +459,7 @@ def sampleBy(self, col, fractions, seed=None): >>> from pyspark.sql.functions import col >>> dataset = sqlContext.range(0, 100).select((col("id") % 3).alias("key")) - >>> sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0L) + >>> sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0) >>> sampled.groupBy("key").count().orderBy("key").show() +---+-----+ |key|count|