From ae9e01d48b0451baa219943449d5532ab2226708 Mon Sep 17 00:00:00 2001
From: Burak Yavuz
Date: Sun, 3 May 2015 23:43:45 -0700
Subject: [PATCH] fix test

---
 python/pyspark/sql/dataframe.py                                     | 2 +-
 python/pyspark/sql/tests.py                                         | 2 +-
 .../main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 6fa322cfd3a58..832c758b592a7 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -915,7 +915,7 @@ def crosstab(self, col1, col2):
         Computes a pair-wise frequency table of the given columns. Also known as a contingency
         table. The number of distinct values for each column should be less than 1e4. The first
         column of each row will be the distinct values of `col1` and the column names will be the
-        distinct values of `col2`. Pairs that have no occurrences will have `null` as their values.
+        distinct values of `col2`. Pairs that have no occurrences will have `null` as their counts.
         :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases.

         :param col1: The name of the first column. Distinct items will make the first item of
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index d6cbd0a046d6b..7ea6656d31c4e 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -408,7 +408,7 @@ def test_cov(self):
     def test_crosstab(self):
         df = self.sc.parallelize([Row(a=i % 3, b=i % 2) for i in range(1, 7)]).toDF()
         ct = df.stat.crosstab("a", "b").collect()
-        ct = sorted(ct, lambda r: r[0])
+        ct = sorted(ct, key=lambda x: x[0])
         for i, row in enumerate(ct):
             self.assertEqual(row[0], str(i))
             self.assertTrue(row[1], 1)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 67327ad5da8c9..6b4e68dfe60ad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -68,7 +68,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
    * The number of distinct values for each column should be less than 1e4. The first
    * column of each row will be the distinct values of `col1` and the column names will be the
    * distinct values of `col2`. Counts will be returned as `Long`s. Pairs that have no occurrences
-   * will have `null` as their values.
+   * will have `null` as their counts.
    *
    * @param col1 The name of the first column. Distinct items will make the first item of
    *             each row.