Skip to content

Commit

Permalink
add crosstab pyTest
Browse files Browse the repository at this point in the history
  • Loading branch information
brkyvz committed May 1, 2015
1 parent fd53b00 commit 7f098bc
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
7 changes: 1 addition & 6 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,17 +899,12 @@ def crosstab(self, col1, col2):
:param col1: The name of the first column
:param col2: The name of the second column
>>> df3.crosstab("age", "height").show()
age_height 80 85
2 1 1
5 1 1
"""
if not isinstance(col1, str):
raise ValueError("col1 should be a string.")
if not isinstance(col2, str):
raise ValueError("col2 should be a string.")
return self._jdf.stat().crosstab(col1, col2)
return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx)

@ignore_unicode_prefix
def withColumn(self, colName, col):
Expand Down
8 changes: 8 additions & 0 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,14 @@ def test_cov(self):
cov = df.stat.cov("a", "b")
self.assertTrue(abs(cov - 55.0 / 3) < 1e-6)

def test_crosstab(self):
    """Check that ``df.stat.crosstab`` returns one row per ``a`` value with unit counts.

    The input has six rows with ``a = i % 3`` and ``b = i % 2`` for
    ``i`` in 1..6, so every (a, b) combination occurs exactly once and
    each count cell of the contingency table must equal 1.
    """
    df = self.sc.parallelize([Row(a=i % 3, b=i % 2) for i in range(1, 7)]).toDF()
    rows = df.stat.crosstab("a", "b").collect()
    # Sort by the first column (the stringified value of `a`) so that the
    # index-based comparison below does not depend on the order in which
    # the rows happen to come back from collect().
    rows = sorted(rows, key=lambda r: r[0])
    for i, row in enumerate(rows):
        self.assertEqual(row[0], str(i))
        # assertEqual, not assertTrue: assertTrue(x, 1) treats `1` as the
        # failure message and only checks that x is truthy.
        self.assertEqual(row[1], 1)
        self.assertEqual(row[2], 1)

def test_math_functions(self):
df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
from pyspark.sql import mathfunctions as functions
Expand Down

0 comments on commit 7f098bc

Please sign in to comment.