
Commit d910141
Updated rest of the files
rxin committed May 8, 2015
1 parent 1e6e666 commit d910141
Showing 4 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion python/pyspark/sql/dataframe.py
@@ -1069,7 +1069,7 @@ def agg(self, *exprs):
        >>> from pyspark.sql import functions as F
        >>> gdf.agg(F.min(df.age)).collect()
-       [Row(MIN(age)=2), Row(MIN(age)=5)]
+       [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)]
        """
        assert exprs, "exprs should not be empty"
        if len(exprs) == 1 and isinstance(exprs[0], dict):
5 changes: 3 additions & 2 deletions GroupedData.scala
@@ -135,8 +135,9 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
  }

  /**
-   * Compute aggregates by specifying a series of aggregate columns. Unlike other methods in this
-   * class, the resulting [[DataFrame]] won't automatically include the grouping columns.
+   * Compute aggregates by specifying a series of aggregate columns. Note that this function by
+   * default retains the grouping columns in its output. To not retain grouping columns, set
+   * `spark.sql.retainGroupColumns` to false.
   *
   * The available aggregate methods are defined in [[org.apache.spark.sql.functions]].
   *
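
For reference, a minimal sketch of the new default described in the scaladoc above, assuming a plain local Spark 1.4 setup (the "name"/"age" data and the app name are illustrative, not part of this commit):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._

object RetainGroupColumnsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Hypothetical input: two groups of ages.
    val df = Seq(("Alice", 2), ("Alice", 7), ("Bob", 5)).toDF("name", "age")

    // New default: the grouping column "name" is kept next to MIN(age).
    df.groupBy("name").agg(min("age")).show()

    // Opting out via the config named in the updated scaladoc restores the old shape
    // (aggregate columns only).
    sqlContext.setConf("spark.sql.retainGroupColumns", "false")
    df.groupBy("name").agg(min("age")).show()

    sc.stop()
  }
}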
2 changes: 1 addition & 1 deletion StatFunctions.scala
@@ -102,7 +102,7 @@ private[sql] object StatFunctions extends Logging {
  /** Generate a table of frequencies for the elements of two columns. */
  private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = {
    val tableName = s"${col1}_$col2"
-    val counts = df.groupBy(col1, col2).agg(col(col1), col(col2), count("*")).take(1e6.toInt)
+    val counts = df.groupBy(col1, col2).agg(count("*")).take(1e6.toInt)
    if (counts.length == 1e6.toInt) {
      logWarning("The maximum limit of 1e6 pairs have been collected, which may not be all of " +
        "the pairs. Please try reducing the amount of distinct items in your columns.")
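
The simplification above works because groupBy(col1, col2).agg(count("*")) now already yields (col1, col2, count) rows once grouping columns are retained, so the explicit col(col1)/col(col2) projection is redundant. A short sketch of the same aggregation, plus df.stat.crosstab, the public entry point that uses this private helper (reusing the SQLContext setup from the sketch above; the "name"/"item" data is illustrative):

import org.apache.spark.sql.functions._
import sqlContext.implicits._   // sqlContext as constructed in the sketch above

// Hypothetical purchase log.
val purchases = Seq(("Alice", "apple"), ("Alice", "pear"), ("Bob", "apple"))
  .toDF("name", "item")

// The rows crossTabulate now collects: both grouping columns plus the count.
purchases.groupBy("name", "item").agg(count("*")).show()

// The user-facing contingency-table call built on top of crossTabulate.
purchases.stat.crosstab("name", "item").show()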
4 changes: 2 additions & 2 deletions DataFrameSuite.scala
@@ -62,7 +62,7 @@ class DataFrameSuite extends QueryTest {
    val df = Seq((1,(1,1))).toDF()

    checkAnswer(
-      df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"),
+      df.groupBy("_1").agg(sum("_2._1")).toDF("key", "total"),
      Row(1, 1) :: Nil)
  }

@@ -127,7 +127,7 @@ class DataFrameSuite extends QueryTest {
      df2
        .select('_1 as 'letter, 'number)
        .groupBy('letter)
-        .agg('letter, countDistinct('number)),
+        .agg(countDistinct('number)),
      Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil
    )
  }

0 comments on commit d910141
