From 663d43da3dd69beddbc958d63cf11f18737e40a2 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 29 Jan 2015 01:04:38 +0900 Subject: [PATCH 1/3] Add missing DSL for ApproxCountDistinct. --- sql/core/src/main/scala/org/apache/spark/sql/dsl/package.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/dsl/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/dsl/package.scala index 4c44e178b9976..16ffd81982bf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/dsl/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/dsl/package.scala @@ -48,6 +48,8 @@ package object dsl { def countDistinct(expr: Column, exprs: Column*): Column = CountDistinct((expr +: exprs).map(_.expr)) + def approxCountDistinct(e: Column, rsd: Double = 0.05): Column = + ApproxCountDistinct(e.expr, rsd) def avg(e: Column): Column = Average(e.expr) def first(e: Column): Column = First(e.expr) def last(e: Column): Column = Last(e.expr) From faea19d7f0f541d40c49034abac999369ccbc3b4 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 30 Jan 2015 12:27:15 +0900 Subject: [PATCH 2/3] Use overload instead of default value for Java support. --- sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala index 343e189f92987..214dac605353c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala @@ -105,8 +105,11 @@ object Dsl { def countDistinct(expr: Column, exprs: Column*): Column = CountDistinct((expr +: exprs).map(_.expr)) - def approxCountDistinct(e: Column, rsd: Double = 0.05): Column = + def approxCountDistinct(e: Column): Column = + ApproxCountDistinct(e.expr, 0.05) + def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd) + def avg(e: Column): Column = Average(e.expr) def first(e: Column): Column = First(e.expr) def last(e: Column): Column = Last(e.expr) From 3c05e5912309881f297f36758535da10a100707f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 30 Jan 2015 12:31:31 +0900 Subject: [PATCH 3/3] Remove parameter to use default value of ApproxCountDistinct. --- sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala index 214dac605353c..3499956023d11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dsl.scala @@ -106,7 +106,7 @@ object Dsl { CountDistinct((expr +: exprs).map(_.expr)) def approxCountDistinct(e: Column): Column = - ApproxCountDistinct(e.expr, 0.05) + ApproxCountDistinct(e.expr) def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd)