From 48f552cb087b0f8e3a87d43191450474734cae06 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Fri, 2 Feb 2018 20:35:59 -0800
Subject: [PATCH 1/6] update create function.

---
 .../sql/catalyst/catalog/SessionCatalog.scala      |  7 +++++--
 .../spark/sql/execution/command/functions.scala    | 14 +++++++++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index a129896230775..4b119c75260a7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -988,8 +988,11 @@ class SessionCatalog(
   // -------------------------------------------------------
 
   /**
-   * Create a metastore function in the database specified in `funcDefinition`.
+   * Create a function in the database specified in `funcDefinition`.
    * If no such database is specified, create it in the current database.
+   *
+   * @param ignoreIfExists When true, ignore if the function with the specified name exists
+   *                       in the specified database.
    */
   def createFunction(funcDefinition: CatalogFunction, ignoreIfExists: Boolean): Unit = {
     val db = formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase))
@@ -1061,7 +1064,7 @@ class SessionCatalog(
   }
 
   /**
-   * Check if the specified function exists.
+   * Check if the function with the specified name exists.
    */
   def functionExists(name: FunctionIdentifier): Boolean = {
     val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index 4f92ffee687aa..1f7808c2f8e80 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -40,6 +40,10 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
  *    CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [databaseName.]functionName
  *    AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']]
  * }}}
+ *
+ * @param ignoreIfExists When true, ignore if the function with the specified name exists
+ *                       in the specified database.
+ * @param replace When true, alter the function with the specified name.
  */
 case class CreateFunctionCommand(
     databaseName: Option[String],
@@ -47,17 +51,17 @@ case class CreateFunctionCommand(
     className: String,
     resources: Seq[FunctionResource],
     isTemp: Boolean,
-    ifNotExists: Boolean,
+    ignoreIfExists: Boolean,
     replace: Boolean)
   extends RunnableCommand {
 
-  if (ifNotExists && replace) {
+  if (ignoreIfExists && replace) {
     throw new AnalysisException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE" +
       " is not allowed.")
   }
 
   // Disallow to define a temporary function with `IF NOT EXISTS`
-  if (ifNotExists && isTemp) {
+  if (ignoreIfExists && isTemp) {
     throw new AnalysisException(
       "It is not allowed to define a TEMPORARY function with IF NOT EXISTS.")
   }
@@ -79,12 +83,12 @@ case class CreateFunctionCommand(
       // Handles `CREATE OR REPLACE FUNCTION AS ... USING ...`
       if (replace && catalog.functionExists(func.identifier)) {
         // alter the function in the metastore
-        catalog.alterFunction(CatalogFunction(func.identifier, className, resources))
+        catalog.alterFunction(func)
       } else {
         // For a permanent, we will store the metadata into underlying external catalog.
         // This function will be loaded into the FunctionRegistry when a query uses it.
         // We do not load it into FunctionRegistry right now.
-        catalog.createFunction(CatalogFunction(func.identifier, className, resources), ifNotExists)
+        catalog.createFunction(func, ignoreIfExists)
       }
     }
     Seq.empty[Row]

From 21ea233b6ef44bac4aefe13ae9014badac3450b1 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Fri, 2 Feb 2018 22:51:03 -0800
Subject: [PATCH 2/6] update the comment for repartitionByRange

---
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d47cd0aecf56a..0aee1d7be5788 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2825,6 +2825,7 @@ class Dataset[T] private[sql](
    *
    * At least one partition-by expression must be specified.
    * When no explicit sort order is specified, "ascending nulls first" is assumed.
+   * Note, the rows are not sorted in each partition of the resulting Dataset.
    *
    * @group typedrel
    * @since 2.3.0
@@ -2848,6 +2849,7 @@ class Dataset[T] private[sql](
    *
    * At least one partition-by expression must be specified.
    * When no explicit sort order is specified, "ascending nulls first" is assumed.
+   * Note, the rows are not sorted in each partition of the resulting Dataset.
    *
    * @group typedrel
    * @since 2.3.0

From 9ecc809056800058cc95a1341fd9b85fa247867f Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Fri, 2 Feb 2018 23:24:08 -0800
Subject: [PATCH 3/6] update the comment of length

---
 python/pyspark/sql/functions.py                  |  8 +++++---
 .../catalyst/expressions/stringExpressions.scala | 16 +++++++++-------
 .../scala/org/apache/spark/sql/functions.scala   |  4 +++-
 3 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3c8fb4c4d19e7..3867b06db14bf 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1705,10 +1705,12 @@ def unhex(col):
 @ignore_unicode_prefix
 @since(1.5)
 def length(col):
-    """Calculates the length of a string or binary expression.
+    """Computes the character length of a given string or number of bytes or a binary string.
+    The length of character strings include the trailing spaces. The length of binary strings
+    includes binary zeros.
 
-    >>> spark.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
-    [Row(length=3)]
+    >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
+    [Row(length=4)]
     """
     sc = SparkContext._active_spark_context
     return Column(sc._jvm.functions.length(_to_java_column(col)))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 5cf783f1a5979..af06b9eacd8b7 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1655,15 +1655,17 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
  */
 // scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data.",
+  usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data. " +
+    "The length of character strings include the trailing spaces. The length of binary strings " +
+    "includes binary zeros.",
   examples = """
     Examples:
-      > SELECT _FUNC_('Spark SQL');
-       9
-      > SELECT CHAR_LENGTH('Spark SQL');
-       9
-      > SELECT CHARACTER_LENGTH('Spark SQL');
-       9
+      > SELECT _FUNC_('Spark SQL ');
+       10
+      > SELECT CHAR_LENGTH('Spark SQL ');
+       10
+      > SELECT CHARACTER_LENGTH('Spark SQL ');
+       10
   """)
 // scalastyle:on line.size.limit
 case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0d11682d80a3c..edd4c54d44587 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2267,7 +2267,9 @@ object functions {
   }
 
   /**
-   * Computes the length of a given string or binary column.
+   * Computes the character length of a given string or number of bytes or a binary string.
+   * The length of character strings include the trailing spaces. The length of binary strings
+   * includes binary zeros.
    *
    * @group string_funcs
    * @since 1.5.0

From c33cc9af13cfe5daf193c23a31f9f97bdda463b4 Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Sat, 3 Feb 2018 00:00:17 -0800
Subject: [PATCH 4/6] fix the test

---
 .../spark/sql/execution/command/DDLParserSuite.scala   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index 2b1aea08b1223..e0ccae15f1d05 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -236,7 +236,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       Seq(
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
-      isTemp = true, ifNotExists = false, replace = false)
+      isTemp = true, ignoreIfExists = false, replace = false)
     val expected2 = CreateFunctionCommand(
       Some("hello"),
       "world",
@@ -244,7 +244,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       Seq(
         FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
         FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
-      isTemp = false, ifNotExists = false, replace = false)
+      isTemp = false, ignoreIfExists = false, replace = false)
     val expected3 = CreateFunctionCommand(
       None,
       "helloworld3",
@@ -252,7 +252,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       Seq(
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
-      isTemp = true, ifNotExists = false, replace = true)
+      isTemp = true, ignoreIfExists = false, replace = true)
     val expected4 = CreateFunctionCommand(
       Some("hello"),
       "world1",
@@ -260,7 +260,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       Seq(
         FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
         FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
-      isTemp = false, ifNotExists = false, replace = true)
+      isTemp = false, ignoreIfExists = false, replace = true)
     val expected5 = CreateFunctionCommand(
       Some("hello"),
       "world2",
@@ -268,7 +268,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
       Seq(
         FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
         FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
-      isTemp = false, ifNotExists = true, replace = false)
+      isTemp = false, ignoreIfExists = true, replace = false)
     comparePlans(parsed1, expected1)
     comparePlans(parsed2, expected2)
     comparePlans(parsed3, expected3)

From 9e97db9da89c9d9f8bb467eb025239041b3231db Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Mon, 5 Feb 2018 16:02:48 -0800
Subject: [PATCH 5/6] address comments.

---
 R/pkg/R/functions.R                                          | 4 +++-
 python/pyspark/sql/functions.py                              | 2 +-
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 55365a41d774b..5d7da69333644 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1026,7 +1026,9 @@ setMethod("last_day",
           })
 
 #' @details
-#' \code{length}: Computes the length of a given string or binary column.
+#' \code{length}: Computes the character length of a given string or number of bytes
+#' of a binary string. The length of character strings include the trailing spaces.
+#' The length of binary strings includes binary zeros.
 #'
 #' @rdname column_string_functions
 #' @aliases length length,Column-method
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3867b06db14bf..4f138df251015 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1705,7 +1705,7 @@ def unhex(col):
 @ignore_unicode_prefix
 @since(1.5)
 def length(col):
-    """Computes the character length of a given string or number of bytes or a binary string.
+    """Computes the character length of a given string or number of bytes of a binary string.
     The length of character strings include the trailing spaces. The length of binary strings
     includes binary zeros.
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index edd4c54d44587..0d54c02c3d06f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2267,7 +2267,7 @@ object functions {
   }
 
   /**
-   * Computes the character length of a given string or number of bytes or a binary string.
+   * Computes the character length of a given string or number of bytes of a binary string.
    * The length of character strings include the trailing spaces. The length of binary strings
    * includes binary zeros.
    *

From b83705367b15c16b7208ea24085e063760770caa Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Tue, 6 Feb 2018 11:26:07 -0800
Subject: [PATCH 6/6] address comments.

---
 R/pkg/R/functions.R                                 |  6 +++---
 python/pyspark/sql/functions.py                     |  4 ++--
 .../catalyst/expressions/stringExpressions.scala    | 13 ++++++-------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5d7da69333644..9f7c6317cd924 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1026,9 +1026,9 @@ setMethod("last_day",
           })
 
 #' @details
-#' \code{length}: Computes the character length of a given string or number of bytes
-#' of a binary string. The length of character strings include the trailing spaces.
-#' The length of binary strings includes binary zeros.
+#' \code{length}: Computes the character length of string data or number of bytes
+#' of binary data. The length of string data includes the trailing spaces.
+#' The length of binary data includes binary zeros.
 #'
 #' @rdname column_string_functions
 #' @aliases length length,Column-method
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 4f138df251015..05031f5ec87d7 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1705,8 +1705,8 @@ def unhex(col):
 @ignore_unicode_prefix
 @since(1.5)
 def length(col):
-    """Computes the character length of a given string or number of bytes of a binary string.
-    The length of character strings include the trailing spaces. The length of binary strings
+    """Computes the character length of string data or number of bytes of binary data.
+    The length of string data includes the trailing spaces. The length of binary data
     includes binary zeros.
 
     >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index af06b9eacd8b7..d7612e30b4c57 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1653,11 +1653,10 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
  * A function that returns the char length of the given string expression or
  * number of bytes of the given binary expression.
  */
-// scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data. " +
-    "The length of character strings include the trailing spaces. The length of binary strings " +
-    "includes binary zeros.",
+  usage = "_FUNC_(expr) - Returns the character length of string data or number of bytes of " +
+    "binary data. The length of string data includes the trailing spaces. The length of binary " +
+    "data includes binary zeros.",
   examples = """
     Examples:
       > SELECT _FUNC_('Spark SQL ');
@@ -1667,7 +1666,6 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
       > SELECT CHARACTER_LENGTH('Spark SQL ');
        10
   """)
-// scalastyle:on line.size.limit
 case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
   override def dataType: DataType = IntegerType
   override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
@@ -1689,7 +1687,7 @@ case class Length(child: Expression) extends UnaryExpression with ImplicitCastIn
  * A function that returns the bit length of the given string or binary expression.
  */
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Returns the bit length of `expr` or number of bits in binary data.",
+  usage = "_FUNC_(expr) - Returns the bit length of string data or number of bits of binary data.",
   examples = """
     Examples:
       > SELECT _FUNC_('Spark SQL');
@@ -1718,7 +1716,8 @@ case class BitLength(child: Expression) extends UnaryExpression with ImplicitCas
  * A function that returns the byte length of the given string or binary expression.
  */
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Returns the byte length of `expr` or number of bytes in binary data.",
+  usage = "_FUNC_(expr) - Returns the byte length of string data or number of bytes of binary " +
+    "data.",
   examples = """
     Examples:
       > SELECT _FUNC_('Spark SQL');