From 3b69777924d0ac54bc4b6ec9c740cb20774bf033 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 20 Nov 2017 07:13:32 +0000
Subject: [PATCH 1/3] Add document for udf.

---
 python/pyspark/sql/functions.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 087ce7caa89c8..829451191153d 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2205,6 +2205,10 @@ def udf(f=None, returnType=StringType()):
         rows that do not satisfy the conditions, the suggested workaround is to incorporate the
         condition logic into the functions.
 
+    .. note:: Users can't rely on short-curcuit evaluation of boolean expressions to execute
+        conditionally user-defined functions too. For example, the two functions in an expression
+        like udf1(x) && udf2(y) will be both executed on all rows.
+
     :param f: python function if used as a standalone function
     :param returnType: a :class:`pyspark.sql.types.DataType` object
 

From 8efb9c2f132704b0a16f205c470800ebf725c939 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Tue, 21 Nov 2017 03:18:46 +0000
Subject: [PATCH 2/3] Revise doc.

---
 python/pyspark/sql/functions.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 829451191153d..e49b1fb889b42 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2198,16 +2198,9 @@ def udf(f=None, returnType=StringType()):
         duplicate invocations may be eliminated or the function may even be invoked more times than
         it is present in the query.
 
-    .. note:: The user-defined functions do not support conditional execution by using them with
-        SQL conditional expressions such as `when` or `if`. The functions still apply on all rows no
-        matter the conditions are met or not. So the output is correct if the functions can be
-        correctly run on all rows without failure. If the functions can cause runtime failure on the
-        rows that do not satisfy the conditions, the suggested workaround is to incorporate the
-        condition logic into the functions.
-
-    .. note:: Users can't rely on short-curcuit evaluation of boolean expressions to execute
-        conditionally user-defined functions too. For example, the two functions in an expression
-        like udf1(x) && udf2(y) will be both executed on all rows.
+    .. note:: The user-defined functions do not support conditional expressions or short-circuiting
+        in boolean expressions; they end up being executed on all rows internally. If the functions
+        can fail on certain rows, the workaround is to incorporate the condition into the functions.
 
     :param f: python function if used as a standalone function
     :param returnType: a :class:`pyspark.sql.types.DataType` object

From e6775809d80d110f814615223c5800d94f595195 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Tue, 21 Nov 2017 07:59:21 +0000
Subject: [PATCH 3/3] Revise doc for pandas_udf.

---
 python/pyspark/sql/functions.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index e49b1fb889b42..425a3fdf4446a 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2296,12 +2296,9 @@ def pandas_udf(f=None, returnType=StringType()):
 
     .. note:: The user-defined function must be deterministic.
 
-    .. note:: The user-defined functions do not support conditional execution by using them with
-        SQL conditional expressions such as `when` or `if`. The functions still apply on all rows no
-        matter the conditions are met or not. So the output is correct if the functions can be
-        correctly run on all rows without failure. If the functions can cause runtime failure on the
-        rows that do not satisfy the conditions, the suggested workaround is to incorporate the
-        condition logic into the functions.
+    .. note:: The user-defined functions do not support conditional expressions or short-circuiting
+        in boolean expressions; they end up being executed on all rows internally. If the functions
+        can fail on certain rows, the workaround is to incorporate the condition into the functions.
     """
     return _create_udf(f, returnType=returnType, pythonUdfType=PythonUdfType.PANDAS_UDF)