From fdf3ddb28253d34f9c29f52fc9e26584363837b8 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Fri, 24 Feb 2017 15:39:06 -0800
Subject: [PATCH 1/8] Added documentation for like, rlike, startswith, and endswith

---
 python/pyspark/sql/column.py | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index ec05c18d4f062..62e14bb21bfca 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -250,11 +250,39 @@ def __iter__(self):
         raise TypeError("Column is not iterable")
 
     # string methods
+    _rlike_doc = """ Return a Boolean :class:`Column` based on a regex match.\n
+    :param other: an extended regex expression\n
+
+    >>> df.filter( df.name.rlike('ice$') ).collect()
+    [Row(name=u'Alice', age=1)]
+    """
+    _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.\n
+    :param other: a SQL LIKE pattern\n
+    See :func:`pyspark.sql.Column.rlike` for a regex version\n
+
+    >>> df.filter( df.name.like('Al%') ).collect()
+    [Row(name=u'Alice', age=1)]
+    """
+    _startswith_doc = ''' Return a Boolean :class:`Column` based on a string match.\n
+    :param other: string at start of line (do not use a regex `^`)\n
+    >>> df.filter(df.name.startswith('Al')).collect()
+    [Row(name=u'Alice', age=1)]
+    >>> df.filter(df.name.startswith('^Al')).collect()
+    []
+    '''
+    _endswith_doc = ''' Return a Boolean :class:`Column` based on matching end of string.\n
+    :param other: string at end of line (do not use a regex `$`)\n
+    >>> df.filter(df.name.endswith('ice')).collect()
+    [Row(name=u'Alice', age=1)]
+    >>> df.filter(df.name.endswith('ice$')).collect()
+    []
+    '''
+
     contains = _bin_op("contains")
-    rlike = _bin_op("rlike")
-    like = _bin_op("like")
-    startswith = _bin_op("startsWith")
-    endswith = _bin_op("endsWith")
+    rlike = _bin_op("rlike", _rlike_doc)
+    like = _bin_op("like", _like_doc)
+    startswith = _bin_op("startsWith", _startswith_doc)
+    endswith = _bin_op("endsWith", _endswith_doc)
 
     @ignore_unicode_prefix
     @since(1.3)
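A note on the mechanism: the two-argument calls above work because `_bin_op` accepts an optional `doc` string and attaches it to the method it generates. The helper, defined near the top of column.py, looks roughly like this (paraphrased here for context; it is not part of the diff):

    def _bin_op(name, doc="binary operator"):
        """Create an operator method that forwards to the named method
        on the underlying JVM Column."""
        def _(self, other):
            # Unwrap a Column into its Java counterpart; literals pass through.
            jc = other._jc if isinstance(other, Column) else other
            njc = getattr(self._jc, name)(jc)
            return Column(njc)
        _.__doc__ = doc
        return _

Because the documentation is supplied as plain data rather than written under a `def`, everything doctest and Sphinx see comes from these `_*_doc` constants, which is what the commits below keep refining.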
From c22c0d9e3bb46be5265a832079cc951cfc738f10 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Fri, 31 Mar 2017 11:02:10 -0700
Subject: [PATCH 2/8] Fixed formatting issues: indents and block quotes

---
 python/pyspark/sql/column.py | 46 +++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 62e14bb21bfca..27c0a8f0a9f5f 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -251,32 +251,34 @@ def __iter__(self):
 
     # string methods
     _rlike_doc = """ Return a Boolean :class:`Column` based on a regex match.\n
-    :param other: an extended regex expression\n
+        :param other: an extended regex expression
 
-    >>> df.filter( df.name.rlike('ice$') ).collect()
-    [Row(name=u'Alice', age=1)]
-    """
+        >>> df.filter( df.name.rlike('ice$') ).collect()
+        [Row(name=u'Alice', age=1)]
+        """
     _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.\n
-    :param other: a SQL LIKE pattern\n
-    See :func:`pyspark.sql.Column.rlike` for a regex version\n
+        :param other: a SQL LIKE pattern\n
+        See :func:`pyspark.sql.Column.rlike` for a regex version
 
-    >>> df.filter( df.name.like('Al%') ).collect()
-    [Row(name=u'Alice', age=1)]
+        >>> df.filter( df.name.like('Al%') ).collect()
+        [Row(name=u'Alice', age=1)]
     """
-    _startswith_doc = ''' Return a Boolean :class:`Column` based on a string match.\n
-    :param other: string at start of line (do not use a regex `^`)\n
-    >>> df.filter(df.name.startswith('Al')).collect()
-    [Row(name=u'Alice', age=1)]
-    >>> df.filter(df.name.startswith('^Al')).collect()
-    []
-    '''
-    _endswith_doc = ''' Return a Boolean :class:`Column` based on matching end of string.\n
-    :param other: string at end of line (do not use a regex `$`)\n
-    >>> df.filter(df.name.endswith('ice')).collect()
-    [Row(name=u'Alice', age=1)]
-    >>> df.filter(df.name.endswith('ice$')).collect()
-    []
-    '''
+    _startswith_doc = """ Return a Boolean :class:`Column` based on a string match.\n
+        :param other: string at start of line (do not use a regex `^`)
+
+        >>> df.filter(df.name.startswith('Al')).collect()
+        [Row(name=u'Alice', age=1)]
+        >>> df.filter(df.name.startswith('^Al')).collect()
+        []
+        """
+    _endswith_doc = """ Return a Boolean :class:`Column` based on matching end of string.\n
+        :param other: string at end of line (do not use a regex `$`)
+
+        >>> df.filter(df.name.endswith('ice')).collect()
+        [Row(name=u'Alice', age=1)]
+        >>> df.filter(df.name.endswith('ice$')).collect()
+        []
+        """
 
     contains = _bin_op("contains")
     rlike = _bin_op("rlike", _rlike_doc)
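Note that several lines still carry a trailing `\n` escape inherited from the first commit. Inside a regular (non-raw) triple-quoted string that is a literal newline, so each affected line is followed by an extra blank line in the attached docstring, which is what produced the stray block quotes in the rendered API docs; the remaining escapes are cleaned out in PATCH 4 and PATCH 5. A quick way to see exactly what a given revision attaches is to print the generated method's docstring from a pyspark shell:

    from pyspark.sql import Column

    # The generated methods carry the _*_doc constants verbatim, so any
    # leftover blank lines or escapes show up directly in the output.
    print(Column.rlike.__doc__)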
From aaa365cc5d2a8a49b922187b5f996b2473011087 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Fri, 31 Mar 2017 13:04:56 -0700
Subject: [PATCH 3/8] Added docs for isNull and isNotNull

---
 python/pyspark/sql/column.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 27c0a8f0a9f5f..a3943760a97c0 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -258,7 +258,7 @@ def __iter__(self):
         """
     _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.\n
         :param other: a SQL LIKE pattern\n
-        See :func:`pyspark.sql.Column.rlike` for a regex version
+        See :func:`rlike` for a regex version
 
         >>> df.filter( df.name.like('Al%') ).collect()
         [Row(name=u'Alice', age=1)]
@@ -333,8 +333,25 @@ def isin(self, *cols):
     desc = _unary_op("desc", "Returns a sort expression based on the"
                      " descending order of the given column name.")
 
-    isNull = _unary_op("isNull", "True if the current expression is null.")
-    isNotNull = _unary_op("isNotNull", "True if the current expression is not null.")
+    _isNull_doc = ''' True if the current expression is null. Often combined with
+        :func:`DataFrame.filter` to select rows with null values.
+
+        >>> df2.collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2.filter( df2.height.isNull ).collect()
+        [Row(name=u'Alice', height=None)]
+        '''
+    _isNotNull_doc = ''' True if the current expression is not null. Often combined with
+        :func:`DataFrame.filter` to select rows with non-null values.
+
+        >>> df2.collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2.filter( df2.height.isNotNull ).collect()
+        [Row(name=u'Tom', height=80)]
+        '''
+
+    isNull = _unary_op("isNull", _isNull_doc )
+    isNotNull = _unary_op("isNotNull", _isNotNull_doc)
 
     @since(1.3)
     def alias(self, *alias, **kwargs):
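`_unary_op` is the zero-argument counterpart of `_bin_op`, again taking the docstring as data; roughly (paraphrased, not part of the diff):

    def _unary_op(name, doc="unary operator"):
        """Create a method that calls the named no-argument JVM Column method."""
        def _(self):
            jc = getattr(self._jc, name)()
            return Column(jc)
        _.__doc__ = doc
        return _

One consequence worth flagging: the result is an ordinary method, so the examples above that write `df2.height.isNull` without parentheses evaluate to the bound method rather than a Column, and `filter` would reject them. PATCH 7 adds the missing calls.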
From 83dc3d5a5e95920f83a57c1375776545a6291071 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Mon, 3 Apr 2017 10:35:03 -0700
Subject: [PATCH 4/8] Tuned whitespace

---
 python/pyspark/sql/column.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index a3943760a97c0..af14cec1d602f 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -250,17 +250,17 @@ def __iter__(self):
         raise TypeError("Column is not iterable")
 
     # string methods
-    _rlike_doc = """ Return a Boolean :class:`Column` based on a regex match.\n
+    _rlike_doc = """ Return a Boolean :class:`Column` based on a regex match.
         :param other: an extended regex expression
 
-        >>> df.filter( df.name.rlike('ice$') ).collect()
+        >>> df.filter(df.name.rlike('ice$')).collect()
         [Row(name=u'Alice', age=1)]
         """
-    _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.\n
+    _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.
         :param other: a SQL LIKE pattern\n
         See :func:`rlike` for a regex version
 
-        >>> df.filter( df.name.like('Al%') ).collect()
+        >>> df.filter(df.name.like('Al%')).collect()
         [Row(name=u'Alice', age=1)]
     """
     _startswith_doc = """ Return a Boolean :class:`Column` based on a string match.\n
@@ -333,24 +333,24 @@ def isin(self, *cols):
     desc = _unary_op("desc", "Returns a sort expression based on the"
                      " descending order of the given column name.")
 
-    _isNull_doc = ''' True if the current expression is null. Often combined with
+    _isNull_doc = """ True if the current expression is null. Often combined with
         :func:`DataFrame.filter` to select rows with null values.
 
         >>> df2.collect()
        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
-        >>> df2.filter( df2.height.isNull ).collect()
+        >>> df2.filter(df2.height.isNull).collect()
         [Row(name=u'Alice', height=None)]
-        '''
+        """
-    _isNotNull_doc = ''' True if the current expression is not null. Often combined with
+    _isNotNull_doc = """ True if the current expression is not null. Often combined with
         :func:`DataFrame.filter` to select rows with non-null values.
 
         >>> df2.collect()
         [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
-        >>> df2.filter( df2.height.isNotNull ).collect()
+        >>> df2.filter(df2.height.isNotNull).collect()
         [Row(name=u'Tom', height=80)]
-        '''
+        """
 
-    isNull = _unary_op("isNull", _isNull_doc )
+    isNull = _unary_op("isNull", _isNull_doc)
     isNotNull = _unary_op("isNotNull", _isNotNull_doc)
From 5afc442e33c7195fb0918773103cb939da8b1aee Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Wed, 5 Apr 2017 11:05:56 -0700
Subject: [PATCH 5/8] Fixed formatting issues: parentheses, block quotes, and newline

---
 python/pyspark/sql/column.py | 101 +++++++++++++++++++----------------
 1 file changed, 56 insertions(+), 45 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index af14cec1d602f..7859c74805323 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -250,35 +250,44 @@ def __iter__(self):
         raise TypeError("Column is not iterable")
 
     # string methods
-    _rlike_doc = """ Return a Boolean :class:`Column` based on a regex match.
-        :param other: an extended regex expression
-
-        >>> df.filter(df.name.rlike('ice$')).collect()
-        [Row(name=u'Alice', age=1)]
-        """
-    _like_doc = """ Return a Boolean :class:`Column` based on a SQL LIKE match.
-        :param other: a SQL LIKE pattern\n
-        See :func:`rlike` for a regex version
-
-        >>> df.filter(df.name.like('Al%')).collect()
-        [Row(name=u'Alice', age=1)]
-    """
-    _startswith_doc = """ Return a Boolean :class:`Column` based on a string match.\n
-        :param other: string at start of line (do not use a regex `^`)
-
-        >>> df.filter(df.name.startswith('Al')).collect()
-        [Row(name=u'Alice', age=1)]
-        >>> df.filter(df.name.startswith('^Al')).collect()
-        []
-        """
-    _endswith_doc = """ Return a Boolean :class:`Column` based on matching end of string.\n
-        :param other: string at end of line (do not use a regex `$`)
-
-        >>> df.filter(df.name.endswith('ice')).collect()
-        [Row(name=u'Alice', age=1)]
-        >>> df.filter(df.name.endswith('ice$')).collect()
-        []
-        """
+    _rlike_doc = """
+        Return a Boolean :class:`Column` based on a regex match.
+
+        :param other: an extended regex expression
+
+        >>> df.filter(df.name.rlike('ice$')).collect()
+        [Row(name=u'Alice', age=1)]
+        """
+    _like_doc = """
+        Return a Boolean :class:`Column` based on a SQL LIKE match.
+
+        :param other: a SQL LIKE pattern
+
+        See :func:`rlike` for a regex version
+
+        >>> df.filter(df.name.like('Al%')).collect()
+        [Row(name=u'Alice', age=1)]
+        """
+    _startswith_doc = """
+        Return a Boolean :class:`Column` based on a string match.
+
+        :param other: string at start of line (do not use a regex `^`)
+
+        >>> df.filter(df.name.startswith('Al')).collect()
+        [Row(name=u'Alice', age=1)]
+        >>> df.filter(df.name.startswith('^Al')).collect()
+        []
+        """
+    _endswith_doc = """
+        Return a Boolean :class:`Column` based on matching end of string.
+
+        :param other: string at end of line (do not use a regex `$`)
+
+        >>> df.filter(df.name.endswith('ice')).collect()
+        [Row(name=u'Alice', age=1)]
+        >>> df.filter(df.name.endswith('ice$')).collect()
+        []
+        """
 
     contains = _bin_op("contains")
     rlike = _bin_op("rlike", _rlike_doc)
@@ -333,24 +342,26 @@ def isin(self, *cols):
     desc = _unary_op("desc", "Returns a sort expression based on the"
                      " descending order of the given column name.")
 
-    _isNull_doc = """ True if the current expression is null. Often combined with
-        :func:`DataFrame.filter` to select rows with null values.
-
-        >>> df2.collect()
-        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
-        >>> df2.filter(df2.height.isNull).collect()
-        [Row(name=u'Alice', height=None)]
-        """
-    _isNotNull_doc = """ True if the current expression is not null. Often combined with
-        :func:`DataFrame.filter` to select rows with non-null values.
-
-        >>> df2.collect()
-        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
-        >>> df2.filter(df2.height.isNotNull).collect()
-        [Row(name=u'Tom', height=80)]
-        """
+    _isNull_doc = """
+        True if the current expression is null. Often combined with
+        :func:`DataFrame.filter` to select rows with null values.
+
+        >>> df2.collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2.filter(df2.height.isNull).collect()
+        [Row(name=u'Alice', height=None)]
+        """
+    _isNotNull_doc = """
+        True if the current expression is not null. Often combined with
+        :func:`DataFrame.filter` to select rows with non-null values.
+
+        >>> df2.collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2.filter(df2.height.isNotNull).collect()
+        [Row(name=u'Tom', height=80)]
+        """
 
     isNull = _unary_op("isNull", _isNull_doc)
     isNotNull = _unary_op("isNotNull", _isNotNull_doc)
 
     @since(1.3)
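The next commit reshapes the expected outputs because doctest compares them as exact strings against the globals set up at the bottom of column.py: the shared `df` there has columns age and name with Alice's age equal to 2, and `Row` objects constructed from keyword arguments sort their fields alphabetically, so reprs print age before name. A rough sketch of that fixture, inferred from the expected outputs rather than copied from `_test()` (which builds it with an explicit schema):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[2]").appName("column-doctests").getOrCreate()
    sc = spark.sparkContext

    # Mirrors the doctest expectations: age sorts before name, Alice is 2.
    df = sc.parallelize([(2, 'Alice'), (5, 'Bob')]).toDF(['age', 'name'])

    df.filter(df.name.rlike('ice$')).collect()
    # [Row(age=2, name=u'Alice')] on Python 2; no u prefix on Python 3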
From 0e3007dd372f0d3f2d249ff503f1a35a60098545 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Tue, 11 Apr 2017 11:20:48 -0700
Subject: [PATCH 6/8] Fixed documentation to match unit tests

---
 python/pyspark/sql/column.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 7859c74805323..6b100c0dd48c5 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -256,7 +256,7 @@ def __iter__(self):
         :param other: an extended regex expression
 
         >>> df.filter(df.name.rlike('ice$')).collect()
-        [Row(name=u'Alice', age=1)]
+        [Row(age=2, name=u'Alice')]
         """
     _like_doc = """
         Return a Boolean :class:`Column` based on a SQL LIKE match.
@@ -266,7 +266,7 @@ def __iter__(self):
         See :func:`rlike` for a regex version
 
        >>> df.filter(df.name.like('Al%')).collect()
-        [Row(name=u'Alice', age=1)]
+        [Row(age=2, name=u'Alice')]
         """
     _startswith_doc = """
         Return a Boolean :class:`Column` based on a string match.
@@ -274,7 +274,7 @@ def __iter__(self):
         :param other: string at start of line (do not use a regex `^`)
 
         >>> df.filter(df.name.startswith('Al')).collect()
-        [Row(name=u'Alice', age=1)]
+        [Row(age=2, name=u'Alice')]
         >>> df.filter(df.name.startswith('^Al')).collect()
         []
         """
@@ -284,7 +284,7 @@ def __iter__(self):
         :param other: string at end of line (do not use a regex `$`)
 
         >>> df.filter(df.name.endswith('ice')).collect()
-        [Row(name=u'Alice', age=1)]
+        [Row(age=2, name=u'Alice')]
         >>> df.filter(df.name.endswith('ice$')).collect()
         []
         """
@@ -346,19 +346,17 @@ def isin(self, *cols):
         True if the current expression is null. Often combined with
         :func:`DataFrame.filter` to select rows with null values.
 
-        >>> df2.collect()
-        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
         >>> df2.filter(df2.height.isNull).collect()
-        [Row(name=u'Alice', height=None)]
+        [Row(height=None, name=u'Alice')]
         """
     _isNotNull_doc = """
         True if the current expression is not null. Often combined with
         :func:`DataFrame.filter` to select rows with non-null values.
 
-        >>> df2.collect()
-        [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]
+        >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
         >>> df2.filter(df2.height.isNotNull).collect()
-        [Row(name=u'Tom', height=80)]
+        [Row(height=80, name=u'Tom')]
         """

From e785dbc9bd32a45d0e415f58ee8446cd03276b10 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Thu, 13 Apr 2017 16:51:09 -0700
Subject: [PATCH 7/8] Added imports and fixed functions

---
 python/pyspark/sql/column.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 6b100c0dd48c5..e7c7aed796e50 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -346,16 +346,18 @@ def isin(self, *cols):
         True if the current expression is null. Often combined with
         :func:`DataFrame.filter` to select rows with null values.
 
+        >>> from pyspark.sql import Row
         >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
-        >>> df2.filter(df2.height.isNull).collect()
+        >>> df2.filter(df2.height.isNull()).collect()
         [Row(height=None, name=u'Alice')]
         """
     _isNotNull_doc = """
         True if the current expression is not null. Often combined with
         :func:`DataFrame.filter` to select rows with non-null values.
 
+        >>> from pyspark.sql import Row
        >>> df2 = sc.parallelize([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)]).toDF()
-        >>> df2.filter(df2.height.isNotNull).collect()
+        >>> df2.filter(df2.height.isNotNull()).collect()
         [Row(height=80, name=u'Tom')]
         """

From b52765f5ef156862bd3cc4793a0d3fbd4d334449 Mon Sep 17 00:00:00 2001
From: Michael Patterson
Date: Thu, 20 Apr 2017 13:19:20 -0700
Subject: [PATCH 8/8] Added ignore_unicode_prefix to avoid string error

---
 python/pyspark/sql/column.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index e7c7aed796e50..46c1707cb6c37 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -290,10 +290,10 @@ def __iter__(self):
         """
 
     contains = _bin_op("contains")
-    rlike = _bin_op("rlike", _rlike_doc)
-    like = _bin_op("like", _like_doc)
-    startswith = _bin_op("startsWith", _startswith_doc)
-    endswith = _bin_op("endsWith", _endswith_doc)
+    rlike = ignore_unicode_prefix(_bin_op("rlike", _rlike_doc))
+    like = ignore_unicode_prefix(_bin_op("like", _like_doc))
+    startswith = ignore_unicode_prefix(_bin_op("startsWith", _startswith_doc))
+    endswith = ignore_unicode_prefix(_bin_op("endsWith", _endswith_doc))
 
     @ignore_unicode_prefix
     @since(1.3)
@@ -361,8 +361,8 @@ def isin(self, *cols):
         [Row(height=80, name=u'Tom')]
         """
 
-    isNull = _unary_op("isNull", _isNull_doc)
-    isNotNull = _unary_op("isNotNull", _isNotNull_doc)
+    isNull = ignore_unicode_prefix(_unary_op("isNull", _isNull_doc))
+    isNotNull = ignore_unicode_prefix(_unary_op("isNotNull", _isNotNull_doc))
 
     @since(1.3)
     def alias(self, *alias, **kwargs):
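For reference, the decorator applied in the final commit is defined in pyspark/rdd.py and rewrites a function's docstring at import time so that expected outputs written with `u'...'` literals still match under Python 3; roughly (paraphrased):

    import re
    import sys

    def ignore_unicode_prefix(f):
        """Strip u'' prefixes from a function's docstring on Python 3."""
        if sys.version >= '3':
            # Python 3 reprs have no u'' prefix, so drop it from the expected
            # doctest output; on Python 2 the docstring is left untouched.
            literal_re = re.compile(r"(\W|^)[uU](['])", re.UNICODE)
            f.__doc__ = literal_re.sub(r'\1\2', f.__doc__)
        return f

Because `rlike`, `like`, `startswith`, `endswith`, `isNull`, and `isNotNull` are built by `_bin_op`/`_unary_op` rather than `def` statements, the decorator has to be applied as a plain call around each assignment, which is what this commit does. column.py runs its own doctests through the `_test()` guard at the bottom of the file, so executing the module directly (with a pyspark environment available) exercises all of the new examples.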