From 279d6207be25cead5ef5b1c04f1719bd15f2dc9a Mon Sep 17 00:00:00 2001
From: asokadiggs
Date: Mon, 28 Sep 2015 13:51:03 -0700
Subject: [PATCH 1/2] Update dataframe.py

Documentation for dropDuplicates() and drop_duplicates() is one and the same. Resolved the error in the example for drop_duplicates using the same approach used for groupby and groupBy, by indicating that dropDuplicates and drop_duplicates are aliases.
---
 python/pyspark/sql/dataframe.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b09422aadef8e..4b8b7a58aaad6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -930,6 +930,8 @@ def subtract(self, other):
     def dropDuplicates(self, subset=None):
         """Return a new :class:`DataFrame` with duplicate rows removed,
         optionally only considering certain columns.
+        
+        :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
 
         >>> from pyspark.sql import Row
         >>> df = sc.parallelize([ \

From 74324c49dcb020fe5674a4037f99aabb241bbc4f Mon Sep 17 00:00:00 2001
From: asokadiggs
Date: Tue, 29 Sep 2015 11:11:43 -0700
Subject: [PATCH 2/2] [SPARK-10782][Python] Update dropDuplicates documentation

Documentation for dropDuplicates() and drop_duplicates() is one and the same. Resolved the error in the example for drop_duplicates using the same approach used for groupby and groupBy, by indicating that dropDuplicates and drop_duplicates are aliases.
---
 python/pyspark/sql/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 4b8b7a58aaad6..033b31983ffac 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -930,7 +930,7 @@ def subtract(self, other):
     def dropDuplicates(self, subset=None):
         """Return a new :class:`DataFrame` with duplicate rows removed,
         optionally only considering certain columns.
-        
+
         :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
 
         >>> from pyspark.sql import Row