From c671e4fe7f7a2dd08048e96c6c7c0a6485d063b9 Mon Sep 17 00:00:00 2001
From: Kevin Cox
Date: Tue, 12 Apr 2016 17:31:37 -0400
Subject: [PATCH] Expose null checking function to Python land.

This allows efficiently mapping a column that shouldn't contain any
nulls to a column that Spark knows doesn't have any nulls.
---
 python/pyspark/sql/functions.py                          | 6 ++++++
 .../src/main/scala/org/apache/spark/sql/functions.scala  | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 5017ab5b3646d..92755636688be 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -138,6 +138,10 @@ def _():
                    ' eliminated.'
 }
 
+_functions_1_8 = {
+    'assertNotNull': 'An identity function that throws an error if the argument is null.'
+}
+
 # math functions that take two arguments as input
 _binary_mathfunctions = {
     'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
@@ -182,6 +186,8 @@ def _():
     globals()[_name] = since(1.6)(_create_window_function(_name, _doc))
 for _name, _doc in _functions_1_6.items():
     globals()[_name] = since(1.6)(_create_function(_name, _doc))
+for _name, _doc in _functions_1_8.items():
+    globals()[_name] = since(1.8)(_create_function(_name, _doc))
 
 del _name, _doc
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 223122300dbb3..56d45996eb4e1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -970,6 +970,9 @@ object functions {
    */
   def isnull(e: Column): Column = withExpr { IsNull(e.expr) }
 
+  /** An identity function that throws an exception at runtime if the input value is null. */
+  def assertNotNull(e: Column): Column = withExpr { AssertNotNull(e.expr, Seq(e.toString)) }
+
   /**
    * A column expression that generates monotonically increasing 64-bit integers.
    *
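
Usage sketch (not part of the patch): with the change applied, the new
function would be callable from PySpark like any other column function.
The `sqlContext`, DataFrame, and column names below are illustrative
assumptions, not something defined by this patch.

    from pyspark.sql import functions as F

    # Hypothetical DataFrame whose "id" column should never be null.
    df = sqlContext.createDataFrame([(1, "a"), (2, "b")], ["id", "name"])

    # assertNotNull passes each value through unchanged, but the job fails
    # at runtime if a null shows up, so Spark can treat the resulting
    # column as one that contains no nulls.
    checked = df.select(F.assertNotNull(df["id"]).alias("id"), "name")
    checked.show()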