Skip to content

Commit

Permalink
revert java UDF changes (#2)
Browse files (browse the repository at this point in the history)
  • Loading branch information
cloud-fan committed Jul 25, 2017
1 parent 8422c42 commit 1b3aa22
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 366 deletions.
24 changes: 8 additions & 16 deletions python/pyspark/sql/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.readwriter import DataFrameReader
from pyspark.sql.streaming import DataStreamReader
from pyspark.sql.types import DoubleType, IntegerType, Row, StringType
from pyspark.sql.types import IntegerType, Row, StringType
from pyspark.sql.utils import install_exception_handler

__all__ = ["SQLContext", "HiveContext", "UDFRegistration"]
Expand Down Expand Up @@ -208,37 +208,29 @@ def registerFunction(self, name, f, returnType=StringType()):

@ignore_unicode_prefix
@since(2.1)
def registerJavaFunction(self, name, javaClassName, returnType=None, deterministic=True):
def registerJavaFunction(self, name, javaClassName, returnType=None):
"""Register a java UDF so it can be used in SQL statements.
In addition to a name and the function itself, the return type can be optionally specified.
When the return type is not specified we would infer it via reflection.
:param name: name of the UDF.
:param javaClassName: fully qualified name of java class.
:param returnType: a :class:`pyspark.sql.types.DataType` object.
:param deterministic: a flag indicating if the UDF is deterministic. Deterministic UDF
returns same result each time it is invoked with a particular input.
:param name: name of the UDF
:param javaClassName: fully qualified name of java class
:param returnType: a :class:`pyspark.sql.types.DataType` object
>>> sqlContext.registerJavaFunction("javaStringLength",
... "test.org.apache.spark.sql.JavaStringLength", IntegerType())
>>> sqlContext.sql("SELECT javaStringLength('test')").collect()
[Row(UDF:javaStringLength(test)=4)]
[Row(UDF(test)=4)]
>>> sqlContext.registerJavaFunction("javaStringLength2",
... "test.org.apache.spark.sql.JavaStringLength")
>>> sqlContext.sql("SELECT javaStringLength2('test')").collect()
[Row(UDF:javaStringLength2(test)=4)]
>>> sqlContext.registerJavaFunction("javaRand",
... "test.org.apache.spark.sql.JavaRandUDF", DoubleType(), deterministic=False)
>>> sqlContext.sql("SELECT javaRand(3)").collect() # doctest: +SKIP
[Row(UDF:javaRand(3)=3.12345)]
[Row(UDF(test)=4)]
"""
jdt = None
if returnType is not None:
jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json())
self.sparkSession._jsparkSession.udf().registerJava(
name, javaClassName, jdt, deterministic)
self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt)

@ignore_unicode_prefix
@since(2.3)
Expand Down

0 comments on commit 1b3aa22

Please sign in to comment.