In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import os
import numpy as np
import pandas as pd

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, BooleanType, StringType, ArrayType, MapType, StructField, StructType

In [2]:
# Jar handed to Spark via `spark.jars` (presumably the one providing the
# com.meritco.spark.udf classes registered later in this notebook).
# Set the UNICOM_JAR environment variable to point at a different build;
# when it is unset the original local path is used.
UNICOM_JAR = os.environ.get("UNICOM_JAR", "/Users/liuf/scala/unicom/target/unicom.jar")

# Local-mode session using all available cores; getOrCreate() reuses an
# already-running session if there is one.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("sparksql")
    .config("spark.jars", UNICOM_JAR)
    .getOrCreate()
)

## Register Java UDFs

In [3]:
def register_java_udf(name, return_type, package="com.meritco.spark.udf"):
    """
    spark.udf.registerJavaFunction("toUpperCase", "com.meritco.spark.udf.toUpperCase", StringType())

    register_java_udf("intToLetter", ArrayType(StringType()))
    """
    class_name = name if name.find("_") <= 0 else name.split("_")[0]
    spark.udf.registerJavaFunction(name, f"{package}.{class_name}", return_type)

# SQL function name -> return type declared when the Java UDF is registered.
# A name of the form "<Class>_<suffix>" is resolved by register_java_udf to the
# Java class "<Class>", so one class can be exposed with several return types.
UDFs = {
    # General helpers
    "intToLetter": ArrayType(StringType()),
    "reverseMap": MapType(IntegerType(), ArrayType(IntegerType())),
    "mapFromEntries": MapType(IntegerType(), ArrayType(IntegerType())),
    # Map filters, one registration per value type (string / integer)
    "filterMapByKey_Str": MapType(IntegerType(), StringType()),
    "filterMapByKey_Int": MapType(IntegerType(), IntegerType()),
    "filterMapByStartEnd_Str": MapType(IntegerType(), StringType()),
    "filterMapByStartEnd_Int": MapType(IntegerType(), IntegerType()),
    # Functions returning integer arrays
    "arrayFlattenDistinct": ArrayType(IntegerType()),
    "arrayFilterStartEnd": ArrayType(IntegerType()),
    # Day-set / day-count functions
    "daySetTo52": StringType(),
    "daySetFrom10": ArrayType(IntegerType()),
    "daycountFrom10": IntegerType(),
    "appDaysTo52": MapType(IntegerType(), StringType()),
    "appDaycountMapFrom52": MapType(IntegerType(), IntegerType()),
    # Miscellaneous
    "hostToIndex": IntegerType(),
    "collectionEqual": BooleanType(),
}

# Register every entry, in the order listed above.
for udf_name, udf_return_type in UDFs.items():
    register_java_udf(udf_name, udf_return_type)

In [4]:
# Smoke test for a registered UDF. 1610612736 == 2**30 + 2**29; the recorded
# output for this input is [1, 2].
# NOTE(review): presumably the two highest of 31 day bits map to days 1 and 2;
# confirm against the Java implementation.
(
    spark
    .sql("select daySetFrom10(1610612736) as dayset")
    .show()
)

+------+
|dayset|
+------+
|[1, 2]|
+------+

