In [1]:
import findspark
findspark.init()
findspark.find()

'/usr/local/opt/apache-spark/libexec'

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

spark = (
            SparkSession
                .builder
                .appName("UdfApp")
                .master("local[4]")
    
                .config("spark.dynamicAllocation.enabled", "false")
                .config("spark.sql.adaptive.enabled", "false")
    
                .getOrCreate()
        )

sc = spark.sparkContext

spark

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


23/05/02 12:27:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/05/02 12:27:53 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
23/05/02 12:27:53 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
23/05/02 12:27:53 WARN Utils: Service 'SparkUI' could not bind on port 4042. Attempting port 4043.
23/05/02 12:27:53 WARN Utils: Service 'SparkUI' could not bind on port 4043. Attempting port 4044.
23/05/02 12:27:53 WARN Utils: Service 'SparkUI' could not bind on port 4044. Attempting port 4045.


In [3]:
from IPython.display import *
display(HTML("<style>pre { white-space: pre !important; }</style>"))

In [4]:
# Read Cabs file
cabsDF = (
              spark
                .read
                .option("header", "true")
                .option("inferSchema", "true")
    
                .csv("./../files/input/Cabs.csv")
         )


# Create temp view
cabsDF.createOrReplaceTempView("Cabs")


cabsDF.show(truncate=False)

                                                                                

+---------+--------------------+------------------------+----------------+------+-------------------+-----------------+--------------------+-----------+-----------+---------------+--------------------------+--------------------------------------------+---------------+
|CabNumber|VehicleLicenseNumber|Name                    |LicenseType     |Active|PermitLicenseNumber|VehicleVinNumber |WheelchairAccessible|VehicleYear|VehicleType|TelephoneNumber|Website                   |Address                                     |LastDateUpdated|
+---------+--------------------+------------------------+----------------+------+-------------------+-----------------+--------------------+-----------+-----------+---------------+--------------------------+--------------------------------------------+---------------+
|T802127C |C19641              |ABCON INC.              |OWNER MUST DRIVE|YES   |null               |5TDBK3EH0DS268018|null                |2016       |null       |(718)438-1100  |null         

### Create a function to convert case

In [5]:
def convertCase(str):
    
    result = ""
    
    nameWordsArray = str.split(",")
    
    for nameWord in nameWordsArray:        
       result = (result
                    + nameWord[0:1].upper()             # Ex- for word 'MOHIT', returns=> 'M'
                    + nameWord[1:len(nameWord)].lower() # Ex- for word 'MOHIT', returns=> 'ohit, '
                    + ", "   
                )
    
    result = result[0:len(result) - 2]
                                        # Ex- for name 'Batra, Mohit, ' returns => 'Batra, Mohit'
    
    return result 

### Option 1: Register function as a User Defined Function (UDF)

This registration option is for using UDF in Python/Scala

In [6]:
convertCaseUdf = udf( lambda str: convertCase(str), StringType() )

### Use UDF in DataFrame code

In [7]:
(
    cabsDF
        .select(
                    "Name",
            
                    convertCaseUdf(col("Name")).alias("Name_ConvertedCase")
               )
    
        # .withColumn ("Name_ConvertedCase", convertCaseUdf(col("Name")))
    
).show(truncate=False)

[Stage 3:>                                                          (0 + 1) / 1]

+------------------------+------------------------+
|Name                    |Name_ConvertedCase      |
+------------------------+------------------------+
|ABCON INC.              |Abcon inc.              |
|ACCEPTABLE TAXI LLC     |Acceptable taxi llc     |
|ALLIS CAB CORP          |Allis cab corp          |
|BENE CAB CORP           |Bene cab corp           |
|BOULOS TAXI CORP.       |Boulos taxi corp.       |
|CACERES,JAIME,P         |Caceres, Jaime, P       |
|CALCIUM ONE SERVICE INC.|Calcium one service inc.|
|CHARLES,WILBERT         |Charles, Wilbert        |
|CHAWKI,MICHAEL          |Chawki, Michael         |
|CHRYSOVALANTOU CORP,    |Chrysovalantou corp,    |
|COFI BOAT CORP.         |Cofi boat corp.         |
|DEKEL TAXI CAB CORP     |Dekel taxi cab corp     |
|FLORIAN & ROBERT INC    |Florian & robert inc    |
|GART CAB CORP           |Gart cab corp           |
|GAUTHIER,JACQUES        |Gauthier, Jacques       |
|GEORGAKOPOULOS, GEORGIA |Georgakopoulos,  georgia|
|GUJAR CAB C

                                                                                

### Option 2: Register function as a User Defined Function (UDF)

This registration option is for using UDF in SQL

In [8]:
spark.udf.register("convertCaseSqlUdf", convertCase, StringType())

<function __main__.convertCase(str)>

### Use UDF in SQL query

In [9]:
spark.sql("""

    SELECT Name
    
         , convertCaseSqlUdf(Name) AS Name_ConvertedCase
    
    FROM Cabs

""").show(truncate=False)

+------------------------+------------------------+
|Name                    |Name_ConvertedCase      |
+------------------------+------------------------+
|ABCON INC.              |Abcon inc.              |
|ACCEPTABLE TAXI LLC     |Acceptable taxi llc     |
|ALLIS CAB CORP          |Allis cab corp          |
|BENE CAB CORP           |Bene cab corp           |
|BOULOS TAXI CORP.       |Boulos taxi corp.       |
|CACERES,JAIME,P         |Caceres, Jaime, P       |
|CALCIUM ONE SERVICE INC.|Calcium one service inc.|
|CHARLES,WILBERT         |Charles, Wilbert        |
|CHAWKI,MICHAEL          |Chawki, Michael         |
|CHRYSOVALANTOU CORP,    |Chrysovalantou corp,    |
|COFI BOAT CORP.         |Cofi boat corp.         |
|DEKEL TAXI CAB CORP     |Dekel taxi cab corp     |
|FLORIAN & ROBERT INC    |Florian & robert inc    |
|GART CAB CORP           |Gart cab corp           |
|GAUTHIER,JACQUES        |Gauthier, Jacques       |
|GEORGAKOPOULOS, GEORGIA |Georgakopoulos,  georgia|
|GUJAR CAB C