### Two types of functions
- Built-in functions - round, avg, sum, max, min
- User-defined functions (UDFs) - allow custom logic when built-in SQL/Spark functions are not enough.

### Two types of user-defined functions
 - Temporary function
 - Permanent function

In [0]:

# Load the source transactions table from the catalog; later cells in this notebook build on `df`.
df = spark.table("inceptez_catalog.inputdb.tbltxns")
# Alternative ways of obtaining a DataFrame, kept for reference:
#   read the raw CSV files from the volume, letting Spark infer the schema
#df = spark.read.format("csv").option("inferSchema",True).load("/Volumes/inceptez_catalog/inputdb/customerdata/txns")
#   or query the same table through SQL
#df = spark.sql("select * from inceptez_catalog.inputdb.tbltxns")
display(df)

In [0]:
def calculate_discount(amount):
    """Return the discount for a transaction amount.

    Tiers: amounts above 100 get 10%, amounts above 50 get 5%, everything
    else gets no discount.

    Args:
        amount: Transaction amount (int, float or Decimal). ``None`` is what a
            Spark UDF receives for a SQL NULL.

    Returns:
        The discount as a float. ``None`` yields 0.0 ("no discount"), the same
        result the SQL CASE version of this rule gives for NULL via its ELSE branch.
    """
    # Spark passes SQL NULL as None; comparing None with a number raises TypeError.
    if amount is None:
        return 0.0

    # Normalise to float so Decimal inputs can be multiplied by float rates.
    amount = float(amount)

    if amount > 100:
        return amount * 0.1
    if amount > 50:
        return amount * 0.05
    return 0.0

def calculate_sellingprice(amt, dis):
    """Return the selling price: the amount minus its discount.

    Args:
        amt: Original amount (int, float or Decimal), or ``None`` for SQL NULL.
        dis: Discount to subtract (int, float or Decimal), or ``None`` for SQL NULL.

    Returns:
        ``amt - dis`` as a float, or ``None`` when either input is ``None``
        (a NULL operand gives a NULL result instead of a TypeError).
    """
    if amt is None or dis is None:
        return None
    # Convert each operand first so a Decimal amount and a float discount can be mixed.
    return float(amt) - float(dis)

In [0]:
from pyspark.sql.functions import udf, col,round as s_round
from pyspark.sql.types import DoubleType

# Wrap the plain Python function as a DataFrame UDF with an explicit DOUBLE return type.
discount_udf = udf(calculate_discount, DoubleType())

# Unrounded variant, kept for reference:
#df1 = df.withColumn("discountamt", discount_udf(col("amount")))
# Add the discount as a new column, rounded to 2 decimals with Spark's round
# (imported as s_round so it does not shadow Python's built-in round).
df1 = df.withColumn("discountamt", s_round(discount_udf(col("amount")),2))

df1.display()

In [0]:
# Register the Python function so it can be called from SQL as `discount(...)`.
# The return type is given explicitly: spark.udf.register defaults to StringType
# when none is passed, which would make the UDF return text instead of a number.
spark.udf.register("discount", calculate_discount, "double")

# Expose the source DataFrame to SQL under the name `tbltxns`.
df.createOrReplaceTempView("tbltxns")

# Same result as the DataFrame-API version: all columns plus the rounded discount.
df1 = spark.sql("select *, round(discount(amount),2) as discountamt from tbltxns")
df1.printSchema()


In [0]:
# Register the selling-price function for SQL with an explicit DOUBLE return type;
# without it spark.udf.register defaults to StringType and `sellprice` would be a string column.
spark.udf.register("sellprice", calculate_sellingprice, "double")

# Re-point the `tbltxns` temp view at df1 so the query below can read `discountamt`.
# Note: this replaces the view that was created from `df` earlier in the notebook.
df1.createOrReplaceTempView("tbltxns")
spark.sql("select *, sellprice(amount,discountamt) as sellprice from tbltxns").display()

In [0]:
%sql
-- Permanent SQL UDF stored in inceptez_catalog.inputdb: discount for a transaction amount.
-- CASE returns the first WHEN that matches, so the higher tier must be tested first;
-- with "amount > 50" listed first the 10% branch could never be reached.
-- Boundaries mirror the Python calculate_discount in this notebook (> 100 and > 50).
CREATE OR REPLACE FUNCTION inceptez_catalog.inputdb.calculate_discount(amount DOUBLE)
RETURNS DOUBLE
RETURN
    CASE
        WHEN amount > 100 THEN amount * 0.1     -- 10% discount
        WHEN amount > 50 THEN amount * 0.05     -- 5% discount
        ELSE 0.0                                -- no discount (also taken when amount is NULL)
    END;

In [0]:
%sql
-- Call the permanent UDF by its fully qualified name.
-- The result is the discount, so it is aliased discountamt (it is not a selling price).
select amount,round(inceptez_catalog.inputdb.calculate_discount(amount),2) as discountamt from inceptez_catalog.inputdb.tbltxns

In [0]:
from pyspark.sql.functions import expr
# Call the permanent catalog UDF from the DataFrame API: expr() parses the SQL snippet,
# so the function is referenced by its fully qualified name and rounded to 2 decimals.
txns_with_discount = df.withColumn("disc_amt", expr("round(inceptez_catalog.inputdb.calculate_discount(amount),2)"))
txns_with_discount.display()

In [0]:
%sql
-- Create a TEMPORARY SQL UDF to calculate discount.
-- Unlike the catalog-qualified functions in this notebook it is declared with the
-- TEMPORARY keyword and no catalog/schema prefix, and is called by its bare name.
-- CASE evaluates the WHEN clauses top to bottom, so the higher tier is listed first.
CREATE OR REPLACE TEMPORARY FUNCTION calculate_discount_temp(amount DOUBLE)
RETURNS DOUBLE
RETURN
    CASE 
        WHEN amount > 5000 THEN amount * 0.10     -- 10% discount
        WHEN amount >= 2000 THEN amount * 0.05    -- 5% discount
        ELSE 0.0                                  -- no discount
    END;

In [0]:
%sql
-- Apply the temporary UDF to every transaction; it is referenced by its unqualified name.
SELECT 
    txnid,
    amount,
    calculate_discount_temp(amount) AS disc_amt
FROM inceptez_catalog.inputdb.tbltxns;

In [0]:
# Same query as the %sql cell, issued from Python through spark.sql(): the temporary
# function calculate_discount_temp is called here by its bare name as well.
spark.sql("""
SELECT 
    txnid,
    amount,
    calculate_discount_temp(amount) AS disc_amt
FROM inceptez_catalog.inputdb.tbltxns
""").display()

In [0]:
%sql
-- Permanent Python UDF stored in inceptez_catalog.inputdb: classifies a transaction
-- by payment type and amount.
--   cash and amount > 200   -> HIGH_VALUE
--   cash and amount >= 100  -> MEDIUM_VALUE
--   anything else           -> LOW_VALUE (non-cash, NULL pay_type, NULL amount)
-- NOTE(review): amount is declared INT here while the SQL discount functions in this
-- notebook declare amount DOUBLE; confirm the column type of tbltxns.amount.
CREATE OR REPLACE FUNCTION inceptez_catalog.inputdb.value_category_fn(pay_type STRING, amount INT)
RETURNS STRING
LANGUAGE PYTHON
AS $$
# SQL NULL arrives as None; comparing None with a number would raise TypeError,
# so rows without an amount fall into the default category.
if pay_type != "cash" or amount is None:
    return "LOW_VALUE"
if amount > 200:
    return "HIGH_VALUE"
if amount >= 100:
    return "MEDIUM_VALUE"
return "LOW_VALUE"
$$;


In [0]:
%sql
-- Classify each transaction with the Python UDF, passing the paytype and amount columns.
SELECT 
    txnid,
    amount,
    inceptez_catalog.inputdb.value_category_fn(paytype,amount) AS txn_value
FROM inceptez_catalog.inputdb.tbltxns;

In [0]:
from pyspark.sql.functions import expr
# Call the Python catalog UDF from the DataFrame API. The new column holds the value
# category, so it is named txn_value; withColumn sets the column name itself, which is
# why no "AS ..." alias is needed inside expr().
# The variable name is kept unchanged so that any later cell referring to it still works.
txns_with_discount = df.withColumn("txn_value", expr("inceptez_catalog.inputdb.value_category_fn(paytype,amount)"))
txns_with_discount.display()