### Using udf decorator

In [0]:
@udf("double")
def multiply_2(v):
  """Return ``v`` multiplied by 2, for use as a Spark column UDF.

  The decorator declares the UDF's return type as ``double``.

  Args:
    v: The value of one row of the input column, or ``None`` for SQL NULL.

  Returns:
    ``v * 2``, or ``None`` when ``v`` is ``None``.
  """
  # Row-at-a-time Python UDFs receive SQL NULL as None; None * 2 would raise
  # TypeError and fail the whole job, so propagate the null instead.
  if v is None:
    return None
  return v * 2

In [0]:
from pyspark.sql.functions import col,rand
# Build ids 0..999, derive a random double column 'v' from each id, then add
# 'v_2' by running the multiply_2 UDF over 'v'.
df = (
  spark.range(0, 1000)
  .withColumn('v', (col('id') * rand() * 100).cast('double'))
  .withColumn('v_2', multiply_2(col('v')))
)

# Show only the first ten rows.
display(df.take(10))

id,v,v_2
0,0.0,0.0
1,30.797280132706284,61.59456026541257
2,34.9986172455228,69.9972344910456
3,64.49880226644699,128.99760453289397
4,206.97821282555523,413.95642565111046
5,227.7068830651621,455.4137661303241
6,180.82002439991217,361.64004879982434
7,190.67145336851848,381.34290673703697
8,786.5968604563585,1573.193720912717
9,59.660617950389415,119.32123590077885


### Alternate syntax: registering a UDF through the SparkSession

In [0]:
from pyspark.sql.types import DoubleType

In [0]:
def add_one(v):
  """Return ``v + 1``, passing ``None`` through unchanged.

  Args:
    v: A numeric value, or ``None``.

  Returns:
    ``v + 1``, or ``None`` when ``v`` is ``None``.
  """
  # When this function runs as a Spark UDF, SQL NULL arrives as None;
  # None + 1 would raise TypeError, so propagate the null instead.
  if v is None:
    return None
  return v + 1

# Expose add_one to SQL expressions under the name "plus_one", declaring a
# double return type.
spark.udf.register(name="plus_one", f=add_one, returnType=DoubleType())

# Invoke the registered UDF by name from inside a SQL expression string.
display(
  df.selectExpr('id', 'plus_one(v) as v').take(10)
)

id,v
0,1.0
1,31.797280132706284
2,35.9986172455228
3,65.49880226644699
4,207.97821282555523
5,228.7068830651621
6,181.82002439991217
7,191.67145336851848
8,787.5968604563585
9,60.660617950389415


### pandas vectorized udf

In [0]:
from pyspark.sql.functions import pandas_udf


@pandas_udf('double')
def subtract_one(v):
  """Return ``v`` with 1 subtracted; declared to Spark as returning double."""
  decremented = v - 1
  return decremented

# Keep 'id' and replace 'v' with the result of the pandas UDF, then show the
# first ten rows.
display(
  df.select(
    col('id'),
    subtract_one(col('v')).alias('v'),
  ).take(10)
)

id,v
0,-1.0
1,29.797280132706284
2,33.9986172455228
3,63.49880226644699
4,205.97821282555523
5,226.7068830651621
6,179.82002439991217
7,189.67145336851848
8,785.5968604563585
9,58.660617950389415
