In [0]:
# Evaluate `spark` as the cell's last expression so the notebook displays it.
# NOTE(review): presumably the SparkSession pre-created by the notebook
# runtime; it is not defined anywhere in this notebook — confirm.
spark

---------- Create DataFrame

In [0]:
# Sample employee records, one tuple per row in the order:
# (id, name, age, gender_code, salary, country, dept).
# The data contains None values, one all-None row and a repeated row (id 7).
emp_data = [
    (1, 'manish', 26, 'm', 20000, 'india', 'IT'),
    (2, 'rahul', None, 'm', 40000, 'germany', 'engineering'),
    (3, 'pawan', 12, 'm', 60000, 'india', 'sales'),
    (4, 'roshini', 44, 'f', None, 'uk', 'engineering'),
    (5, 'raushan', 35, 'f', 70000, 'india', 'sales'),
    (6, None, 29, None, 200000, 'uk', 'IT'),
    (7, 'adam', 37, 'm', 65000, 'us', 'IT'),
    (8, 'chris', 16, 'm', 40000, 'us', 'sales'),
    (None, None, None, None, None, None, None),
    (7, 'adam', 37, 'm', 65000, 'us', 'IT'),
]

# Only column names are supplied here; no column types are declared.
schema = ['id', 'name', 'age', 'gender_code', 'salary', 'country', 'dept']

# Build the DataFrame used by the following cells and print it.
df = spark.createDataFrame(emp_data, schema=schema)
df.show()

+----+-------+----+-----------+------+-------+-----------+
|  id|   name| age|gender_code|salary|country|       dept|
+----+-------+----+-----------+------+-------+-----------+
|   1| manish|  26|          m| 20000|  india|         IT|
|   2|  rahul|null|          m| 40000|germany|engineering|
|   3|  pawan|  12|          m| 60000|  india|      sales|
|   4|roshini|  44|          f|  null|     uk|engineering|
|   5|raushan|  35|          f| 70000|  india|      sales|
|   6|   null|  29|       null|200000|     uk|         IT|
|   7|   adam|  37|          m| 65000|     us|         IT|
|   8|  chris|  16|          m| 40000|     us|      sales|
|null|   null|null|       null|  null|   null|       null|
|   7|   adam|  37|          m| 65000|     us|         IT|
+----+-------+----+-----------+------+-------+-----------+



---------- Use When-Otherwise Statement

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *


# Adult flag derived from `age`, defined once and reused below.
# Every branch value is a string, so the result column has a single explicit
# type instead of relying on implicit coercion between the integer branches
# and the 'NoValue' string (mixed branch types can fail when ANSI SQL mode
# is enabled). A null age satisfies neither comparison and therefore falls
# through to the 'NoValue' default.
is_adult_flag = (
    when(col('age') < 18, lit('0'))
    .when(col('age') >= 18, lit('1'))
    .otherwise(lit('NoValue'))
)

# 1) when-otherwise applied directly to the raw data.
df.withColumn('is_adult', is_adult_flag).show()

# 2) First fill a missing age with the default 18 (only for rows that have a
#    name), then apply the same flag to the filled data.
df1 = df.withColumn(
    'age',
    when(col('name').isNotNull() & col('age').isNull(), lit(18)).otherwise(col('age')),
)
df1.show()
df1.withColumn('is_adult', is_adult_flag).show()

# 3) when-otherwise with several conditions combined per branch.
df.withColumn(
    'age_cat',
    when((col('age') >= 1) & (col('age') < 18), 'minor')
    .when((col('age') >= 18) & (col('age') < 30), 'mid')
    .when(col('age') >= 30, 'major')
    .otherwise('NoValue'),
).show()

+----+-------+----+-----------+------+-------+-----------+--------+
|  id|   name| age|gender_code|salary|country|       dept|is_adult|
+----+-------+----+-----------+------+-------+-----------+--------+
|   1| manish|  26|          m| 20000|  india|         IT|       1|
|   2|  rahul|null|          m| 40000|germany|engineering| NoValue|
|   3|  pawan|  12|          m| 60000|  india|      sales|       0|
|   4|roshini|  44|          f|  null|     uk|engineering|       1|
|   5|raushan|  35|          f| 70000|  india|      sales|       1|
|   6|   null|  29|       null|200000|     uk|         IT|       1|
|   7|   adam|  37|          m| 65000|     us|         IT|       1|
|   8|  chris|  16|          m| 40000|     us|      sales|       0|
|null|   null|null|       null|  null|   null|       null| NoValue|
|   7|   adam|  37|          m| 65000|     us|         IT|       1|
+----+-------+----+-----------+------+-------+-----------+--------+

+----+-------+----+-----------+------+-------+-

------------ Case When With SQL

In [0]:
# CASE WHEN written in Spark SQL instead of the DataFrame API.
# Register the DataFrame under a view name so it can be queried with SQL.
df.createOrReplaceTempView('df_tbl')

# All CASE branches return string literals so the result column has a single
# explicit type; mixing the numeric branches with the 'NoValue' string relies
# on implicit coercion, which can fail when ANSI SQL mode is enabled.
# Rows with a null age match neither WHEN and take the ELSE branch.
spark.sql("""
    select *,
           case
             when age < 18 then '0'
             when age >= 18 then '1'
             else 'NoValue'
           end as is_active
    from df_tbl
""").show()


+----+-------+----+-----------+------+-------+-----------+---------+
|  id|   name| age|gender_code|salary|country|       dept|is_active|
+----+-------+----+-----------+------+-------+-----------+---------+
|   1| manish|  26|          m| 20000|  india|         IT|        1|
|   2|  rahul|null|          m| 40000|germany|engineering|  NoValue|
|   3|  pawan|  12|          m| 60000|  india|      sales|        0|
|   4|roshini|  44|          f|  null|     uk|engineering|        1|
|   5|raushan|  35|          f| 70000|  india|      sales|        1|
|   6|   null|  29|       null|200000|     uk|         IT|        1|
|   7|   adam|  37|          m| 65000|     us|         IT|        1|
|   8|  chris|  16|          m| 40000|     us|      sales|        0|
|null|   null|null|       null|  null|   null|       null|  NoValue|
|   7|   adam|  37|          m| 65000|     us|         IT|        1|
+----+-------+----+-----------+------+-------+-----------+---------+

