In [0]:
from pyspark.sql.functions import first

In [0]:
from pyspark.sql.types import MapType, StringType, IntegerType
from pyspark.sql import Row

# Sample records: one Row per person, each carrying a `properties` map.
# The maps deliberately mix ints, strings and None, and several names repeat
# so that the later cells have nulls and duplicates to work with.
data = [
    Row(name="Albert", properties={"age": 25, "height": 12, "Gender": "Male", "Country": "India", "eyeColor": "blue"}),
    Row(name="Bobby", properties={"age": 30, "height": 15, "Gender": "Male", "Country": "India", "eyeColor": None}),
    Row(name="Anand", properties={"age": 28, "height": 18, "Gender": "Male", "Country": "India", "eyeColor": "black"}),
    Row(name="Chandra", properties={"age": None, "height": 22, "Gender": "Male", "Country": "India", "eyeColor": "white"}),
    Row(name="Seetha", properties={"age": 45, "height": 32, "Gender": "Male", "Country": "India", "eyeColor": "yellow"}),
    Row(name="Varun", properties={"age": 32, "height": 10, "Gender": "Male", "Country": "India", "eyeColor": None}),
    Row(name="Bobby", properties={"age": None, "height": 19, "Gender": "Male", "Country": "India", "eyeColor": "blue"}),
    Row(name="Bobby", properties={"age": 30, "height": 8, "Gender": "Male", "Country": "India", "eyeColor": "brown"}),
    Row(name="Anand", properties={"age": None, "height": 16, "Gender": "Male", "Country": "India", "eyeColor": None}),
    Row(name="Chandra", properties={"age": 35, "height": 13, "Gender": "Male", "Country": "India", "eyeColor": "white"}),
    Row(name="Seetha", properties={"age": 45, "height": 27, "Gender": "Male", "Country": "India", "eyeColor": "yellow"}),
    Row(name="Anand", properties={"age": 32, "height": 29, "Gender": "Male", "Country": "India", "eyeColor": None})
]

# Define the schema explicitly (DDL string).
# A Spark map has a single value type, but the dicts above hold both ints and
# strings. Leaving the value type to schema inference is fragile: the inferred
# type may not fit every value. Declaring map<string,string> removes the guesswork.
schema = "name string, properties map<string,string>"

# Convert every non-null map value to str so it matches the declared value
# type; None is kept as None so the null-handling examples still see nulls.
records = [
    (
        row["name"],
        {
            key: None if value is None else str(value)
            for key, value in row["properties"].items()
        },
    )
    for row in data
]

# Create a DataFrame
df = spark.createDataFrame(records, schema)

display(df)

name,properties
Albert,"Map(Country -> India, height -> 12, age -> 25, Gender -> Male, eyeColor -> blue)"
Bobby,"Map(Country -> India, height -> 15, age -> 30, Gender -> Male, eyeColor -> null)"
Anand,"Map(Country -> India, height -> 18, age -> 28, Gender -> Male, eyeColor -> black)"
Chandra,"Map(Country -> India, height -> 22, age -> null, Gender -> Male, eyeColor -> white)"
Seetha,"Map(Country -> India, height -> 32, age -> 45, Gender -> Male, eyeColor -> yellow)"
Varun,"Map(Country -> India, height -> 10, age -> 32, Gender -> Male, eyeColor -> null)"
Bobby,"Map(Country -> India, height -> 19, age -> null, Gender -> Male, eyeColor -> blue)"
Bobby,"Map(Country -> India, height -> 8, age -> 30, Gender -> Male, eyeColor -> brown)"
Anand,"Map(Country -> India, height -> 16, age -> null, Gender -> Male, eyeColor -> null)"
Chandra,"Map(Country -> India, height -> 13, age -> 35, Gender -> Male, eyeColor -> white)"


#### **1) Using `select()` and `col()` Functions**

In [0]:
from pyspark.sql.functions import col

# Keep `name` and promote each listed map entry to its own column,
# aliased with the key it was read from.
properties_col = col("properties")
property_keys = ["age", "eyeColor", "height", "Gender", "Country"]

df_col = df.select(
    "name",
    *[properties_col[key].alias(key) for key in property_keys]
)

display(df_col)

name,age,eyeColor,height,Gender,Country
Albert,25.0,blue,12,Male,India
Bobby,30.0,,15,Male,India
Anand,28.0,black,18,Male,India
Chandra,,white,22,Male,India
Seetha,45.0,yellow,32,Male,India
Varun,32.0,,10,Male,India
Bobby,,blue,19,Male,India
Bobby,30.0,brown,8,Male,India
Anand,,,16,Male,India
Chandra,35.0,white,13,Male,India


#### **2) Using `explode()` Function**

- `explode()` turns each **key-value pair** of the map into a **separate row**; the rows are then **pivoted** so that each distinct key becomes its own column.

In [0]:
from pyspark.sql.functions import explode

# explode() on a map column emits one output row per map entry
# (see the `key` and `value` columns in the displayed result).
exploded_entries = explode(df["properties"])
df_exploded = df.select("name", exploded_entries)

display(df_exploded)

name,key,value
Albert,Country,India
Albert,Gender,Male
Albert,eyeColor,blue
Albert,age,25
Albert,height,12
Bobby,Country,India
Bobby,Gender,Male
Bobby,eyeColor,
Bobby,age,30
Bobby,height,15


In [0]:
# Pivot the DataFrame to have distinct keys as separate columns.
# Several input rows share the same name (e.g. "Bobby"), so each (name, key)
# group can hold more than one value, and some of those values are null.
# `ignorenulls=True` makes `first` return the first non-null value it sees
# instead of possibly returning a null while a real value exists in the group.
# NOTE: no ordering is imposed here, so which row counts as "first" within a
# group is not guaranteed.
df_pivoted = (
    df_exploded
    .groupBy("name")
    .pivot("key")
    .agg(first("value", ignorenulls=True))
)
display(df_pivoted)

name,Country,Gender,age,eyeColor,height
Albert,India,Male,25,blue,12
Anand,India,Male,28,black,18
Bobby,India,Male,30,blue,15
Chandra,India,Male,35,white,22
Seetha,India,Male,45,yellow,32
Varun,India,Male,32,,10


#### **3) Handling Null Values**

- Accessing a map key whose value is null yields a null in the resulting column, which can cause issues in later processing. To deal with these nulls, use the `coalesce()` function in combination with `lit()` to provide a default value where necessary.

In [0]:
from pyspark.sql.functions import coalesce, lit

# Read two map entries and substitute a default whenever the entry is null:
# 0 for `age`, "unknown" for `eyeColor`.
props_map = col("properties")
age_or_default = coalesce(props_map["age"], lit(0)).alias("age")
eye_color_or_default = coalesce(props_map["eyeColor"], lit("unknown")).alias("eyeColor")

df_with_defaults = df.select("name", age_or_default, eye_color_or_default)

display(df_with_defaults)

name,age,eyeColor
Albert,25,blue
Bobby,30,unknown
Anand,28,black
Chandra,0,white
Seetha,45,yellow
Varun,32,unknown
Bobby,0,blue
Bobby,30,brown
Anand,0,unknown
Chandra,35,white
