In [0]:
from pyspark.sql.types import StructField, StructType, StringType, MapType

# Two nullable columns: a string `name` and a string-to-string map `properties`.
schema = (
    StructType()
    .add('name', StringType(), True)
    .add('properties', MapType(StringType(), StringType()), True)
)

In [0]:
# Sample (name, properties) rows. Amit's 'eye' entry is None so the map column
# also carries a null value.
Data_Dictionary = [
    ('Ram', {'hair': 'brown', 'eye': 'blue'}),
    ('Shyam', {'hair': 'black', 'eye': 'black'}),
    ('Amit', {'hair': 'grey', 'eye': None}),
    ('Aupam', {'hair': 'red', 'eye': 'black'}),
    ('Rahul', {'hair': 'black', 'eye': 'grey'}),
]

# Build the DataFrame against the explicit schema, then show its structure and rows.
df_map = spark.createDataFrame(Data_Dictionary, schema)
df_map.printSchema()
display(df_map)

root
 |-- name: string (nullable = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



name,properties
Ram,"Map(eye -> blue, hair -> brown)"
Shyam,"Map(eye -> black, hair -> black)"
Amit,"Map(eye -> null, hair -> grey)"
Aupam,"Map(eye -> black, hair -> red)"
Rahul,"Map(eye -> grey, hair -> black)"


#### **map_keys()**

- Getting the **keys** of a MapType column using the **map_keys** function.

**Syntax**

     map_keys(col)

In [0]:
from pyspark.sql.functions import map_keys

In [0]:
# To extract the keys of a MapType column, use map_keys().
# The resulting array column is aliased 'map_keys' so its label matches its content.
display(df_map.select(df_map.name, map_keys(df_map.properties).alias('map_keys')))

name,map_keys
Ram,"List(eye, hair)"
Shyam,"List(eye, hair)"
Amit,"List(eye, hair)"
Aupam,"List(eye, hair)"
Rahul,"List(eye, hair)"


In [0]:
df_map.select(df_map.name, map_keys(df_map.properties).alias('map_values')).show()

+-----+-----------+
| name| map_values|
+-----+-----------+
|  Ram|[eye, hair]|
|Shyam|[eye, hair]|
| Amit|[eye, hair]|
|Aupam|[eye, hair]|
|Rahul|[eye, hair]|
+-----+-----------+



#### **map_values()**
- Getting the **values** of a MapType column using the **map_values** function.

In [0]:
from pyspark.sql.functions import map_values

# Reduce each row's map to an array holding only its values.
values_column = map_values(df_map["properties"]).alias('map_values')
display(df_map.select(df_map["name"], values_column))

name,map_values
Ram,"List(blue, brown)"
Shyam,"List(black, black)"
Amit,"List(null, grey)"
Aupam,"List(black, red)"
Rahul,"List(grey, black)"


In [0]:
# Same projection as the display() cell, printed as a plain-text table.
(
    df_map
    .select(
        df_map["name"],
        map_values(df_map["properties"]).alias('map_values'),
    )
    .show()
)

+-----+--------------+
| name|    map_values|
+-----+--------------+
|  Ram| [blue, brown]|
|Shyam|[black, black]|
| Amit|  [NULL, grey]|
|Aupam|  [black, red]|
|Rahul| [grey, black]|
+-----+--------------+



#### **explode**

- Getting the **keys and values** as separate rows using the **explode** function.

In [0]:
from pyspark.sql.functions import explode

# explode() on a map column emits one row per map entry, split into `key` and `value` columns.
exploded_entries = explode(df_map["properties"])
display(df_map.select(df_map["name"], exploded_entries))

name,key,value
Ram,eye,blue
Ram,hair,brown
Shyam,eye,black
Shyam,hair,black
Amit,eye,
Amit,hair,grey
Aupam,eye,black
Aupam,hair,red
Rahul,eye,grey
Rahul,hair,black


- Getting **all the distinct keys** of a MapType column using the **explode** function.

In [0]:
# Flatten every row's key array to one key per row, then drop the repeats.
exploded_keys = explode(map_keys(df_map.properties))
keysDF = df_map.select(exploded_keys).distinct()

# Each Row holds a single column; take it and collect the keys into a Python list.
keysList = keysDF.rdd.map(lambda row: row[0]).collect()
print(keysList)

['eye', 'hair']


**How to get the distinct, non-null key/value entries from a MapType column**

In [0]:
# Distinct (name, key, value) entries of the map, keeping only entries whose value is not null.
exploded_entries = explode(df_map.properties).alias("key", "value")
unique_values_df = (
    df_map.select(df_map.name, exploded_entries)
    .distinct()
    .filter("value IS NOT NULL")
)
display(unique_values_df)

name,key,value
Ram,hair,brown
Ram,eye,blue
Shyam,eye,black
Shyam,hair,black
Amit,hair,grey
Aupam,eye,black
Aupam,hair,red
Rahul,hair,black
Rahul,eye,grey


In [0]:
# Collect only the values that consist entirely of numeric characters.
# unique_values_df is built with a `value IS NOT NULL` filter, so every
# record.value here is a string and .isnumeric() can be called on it.
collected_rows = unique_values_df.rdd.collect()
unique_values_list = [row.value for row in collected_rows if row.value.isnumeric()]
print(unique_values_list)

[]
