- To convert a **StructType (struct)** DataFrame column to a **MapType (map)** column in PySpark, you can use the **create_map** function.

In [0]:
from pyspark.sql.types import StructType,StructField, StringType, IntegerType

In [0]:
data = [("36636","Finance",(3000,"USA")), 
        ("40288","Finance",(5000,"IND")), 
        ("42114","Sales",(3900,"USA")), 
        ("39192","Marketing",(2500,"CAN")), 
        ("34534","Sales",(6500,"USA"))]

schema = StructType([
     StructField('id', StringType(), True),
     StructField('dept', StringType(), True),
     StructField('properties', StructType([
         StructField('salary', IntegerType(), True),
         StructField('location', StringType(), True)
         ]))
     ])

df = spark.createDataFrame(data=data,schema=schema)
df.printSchema()
display(df)

root
 |-- id: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- properties: struct (nullable = true)
 |    |-- salary: integer (nullable = true)
 |    |-- location: string (nullable = true)



id,dept,properties
36636,Finance,"List(3000, USA)"
40288,Finance,"List(5000, IND)"
42114,Sales,"List(3900, USA)"
39192,Marketing,"List(2500, CAN)"
34534,Sales,"List(6500, USA)"


#### **Convert StructType to MapType (map) Column**

In [0]:
# Convert struct type to Map
from pyspark.sql.functions import col, lit, create_map

df_map = df.withColumn("propertiesMap", create_map(
  lit("salary"), col("properties.salary"),
  lit("location"), col("properties.location")
  )).drop("properties")

df.printSchema()
display(df)

root
 |-- id: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- properties: struct (nullable = true)
 |    |-- salary: integer (nullable = true)
 |    |-- location: string (nullable = true)



id,dept,properties
36636,Finance,"List(3000, USA)"
40288,Finance,"List(5000, IND)"
42114,Sales,"List(3900, USA)"
39192,Marketing,"List(2500, CAN)"
34534,Sales,"List(6500, USA)"
