#### **How to Convert DataFrame Columns to MapType**

- To convert DataFrame columns to a **MapType (dictionary)** column in PySpark, you can use the **create_map** function from the pyspark.sql.functions module.
- This function allows you to create a **map** from a set of **key-value pairs**, where the **keys and values** are columns from the DataFrame.

#### **Syntax**

     create_map(*columns)
- `columns`: column names (strings) or `Column` objects, passed as alternating **key-value pairs** (key1, value1, key2, value2, ...).

In [0]:
import pyspark.sql.functions as f
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
from pyspark.sql.functions import col, lit, create_map, concat

#### **Sample dataframe**

In [0]:
# Sample rows; field order follows the schema declared below.
data = [
    ("Vikas", "36636", "IT", 300000, "USA", 25),
    ("Mukul", "40288", "Finance", 500000, "IND", 35),
    ("Kannan", "42114", "Sales", 390000, "AUS", 23),
    ("Rishab", "39192", "Marketing", 250000, "CAN", 31),
    ("Amaresh", "34534", "Maintenance", 650000, "NZ", 37),
    ("Prakash", "69114", "Sales", 390000, "AUS", 23),
    ("Pramod", "78192", "Marketing", 250000, "CAN", 31),
    ("Prasad", "56534", "Maintenance", 650000, "NZ", 37),
]

# (column name, Spark type) in column order; every column is declared nullable.
column_types = [
    ('Name', StringType()),
    ('ProductId', StringType()),
    ('deptartment', StringType()),
    ('budget', IntegerType()),
    ('location', StringType()),
    ('Age', IntegerType()),
]
schema = StructType(
    [StructField(column_name, spark_type, True) for column_name, spark_type in column_types]
)

# Build the DataFrame, then show its schema followed by its rows.
df = spark.createDataFrame(data, schema)
df.printSchema()
display(df)

root
 |-- Name: string (nullable = true)
 |-- ProductId: string (nullable = true)
 |-- deptartment: string (nullable = true)
 |-- budget: integer (nullable = true)
 |-- location: string (nullable = true)
 |-- Age: integer (nullable = true)



Name,ProductId,deptartment,budget,location,Age
Vikas,36636,IT,300000,USA,25
Mukul,40288,Finance,500000,IND,35
Kannan,42114,Sales,390000,AUS,23
Rishab,39192,Marketing,250000,CAN,31
Amaresh,34534,Maintenance,650000,NZ,37
Prakash,69114,Sales,390000,AUS,23
Pramod,78192,Marketing,250000,CAN,31
Prasad,56534,Maintenance,650000,NZ,37


**How to create a MapType column in PySpark Azure Databricks using various methods**
- All of the snippets below generate the same output: a map column whose key comes from `name` and whose value comes from `age`.

      from pyspark.sql.functions import create_map, col

      # Method 1:
      df.select("*", create_map("name", "age")).show()

      # Method 2:
      df.select("*", create_map(["name", "age"])).show()

      # Method 3:
      df.select("*", create_map(col("name"), col("age"))).show()

      # Method 4:
      df.select("*", create_map([col("name"), col("age")])).show()


#### **Convert Columns to dictionary**

In [0]:
# Append a single-entry map column: key = deptartment value, value = budget value.
df_dict = df.withColumn('dept_sal', create_map('deptartment', 'budget'))
display(df_dict)

Name,ProductId,deptartment,budget,location,Age,dept_sal
Vikas,36636,IT,300000,USA,25,Map(IT -> 300000)
Mukul,40288,Finance,500000,IND,35,Map(Finance -> 500000)
Kannan,42114,Sales,390000,AUS,23,Map(Sales -> 390000)
Rishab,39192,Marketing,250000,CAN,31,Map(Marketing -> 250000)
Amaresh,34534,Maintenance,650000,NZ,37,Map(Maintenance -> 650000)
Prakash,69114,Sales,390000,AUS,23,Map(Sales -> 390000)
Pramod,78192,Marketing,250000,CAN,31,Map(Marketing -> 250000)
Prasad,56534,Maintenance,650000,NZ,37,Map(Maintenance -> 650000)


In [0]:
# truncate=False prints every cell in full, so the whole dept_sal map is visible.
df_dict.show(truncate=False)

+-------+---------+-----------+------+--------+---+-----------------------+
|Name   |ProductId|deptartment|budget|location|Age|dept_sal               |
+-------+---------+-----------+------+--------+---+-----------------------+
|Vikas  |36636    |IT         |300000|USA     |25 |{IT -> 300000}         |
|Mukul  |40288    |Finance    |500000|IND     |35 |{Finance -> 500000}    |
|Kannan |42114    |Sales      |390000|AUS     |23 |{Sales -> 390000}      |
|Rishab |39192    |Marketing  |250000|CAN     |31 |{Marketing -> 250000}  |
|Amaresh|34534    |Maintenance|650000|NZ      |37 |{Maintenance -> 650000}|
|Prakash|69114    |Sales      |390000|AUS     |23 |{Sales -> 390000}      |
|Pramod |78192    |Marketing  |250000|CAN     |31 |{Marketing -> 250000}  |
|Prasad |56534    |Maintenance|650000|NZ      |37 |{Maintenance -> 650000}|
+-------+---------+-----------+------+--------+---+-----------------------+



**AVRO Schema for Dept_Metadata**

     {
       "name": "Dept_Metadata",
       "type": [
         "null",
         {
           "type": "map",
           "values": "string"
         }
       ],
       "doc": "key value pair, e.g deptartment, budget",
       "default": null
     }

In [0]:
# Convert columns to Map.
# Each map entry is a (literal key, column value) pair. All values of a map must share
# one type: "deptartment" is a string while "budget" is an integer, so budget is cast
# to string explicitly instead of relying on Spark's implicit type coercion (whose
# rules differ when ANSI SQL mode is enabled). This keeps the value type in line with
# the Avro schema documented above ("values": "string").
df_map = df.withColumn(
    "Dept_Metadata",
    create_map(
        lit("deptartment"), col("deptartment"),
        lit("budget"), col("budget").cast("string"),
    ),
)

df_map.printSchema()
display(df_map)

root
 |-- Name: string (nullable = true)
 |-- ProductId: string (nullable = true)
 |-- deptartment: string (nullable = true)
 |-- budget: integer (nullable = true)
 |-- location: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- Dept_Metadata: map (nullable = false)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



Name,ProductId,deptartment,budget,location,Age,Dept_Metadata
Vikas,36636,IT,300000,USA,25,"Map(deptartment -> IT, budget -> 300000)"
Mukul,40288,Finance,500000,IND,35,"Map(deptartment -> Finance, budget -> 500000)"
Kannan,42114,Sales,390000,AUS,23,"Map(deptartment -> Sales, budget -> 390000)"
Rishab,39192,Marketing,250000,CAN,31,"Map(deptartment -> Marketing, budget -> 250000)"
Amaresh,34534,Maintenance,650000,NZ,37,"Map(deptartment -> Maintenance, budget -> 650000)"
Prakash,69114,Sales,390000,AUS,23,"Map(deptartment -> Sales, budget -> 390000)"
Pramod,78192,Marketing,250000,CAN,31,"Map(deptartment -> Marketing, budget -> 250000)"
Prasad,56534,Maintenance,650000,NZ,37,"Map(deptartment -> Maintenance, budget -> 650000)"


In [0]:
# truncate=False prints every cell in full, so the whole Dept_Metadata map is visible.
df_map.show(truncate=False)

+-------+---------+-----------+------+--------+---+----------------------------------------------+
|Name   |ProductId|deptartment|budget|location|Age|Dept_Metadata                                 |
+-------+---------+-----------+------+--------+---+----------------------------------------------+
|Vikas  |36636    |IT         |300000|USA     |25 |{deptartment -> IT, budget -> 300000}         |
|Mukul  |40288    |Finance    |500000|IND     |35 |{deptartment -> Finance, budget -> 500000}    |
|Kannan |42114    |Sales      |390000|AUS     |23 |{deptartment -> Sales, budget -> 390000}      |
|Rishab |39192    |Marketing  |250000|CAN     |31 |{deptartment -> Marketing, budget -> 250000}  |
|Amaresh|34534    |Maintenance|650000|NZ      |37 |{deptartment -> Maintenance, budget -> 650000}|
|Prakash|69114    |Sales      |390000|AUS     |23 |{deptartment -> Sales, budget -> 390000}      |
|Pramod |78192    |Marketing  |250000|CAN     |31 |{deptartment -> Marketing, budget -> 250000}  |
|Prasad |5

**AVRO Schema for propertiesMap**

     {
       "name": "propertiesMap",
       "type": [
         "null",
         {
           "type": "map",
           "values": "string"
         }
       ],
       "doc": "key value pair, e.g Name, deptartment, budget, location",
       "default": null
     }

In [0]:
# Convert columns to Map, then drop the source columns that were folded into it.
# Each map entry is a (literal key, column value) pair. All values of a map must share
# one type: "budget" is the only integer among string columns, so it is cast to string
# explicitly instead of relying on Spark's implicit type coercion (whose rules differ
# when ANSI SQL mode is enabled).
df_map1 = (
    df.withColumn(
        "propertiesMap",
        create_map(
            lit("Name"), col("Name"),
            lit("deptartment"), col("deptartment"),
            lit("budget"), col("budget").cast("string"),
            lit("location"), col("location"),
        ),
    )
    .drop('Name', 'deptartment', 'budget', 'location')
)
df_map1.printSchema()
display(df_map1)

root
 |-- ProductId: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- propertiesMap: map (nullable = false)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



ProductId,Age,propertiesMap
36636,25,"Map(Name -> Vikas, deptartment -> IT, budget -> 300000, location -> USA)"
40288,35,"Map(Name -> Mukul, deptartment -> Finance, budget -> 500000, location -> IND)"
42114,23,"Map(Name -> Kannan, deptartment -> Sales, budget -> 390000, location -> AUS)"
39192,31,"Map(Name -> Rishab, deptartment -> Marketing, budget -> 250000, location -> CAN)"
34534,37,"Map(Name -> Amaresh, deptartment -> Maintenance, budget -> 650000, location -> NZ)"
69114,23,"Map(Name -> Prakash, deptartment -> Sales, budget -> 390000, location -> AUS)"
78192,31,"Map(Name -> Pramod, deptartment -> Marketing, budget -> 250000, location -> CAN)"
56534,37,"Map(Name -> Prasad, deptartment -> Maintenance, budget -> 650000, location -> NZ)"


In [0]:
# truncate=False prints every cell in full, so the whole propertiesMap value is visible.
df_map1.show(truncate=False)

+---------+---+-------------------------------------------------------------------------------+
|ProductId|Age|propertiesMap                                                                  |
+---------+---+-------------------------------------------------------------------------------+
|36636    |25 |{Name -> Vikas, deptartment -> IT, budget -> 300000, location -> USA}          |
|40288    |35 |{Name -> Mukul, deptartment -> Finance, budget -> 500000, location -> IND}     |
|42114    |23 |{Name -> Kannan, deptartment -> Sales, budget -> 390000, location -> AUS}      |
|39192    |31 |{Name -> Rishab, deptartment -> Marketing, budget -> 250000, location -> CAN}  |
|34534    |37 |{Name -> Amaresh, deptartment -> Maintenance, budget -> 650000, location -> NZ}|
|69114    |23 |{Name -> Prakash, deptartment -> Sales, budget -> 390000, location -> AUS}     |
|78192    |31 |{Name -> Pramod, deptartment -> Marketing, budget -> 250000, location -> CAN}  |
|56534    |37 |{Name -> Prasad, deptartm

In [0]:
# Convert columns to Map, decorating two of the values first.
# Name gets a "Mr." prefix; budget is wrapped as "Rs<budget>/-".
titled_name = concat(lit("Mr."), col("Name"))
budget_label = concat(lit("Rs"), col("budget"), lit("/-"))

# Literal keys paired with their (possibly decorated) values.
properties_map = create_map(
    lit("Name"), titled_name,
    lit("deptartment"), col("deptartment"),
    lit("budget"), budget_label,
    lit("location"), col("location"),
)

# Add the map column, then drop the source columns that were folded into it.
df_map2 = (
    df.withColumn("propertiesMap", properties_map)
    .drop('Name', 'deptartment', 'budget', 'location')
)
df_map2.printSchema()
display(df_map2)

root
 |-- ProductId: string (nullable = true)
 |-- Age: integer (nullable = true)
 |-- propertiesMap: map (nullable = false)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



ProductId,Age,propertiesMap
36636,25,"Map(Name -> Mr.Vikas, deptartment -> IT, budget -> Rs300000/-, location -> USA)"
40288,35,"Map(Name -> Mr.Mukul, deptartment -> Finance, budget -> Rs500000/-, location -> IND)"
42114,23,"Map(Name -> Mr.Kannan, deptartment -> Sales, budget -> Rs390000/-, location -> AUS)"
39192,31,"Map(Name -> Mr.Rishab, deptartment -> Marketing, budget -> Rs250000/-, location -> CAN)"
34534,37,"Map(Name -> Mr.Amaresh, deptartment -> Maintenance, budget -> Rs650000/-, location -> NZ)"
69114,23,"Map(Name -> Mr.Prakash, deptartment -> Sales, budget -> Rs390000/-, location -> AUS)"
78192,31,"Map(Name -> Mr.Pramod, deptartment -> Marketing, budget -> Rs250000/-, location -> CAN)"
56534,37,"Map(Name -> Mr.Prasad, deptartment -> Maintenance, budget -> Rs650000/-, location -> NZ)"


In [0]:
# truncate=False prints every cell in full, so the whole propertiesMap value is visible.
df_map2.show(truncate=False)

+---------+---+--------------------------------------------------------------------------------------+
|ProductId|Age|propertiesMap                                                                         |
+---------+---+--------------------------------------------------------------------------------------+
|36636    |25 |{Name -> Mr.Vikas, deptartment -> IT, budget -> Rs300000/-, location -> USA}          |
|40288    |35 |{Name -> Mr.Mukul, deptartment -> Finance, budget -> Rs500000/-, location -> IND}     |
|42114    |23 |{Name -> Mr.Kannan, deptartment -> Sales, budget -> Rs390000/-, location -> AUS}      |
|39192    |31 |{Name -> Mr.Rishab, deptartment -> Marketing, budget -> Rs250000/-, location -> CAN}  |
|34534    |37 |{Name -> Mr.Amaresh, deptartment -> Maintenance, budget -> Rs650000/-, location -> NZ}|
|69114    |23 |{Name -> Mr.Prakash, deptartment -> Sales, budget -> Rs390000/-, location -> AUS}     |
|78192    |31 |{Name -> Mr.Pramod, deptartment -> Marketing, budget -> Rs