<a href="https://colab.research.google.com/github/amrit6878/Learning-PySpark/blob/main/MapType.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

`MapType` is used when a column stores key-value pairs (similar to a Python `dict`).


```
MapType(keyType, valueType, valueContainsNull=True)
```
Parameters:

- `keyType` → Data type of the keys (e.g., `StringType()`, `IntegerType()`)
- `valueType` → Data type of the values (e.g., `StringType()`, `IntegerType()`, `ArrayType()`)
- `valueContainsNull` → Whether values can be null (default `True`)

Notes:

- Keys cannot be null.
- Values can be null if `valueContainsNull=True`.



In [2]:
from IPython.display import display
from pyspark.sql import SparkSession
from pyspark.sql.types import MapType, StringType, IntegerType, ArrayType, StructType, StructField

spark = SparkSession.builder.appName("MapType").getOrCreate()

# Explicit schema: "properties" is a map with string keys and string values.
# All three fields are declared nullable.
schema = StructType(
    [
        StructField("id", IntegerType(), True),
        StructField("name", StringType(), True),
        StructField("properties", MapType(StringType(), StringType()), True),
    ]
)

# Each Python dict becomes one map value; the last row stores a null under "dept".
data = [
    (1, "Amrit", {"city": "Delhi", "dept": "IT"}),
    (2, "Riya", {"city": "Mumbai", "dept": "HR"}),
    (3, "John", {"city": "Bangalore", "dept": None}),
]

df = spark.createDataFrame(data, schema)

# Convert to pandas so the notebook renders the rows as a rich table.
pdf = df.toPandas()
display(pdf)

Unnamed: 0,id,name,properties
0,1,Amrit,"{'city': 'Delhi', 'dept': 'IT'}"
1,2,Riya,"{'city': 'Mumbai', 'dept': 'HR'}"
2,3,John,"{'city': 'Bangalore', 'dept': None}"


In [5]:
from pyspark.sql.functions import col

# Indexing a map column by key yields that key's value for each row.
properties = col("properties")

df_transformed = (
    df
    .withColumn("city", properties["city"])
    .withColumn("department", properties["dept"])
)

pdf_transformed = df_transformed.toPandas()
display(pdf_transformed)

Unnamed: 0,id,name,properties,city,department
0,1,Amrit,"{'city': 'Delhi', 'dept': 'IT'}",Delhi,IT
1,2,Riya,"{'city': 'Mumbai', 'dept': 'HR'}",Mumbai,HR
2,3,John,"{'city': 'Bangalore', 'dept': None}",Bangalore,


# Common MapType Functions in PySpark

1. `map_keys()` - Returns an array of keys.
2. `map_values()` - Returns an array of values.
3. `element_at()` - Access a specific key’s value in the map.
4. `map_entries()` - Converts the map into an array of structs (key, value).
5. `map_from_arrays()` - Create a map column from two array columns.
6. `map_concat()` - Concatenate two or more map columns.



In [7]:
from pyspark.sql.functions import map_keys

# One array holding the map's keys per row, shown next to the name.
keys_column = map_keys("properties").alias("keys")
df.select("name", keys_column).show(truncate=False)

+-----+------------+
|name |keys        |
+-----+------------+
|Amrit|[city, dept]|
|Riya |[city, dept]|
|John |[city, dept]|
+-----+------------+



In [13]:
from pyspark.sql.functions import map_values

# One array holding the map's values per row, shown next to the name.
values_column = map_values("properties").alias("values")
df.select("name", values_column).show(truncate=False)

+-----+-----------------+
|name |values           |
+-----+-----------------+
|Amrit|[Delhi, IT]      |
|Riya |[Mumbai, HR]     |
|John |[Bangalore, NULL]|
+-----+-----------------+



In [9]:
from pyspark.sql.functions import element_at

# Look up the value stored under the "city" key of each row's map.
city_column = element_at("properties", "city").alias("city")
df.select("name", city_column).show()

+-----+---------+
| name|     city|
+-----+---------+
|Amrit|    Delhi|
| Riya|   Mumbai|
| John|Bangalore|
+-----+---------+



In [14]:
from pyspark.sql.functions import map_from_arrays

# Two parallel array columns: "keys" holds the map keys, "values" the matching values.
rows = [
    (1, ["city", "dept"], ["Delhi", "IT"]),
    (2, ["city", "dept"], ["Mumbai", "HR"]),
]
df2 = spark.createDataFrame(rows, ["id", "keys", "values"])

# Combine the two array columns into a single map column.
df2.withColumn("map_col", map_from_arrays("keys", "values")).show(truncate=False)

+---+------------+------------+----------------------------+
|id |keys        |values      |map_col                     |
+---+------------+------------+----------------------------+
|1  |[city, dept]|[Delhi, IT] |{city -> Delhi, dept -> IT} |
|2  |[city, dept]|[Mumbai, HR]|{city -> Mumbai, dept -> HR}|
+---+------------+------------+----------------------------+



In [17]:
# map_from_arrays is imported here as well, so this cell does not rely on the
# import performed by an earlier cell.
from pyspark.sql.functions import array, lit, map_concat, map_from_arrays

# A one-entry literal map {"country": "India"}, built from two single-element arrays.
country_map = map_from_arrays(array(lit("country")), array(lit("India")))

# Merge the literal map into every row's "properties" map.
# NOTE: the two maps share no keys here. With Spark 3.0+ defaults
# (spark.sql.mapKeyDedupPolicy=EXCEPTION) duplicate keys would raise an error.
df.withColumn(
    "extended_properties",
    map_concat("properties", country_map),
).show(truncate=False)

+---+-----+---------------------------------+---------------------------------------------------+
|id |name |properties                       |extended_properties                                |
+---+-----+---------------------------------+---------------------------------------------------+
|1  |Amrit|{city -> Delhi, dept -> IT}      |{city -> Delhi, dept -> IT, country -> India}      |
|2  |Riya |{city -> Mumbai, dept -> HR}     |{city -> Mumbai, dept -> HR, country -> India}     |
|3  |John |{city -> Bangalore, dept -> NULL}|{city -> Bangalore, dept -> NULL, country -> India}|
+---+-----+---------------------------------+---------------------------------------------------+

