In [0]:
# Sample data: one row holding an id and a raw JSON document kept as plain text.
jsonString = '{"Zipcode":704,"ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}'
df = spark.createDataFrame(data=[(1, jsonString)], schema=["id", "value"])
df.show(truncate=False)

+---+--------------------------------------------------------------------------+
|id |value                                                                     |
+---+--------------------------------------------------------------------------+
|1  |{"Zipcode":704,"ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}|
+---+--------------------------------------------------------------------------+



In [0]:
from pyspark.sql.functions import from_json, json_tuple, get_json_object, to_json, schema_of_json, col, lit
from pyspark.sql.types import StructType, MapType, StructField, StringType, IntegerType

## 1. from_json()

**The PySpark from_json() function converts a JSON string column into a StructType or MapType column.**

In [0]:
# Parse the JSON text in `value` into a map whose keys and values are both strings.
string_map_type = MapType(StringType(), StringType())
parsed_value = from_json(col('value'), string_map_type)
df2 = df.withColumn('value', parsed_value)

df2.printSchema()

root
 |-- id: long (nullable = true)
 |-- value: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [0]:
# Print df2 with full cell contents; truncate=False stops long values being cut off.
df2.show(truncate=False)

+---+---------------------------------------------------------------------------+
|id |value                                                                      |
+---+---------------------------------------------------------------------------+
|1  |{Zipcode -> 704, ZipCodeType -> STANDARD, City -> PARC PARQUE, State -> PR}|
+---+---------------------------------------------------------------------------+



## 2. to_json()

**The to_json() function converts a DataFrame column of MapType or StructType into a JSON string.**

In [0]:
# Serialize the map-typed `value` column of df2 back into a JSON string.
# The result is bound to a new name instead of being assigned back onto df2:
# to_json() only accepts struct/map/array input, so overwriting df2 with the
# string-typed frame would make this cell fail if it were run a second time.
df_map_json = df2.withColumn('value', to_json(col('value')))
df_map_json.printSchema()
df_map_json.show(truncate=False)

root
 |-- id: long (nullable = true)
 |-- value: string (nullable = true)

+---+----------------------------------------------------------------------------+
|id |value                                                                       |
+---+----------------------------------------------------------------------------+
|1  |{"Zipcode":"704","ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}|
+---+----------------------------------------------------------------------------+



# JSON to StructType

In [0]:
# Explicit schema: every JSON field is read as a string-typed struct member.
schema = StructType([
    StructField(field_name, StringType())
    for field_name in ('Zipcode', 'ZipCodeType', 'City', 'State')
])

# Parse the raw JSON text in `value` into a struct that follows `schema`.
df2 = df.withColumn('value', from_json(col('value'), schema))
df2.printSchema()
df2.show(truncate=False)

root
 |-- id: long (nullable = true)
 |-- value: struct (nullable = true)
 |    |-- Zipcode: string (nullable = true)
 |    |-- ZipCodeType: string (nullable = true)
 |    |-- City: string (nullable = true)
 |    |-- State: string (nullable = true)

+---+--------------------------------+
|id |value                           |
+---+--------------------------------+
|1  |{704, STANDARD, PARC PARQUE, PR}|
+---+--------------------------------+



In [0]:
# Serialize the struct-typed `value` column of df2 back into a JSON string.
# The result is bound to a new name instead of being assigned back onto df2:
# to_json() only accepts struct/map/array input, so overwriting df2 with the
# string-typed frame would make this cell fail if it were run a second time.
df_struct_json = df2.withColumn('value', to_json('value'))
df_struct_json.printSchema()
df_struct_json.show(truncate=False)

root
 |-- id: long (nullable = true)
 |-- value: string (nullable = true)

+---+----------------------------------------------------------------------------+
|id |value                                                                       |
+---+----------------------------------------------------------------------------+
|1  |{"Zipcode":"704","ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}|
+---+----------------------------------------------------------------------------+



## 3. json_tuple()

**The json_tuple() function is used to query or extract elements from a JSON column and return the results as new columns.**

In [0]:
# Extract several top-level JSON fields at once; the same list of names is used
# both to pick the fields and to label the resulting columns.
json_fields = ["Zipcode", "ZipCodeType", "City", "State"]
extracted = df.select('id', json_tuple('value', *json_fields))
extracted.toDF("id", *json_fields).show(truncate=False)

+---+-------+-----------+-----------+-----+
|id |Zipcode|ZipCodeType|City       |State|
+---+-------+-----------+-----------+-----+
|1  |704    |STANDARD   |PARC PARQUE|PR   |
+---+-------+-----------+-----------+-----+



## 4. get_json_object()

**get_json_object() extracts a value from a JSON string column based on the given JSON path.**

In [0]:
# Pull only the City field out of the JSON text via a JSON path expression.
city_value = get_json_object(col('value'), '$.City').alias('city')
df.select('id', city_value).show(truncate=False)

+---+-----------+
|id |city       |
+---+-----------+
|1  |PARC PARQUE|
+---+-----------+



## 5. schema_of_json()

**Use schema_of_json() to derive a schema string from a sample JSON string.**

In [0]:
# Infer a schema string from one sample JSON document passed as a literal.
sample_json = lit("""{"Zipcode":704,"ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}""")
schema_rows = spark.range(1).select(schema_of_json(sample_json)).collect()
schemaStr = schema_rows[0][0]  # first column of the only row

print(schemaStr)

STRUCT<City: STRING, State: STRING, ZipCodeType: STRING, Zipcode: BIGINT>
