## Import Required Libraries

In [1]:
import os

import findspark

# Point findspark at the Spark installation before pyspark is imported.
# Prefer the SPARK_HOME environment variable so the notebook is not tied to
# one machine; fall back to the original local install path when it is unset
# or empty.
findspark.init(
    os.environ.get("SPARK_HOME")
    or "/Users/DOU2274/spark/spark-3.1.1-bin-hadoop2.7"
)
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *

## Create Spark Session

In [2]:
# Configure a builder for an application named "flatten json", then obtain
# the session from it (getOrCreate reuses an existing session if there is one).
session_builder = SparkSession.builder.appName("flatten json")
spark = session_builder.getOrCreate()

## Read the JSON file into Spark

In [70]:
path = 'person.json'

# multiLine is a boolean reader option: pass a real bool rather than the
# string "true". No schema is supplied, so Spark infers it from the file.
rawDF = spark.read.json(path, multiLine=True)

## Preview the schema

In [71]:
# Print the schema of the DataFrame read from the JSON file.
rawDF.printSchema()

root
 |-- address: struct (nullable = true)
 |    |-- city: string (nullable = true)
 |    |-- state: string (nullable = true)
 |    |-- streetAddress: string (nullable = true)
 |-- age: long (nullable = true)
 |-- firstName: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- lastName: string (nullable = true)
 |-- phoneNumbers: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- number: string (nullable = true)
 |    |    |-- type: string (nullable = true)



## Flatten the schema

In [76]:
# Flatten one level of nesting:
#   * "address.*" lifts every field of the address struct to a top-level column.
#   * The phoneNumbers array is expanded to one row per element.
# explode_outer is used instead of explode so that a person whose
# phoneNumbers is null or an empty array is kept (with a null phoneNumber)
# rather than silently dropped from the result.
df = rawDF.select(
    "firstName",
    "lastName",
    "age",
    "address.*",
    "gender",
    explode_outer("phoneNumbers").alias("phoneNumber"),
)
                 

In [77]:
# Print the schema of df to check the result of the flattening select.
df.printSchema()

root
 |-- firstName: string (nullable = true)
 |-- lastName: string (nullable = true)
 |-- age: long (nullable = true)
 |-- city: string (nullable = true)
 |-- state: string (nullable = true)
 |-- streetAddress: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- phoneNumber: struct (nullable = true)
 |    |-- number: string (nullable = true)
 |    |-- type: string (nullable = true)



#### Expand the phoneNumber struct and rename its columns

In [79]:
# Lift the fields of the phoneNumber struct to top-level columns, then give
# the resulting "number" and "type" columns phone-specific names.
flat_columns = [
    "firstName",
    "lastName",
    "age",
    "city",
    "state",
    "streetAddress",
    "gender",
    "phoneNumber.*",
]
finaldf = (
    df.select(*flat_columns)
    .withColumnRenamed("number", "phoneNo")
    .withColumnRenamed("type", "phoneNo_type")
)

In [80]:
# Display the flattened DataFrame with the renamed phone columns.
finaldf.show()

+---------+--------+---+-----------+-----+------------------+------+----------+------------+
|firstName|lastName|age|       city|state|     streetAddress|gender|   phoneNo|phoneNo_type|
+---------+--------+---+-----------+-----+------------------+------+----------+------------+
|      Joe| Jackson| 28|  San Diego|   CA|               101|  male|7349282382|        home|
|    Gboye|   Peter| 24|San Antonio|   TX|1003 centre street|female|6549282380|      mobile|
|     Kash|   Olumo| 38|  Sir Lanka|   KA|     43 eko street|female|4789282332|      mobile|
|    Kunle|   Ajayi| 60|      Lagos|   LA|   22 Adkunle yaba|  male|2229282432|        home|
+---------+--------+---+-----------+-----+------------------+------+----------+------------+



In [91]:
# Count the rows of the flattened frame for each gender value.
# NOTE(review): finaldf has one row per exploded phone entry, so this is a
# row count, not necessarily a person count — confirm that is the intent.
rows_by_gender = finaldf.groupBy("gender")
dfx = rows_by_gender.count()


In [92]:
# Display the per-gender counts computed above from finaldf.
dfx.show()

+------+-----+
|gender|count|
+------+-----+
|female|    2|
|  male|    2|
+------+-----+

