In [1]:
import pyspark
from pyspark.sql import SparkSession
# Obtain the notebook's SparkSession through the builder's getOrCreate().
spark = (
    SparkSession.builder
    .getOrCreate()
)
# Bare last expression: the notebook displays the session object as the cell result.
spark

In [50]:
# Sample rows of (name, list of locations). The rows intentionally cover the
# awkward cases: a None element inside the list (Kim), an empty-string element
# (Lee), and None in place of the whole list (Peter).
arrayData = [
    ("Bob", ["New York", "Boston"]),
    ("Kim", ["Los Angeles", "Chicago", None]),
    ("Lee", ["Phoenix", ""]),
    ("Peter", None),
    ("Sam", ["San Diego", "Dallas"]),
]

# Column names, in the same order as the tuple fields above.
col = ["name", "location"]

In [51]:
# Build the array-typed DataFrame from the sample rows, using the list of
# column names as the schema.
df = spark.createDataFrame(
    arrayData,
    schema=col,
)
df.printSchema()
# show() truncates long cell values by default, which is why Kim's array
# appears cut off in the table.
df.show()

root
 |-- name: string (nullable = true)
 |-- location: array (nullable = true)
 |    |-- element: string (containsNull = true)

+-----+--------------------+
| name|            location|
+-----+--------------------+
|  Bob|  [New York, Boston]|
|  Kim|[Los Angeles, Chi...|
|  Lee|         [Phoenix, ]|
|Peter|                null|
|  Sam| [San Diego, Dallas]|
+-----+--------------------+



In [42]:
# explode(): one output row per array element, in a column named `col`.
# Null elements inside an array are kept (Kim), but a row whose array itself
# is null (Peter) produces no output rows.
from pyspark.sql.functions import explode

df2 = df.select(
    df["name"],
    explode(df["location"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)

+----+-----------+
|name|        col|
+----+-----------+
| Bob|   New York|
| Bob|     Boston|
| Kim|Los Angeles|
| Kim|    Chicago|
| Kim|       null|
| Lee|    Phoenix|
| Lee|           |
| Sam|  San Diego|
| Sam|     Dallas|
+----+-----------+



In [45]:
# posexplode(): like explode(), plus a zero-based `pos` column giving each
# element's index within its array. Rows with a null array (Peter) are dropped.
from pyspark.sql.functions import posexplode

df2 = df.select(
    df["name"],
    posexplode(df["location"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- pos: integer (nullable = false)
 |-- col: string (nullable = true)

+----+---+-----------+
|name|pos|        col|
+----+---+-----------+
| Bob|  0|   New York|
| Bob|  1|     Boston|
| Kim|  0|Los Angeles|
| Kim|  1|    Chicago|
| Kim|  2|       null|
| Lee|  0|    Phoenix|
| Lee|  1|           |
| Sam|  0|  San Diego|
| Sam|  1|     Dallas|
+----+---+-----------+



In [43]:
# explode_outer(): same as explode(), except that a row whose array is null
# (Peter) is kept and yields a single row with a null `col`.
from pyspark.sql.functions import explode_outer

df2 = df.select(
    df["name"],
    explode_outer(df["location"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)

+-----+-----------+
| name|        col|
+-----+-----------+
|  Bob|   New York|
|  Bob|     Boston|
|  Kim|Los Angeles|
|  Kim|    Chicago|
|  Kim|       null|
|  Lee|    Phoenix|
|  Lee|           |
|Peter|       null|
|  Sam|  San Diego|
|  Sam|     Dallas|
+-----+-----------+



In [52]:
# posexplode_outer(): positional variant of explode_outer(). A row with a null
# array (Peter) is kept with both `pos` and `col` null, so `pos` becomes
# nullable in the resulting schema.
from pyspark.sql.functions import posexplode_outer

df2 = df.select(
    df["name"],
    posexplode_outer(df["location"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- pos: integer (nullable = true)
 |-- col: string (nullable = true)

+-----+----+-----------+
| name| pos|        col|
+-----+----+-----------+
|  Bob|   0|   New York|
|  Bob|   1|     Boston|
|  Kim|   0|Los Angeles|
|  Kim|   1|    Chicago|
|  Kim|   2|       null|
|  Lee|   0|    Phoenix|
|  Lee|   1|           |
|Peter|null|       null|
|  Sam|   0|  San Diego|
|  Sam|   1|     Dallas|
+-----+----+-----------+



In [53]:
# Sample rows of (name, dict of favorites). The rows intentionally cover the
# awkward cases: a None value (Kim), an empty-string value (Lee), None in
# place of the whole dict (Peter), and an empty dict (Sam).
mapData = [
    ("Bob", {"cuisine": "Chinese", "color": "blue"}),
    ("Kim", {"cuisine": "Indian", "color": None}),
    ("Lee", {"cuisine": "Japanese", "color": ""}),
    ("Peter", None),
    ("Sam", {}),
]

# Column names, in the same order as the tuple fields above.
# Note: this rebinds `col`, replacing the earlier ['name', 'location'] list.
col = ["name", "favorites"]

In [54]:
# Build the map-typed DataFrame from the sample rows. This rebinds `df`, so
# from here on `df` is the favorites (map) frame rather than the array-based
# one created earlier.
df = spark.createDataFrame(
    mapData,
    schema=col,
)
df.printSchema()
# show() truncates long cell values by default, which is why the maps appear
# cut off in the table.
df.show()

root
 |-- name: string (nullable = true)
 |-- favorites: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+-----+--------------------+
| name|           favorites|
+-----+--------------------+
|  Bob|{color -> blue, c...|
|  Kim|{color -> null, c...|
|  Lee|{color -> , cuisi...|
|Peter|                null|
|  Sam|                  {}|
+-----+--------------------+



In [11]:
# explode() on a map column: one output row per map entry, split into `key`
# and `value` columns. Rows whose map is null (Peter) or empty (Sam) produce
# no output rows.
from pyspark.sql.functions import explode

df2 = df.select(
    df["name"],
    explode(df["favorites"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- key: string (nullable = false)
 |-- value: string (nullable = true)

+----+-------+--------+
|name|    key|   value|
+----+-------+--------+
| Bob|  color|    blue|
| Bob|cuisine| Chinese|
| Kim|  color|    null|
| Kim|cuisine|  Indian|
| Lee|  color|        |
| Lee|cuisine|Japanese|
+----+-------+--------+



In [49]:
# posexplode() on a map column: like explode(), plus a zero-based `pos` column
# numbering the entries within each map. Null (Peter) and empty (Sam) maps are
# dropped.
from pyspark.sql.functions import posexplode

df2 = df.select(
    df["name"],
    posexplode(df["favorites"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- pos: integer (nullable = false)
 |-- key: string (nullable = false)
 |-- value: string (nullable = true)

+----+---+-------+--------+
|name|pos|    key|   value|
+----+---+-------+--------+
| Bob|  0|  color|    blue|
| Bob|  1|cuisine| Chinese|
| Kim|  0|  color|    null|
| Kim|  1|cuisine|  Indian|
| Lee|  0|  color|        |
| Lee|  1|cuisine|Japanese|
+----+---+-------+--------+



In [14]:
# explode_outer() on a map column: same as explode(), except that rows whose
# map is null (Peter) or empty (Sam) are kept, with null `key` and `value`.
from pyspark.sql.functions import explode_outer

df2 = df.select(
    df["name"],
    explode_outer(df["favorites"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- key: string (nullable = true)
 |-- value: string (nullable = true)

+-----+-------+--------+
| name|    key|   value|
+-----+-------+--------+
|  Bob|  color|    blue|
|  Bob|cuisine| Chinese|
|  Kim|  color|    null|
|  Kim|cuisine|  Indian|
|  Lee|  color|        |
|  Lee|cuisine|Japanese|
|Peter|   null|    null|
|  Sam|   null|    null|
+-----+-------+--------+



In [58]:
# posexplode_outer() on a map column: positional variant of explode_outer().
# Rows whose map is null (Peter) or empty (Sam) are kept, with `pos`, `key`
# and `value` all null.
from pyspark.sql.functions import posexplode_outer

df2 = df.select(
    df["name"],
    posexplode_outer(df["favorites"]),
)
df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- pos: integer (nullable = true)
 |-- key: string (nullable = true)
 |-- value: string (nullable = true)

+-----+----+-------+--------+
| name| pos|    key|   value|
+-----+----+-------+--------+
|  Bob|   0|  color|    blue|
|  Bob|   1|cuisine| Chinese|
|  Kim|   0|  color|    null|
|  Kim|   1|cuisine|  Indian|
|  Lee|   0|  color|        |
|  Lee|   1|cuisine|Japanese|
|Peter|null|   null|    null|
|  Sam|null|   null|    null|
+-----+----+-------+--------+

