In [0]:
# Import libraries
from pyspark.sql.types import *
from pyspark.sql.functions import from_json, explode_outer, col, regexp_replace, lit, when

# Load the source orders table
order = spark.table('workspace.tabelas_ifood.order')

# Undo the backslash escaping in the raw `items` text so it can be parsed as
# JSON. The three substitutions are applied in this order, longest escape first:
#   1. four backslashes followed by a double quote -> a bare double quote
#   2. two backslashes followed by a double quote  -> removed entirely
#   3. any backslash still left                    -> removed
items_unescaped = regexp_replace(col('items'), r'\\\\\\\\"', '"')
items_unescaped = regexp_replace(items_unescaped, r'\\\\"', '')
items_unescaped = regexp_replace(items_unescaped, r'\\', '')

# Keep only the composite join key plus the column to be expanded, drop exact
# duplicate rows, then swap in the cleaned `items` text
order_details = (
    order
    .select('order_id', 'order_created_at', 'items')
    .dropDuplicates()
    .withColumn('items', items_unescaped)
)

# Schemas used to parse the `items` JSON. Every field is declared nullable.

# Monetary amount: a value and its currency, both read as strings
price_struct = (
    StructType()
    .add('value', StringType(), True)
    .add('currency', StringType(), True)
)

# One garnish entry nested inside an item
garnish_struct = (
    StructType()
    .add('name', StringType(), True)
    .add('addition', price_struct, True)
    .add('discount', price_struct, True)
    .add('quantity', DoubleType(), True)
    .add('sequence', IntegerType(), True)
    .add('unitPrice', price_struct, True)
    .add('categoryId', StringType(), True)
    .add('externalId', StringType(), True)
    .add('totalValue', price_struct, True)
    .add('categoryName', StringType(), True)
    .add('integrationId', StringType(), True)
)

# One order item, including its nested array of garnish entries
item_struct = (
    StructType()
    .add('name', StringType(), True)
    .add('addition', price_struct, True)
    .add('discount', price_struct, True)
    .add('quantity', DoubleType(), True)
    .add('sequence', IntegerType(), True)
    .add('unitPrice', price_struct, True)
    .add('externalId', StringType(), True)
    .add('totalValue', price_struct, True)
    .add('customerNote', StringType(), True)
    .add('garnishItems', ArrayType(garnish_struct), True)
    .add('integrationId', StringType(), True)
    .add('totalAddition', price_struct, True)
    .add('totalDiscount', price_struct, True)
)

# The `items` column holds a JSON array of item objects
item_schema = ArrayType(item_struct)

# Parse the cleaned JSON text into an array of item structs
order_details_parsed = order_details.withColumn(
    "items_parsed", from_json(col("items"), item_schema)
)

# Turn the parsed array into rows: one row per item. explode_outer keeps an
# order whose `items_parsed` is null or empty as a single row with a null item.
order_details_items = order_details_parsed.select(
    col("order_id"),
    col("order_created_at"),
    explode_outer("items_parsed").alias("item"),
)

# Replace a missing garnish list with an empty one. The empty array is built
# with an explicit element type instead of `lit([])`: list literals are not
# accepted by `lit` on older PySpark releases, and an untyped empty array
# depends on implicit coercion to match the struct array in `otherwise`.
empty_garnish_items = from_json(lit("[]"), ArrayType(garnish_struct))
order_details_items = order_details_items.withColumn(
    "garnish_items",
    when(col("item.garnishItems").isNull(), empty_garnish_items)
    .otherwise(col("item.garnishItems")),
)

# One row per garnish entry; items without any garnish keep one row with a
# null `garnish` because explode_outer is used rather than explode.
order_details_exploded = order_details_items.withColumn(
    "garnish", explode_outer("garnish_items")
)

# Final flat schema: (nested source path, output column name), in output order
final_projection = [
    # Item-level fields
    ('item.name', 'item_name'),
    ('item.quantity', 'item_quantity'),
    ('item.sequence', 'item_sequence'),
    ('item.externalId', 'item_externalId'),
    ('item.customerNote', 'item_customerNote'),
    ('item.unitPrice.value', 'unit_price'),
    ('item.unitPrice.currency', 'unit_currency'),
    ('item.totalValue.value', 'total_value'),
    ('item.totalValue.currency', 'total_currency'),
    ('item.addition.value', 'addition_value'),
    ('item.addition.currency', 'addition_currency'),
    ('item.discount.value', 'discount_value'),
    ('item.discount.currency', 'discount_currency'),
    ('item.totalAddition.value', 'total_addition_value'),
    ('item.totalAddition.currency', 'total_addition_currency'),
    ('item.totalDiscount.value', 'total_discount_value'),
    ('item.totalDiscount.currency', 'total_discount_currency'),
    # Garnish-level fields
    ('garnish.name', 'garnish_name'),
    ('garnish.addition.value', 'garnish_addition_price'),
    ('garnish.addition.currency', 'garnish_addition_currency'),
    ('garnish.discount.value', 'garnish_discount_price'),
    ('garnish.discount.currency', 'garnish_discount_currency'),
    ('garnish.quantity', 'garnish_quantity'),
    ('garnish.unitPrice.value', 'garnish_unit_price'),
    ('garnish.unitPrice.currency', 'garnish_unit_currency'),
    ('garnish.categoryId', 'garnish_categoryId'),
    ('garnish.externalId', 'garnish_externalId'),
    ('garnish.totalValue.value', 'garnish_total_value'),
    ('garnish.totalValue.currency', 'garnish_total_currency'),
    ('garnish.categoryName', 'garnish_categoryName'),
    ('garnish.integrationId', 'garnish_integrationId'),
]

# The order key columns come first, followed by the flattened nested fields
order_details_final = order_details_exploded.select(
    'order_id',
    'order_created_at',
    *[col(source_path).alias(output_name) for source_path, output_name in final_projection]
)

# Save the result as a table, overwriting whatever it previously contained
order_details_final.write.mode('overwrite').saveAsTable('workspace.tabelas_ifood.order_details')