In [0]:
%python
# Create the DataFrame and register it as a temp view so the SQL cells can query it

from pyspark.sql.types import StructType, StructField, StringType

# Sample input: every row is a 1-tuple whose single element is one raw JSON
# event document kept as plain text (it is parsed later, in the SQL cells).
json_data = [
    (raw_event,)
    for raw_event in (
        '{"event_id":"12345","user": {"id":"001","name":"Arthur Luz"},"items": [{"product_id":"A1","quantity": 2}, {"product_id":"B2","quantity": 5}] }',
        '{"event_id":"67890","user": {"id":"002","name":"Carlos Souza"},"items": [{"product_id":"C3","quantity": 1}, {"product_id":"D4","quantity": 4}] }',
    )
]

# One nullable string column, "body", holding the unparsed JSON text.
schema = StructType(
    fields=[StructField(name="body", dataType=StringType(), nullable=True)]
)

# Build the DataFrame, then expose it to SQL under the temp view name `raw_data`.
df_raw = spark.createDataFrame(data=json_data, schema=schema)
df_raw.createOrReplaceTempView('raw_data')

In [0]:
-- Manual approach: the JSON schema is hand-written as a DDL-style string,
-- stored in the session variable `schema_from_column`, and handed to FROM_JSON.
DECLARE OR REPLACE VARIABLE schema_from_column STRING
  DEFAULT 'STRUCT<event_id: STRING, user: STRUCT<id:STRING, name: STRING>, items: ARRAY<STRUCT<product_id: STRING, quantity: BIGINT>> >';

-- Parse each raw JSON `body` of `raw_data` into a struct column `body_dict`.
SELECT FROM_JSON(body, schema_from_column) AS body_dict
FROM raw_data

In [0]:
-- Dynamic approach: infer the schema from the data instead of hand-writing it.
-- (Re)declare the session variable that will hold the inferred schema string.
DECLARE OR REPLACE schema_from_column STRING;

-- Assign to the variable declared above. The target name must match the
-- DECLARE exactly; assigning to any other name leaves `schema_from_column`
-- unset and the FROM_JSON below without a schema.
-- SCHEMA_OF_JSON_AGG aggregates over every `body` value in `raw_data`; with no
-- GROUP BY the subquery yields a single row, so no DISTINCT is needed.
SET VARIABLE schema_from_column = (
  SELECT
      SCHEMA_OF_JSON_AGG(body) AS inferred_schema
  FROM raw_data
);

-- Parse each raw JSON `body` using the inferred schema.
SELECT 
    FROM_JSON(
      body, 
      schema_from_column
    ) AS body_dict
FROM raw_data