In [0]:
from pyspark.sql.types import (
    StructType, StructField, StringType, LongType, BooleanType, IntegerType, ArrayType, DoubleType
)
from pyspark.sql.utils import AnalysisException
from pyspark.sql import functions as F

# Schema for the JSON payload, based on the structure of the table.
# Every field is declared nullable. Nested pieces are built first and then
# assembled into the top-level `schema` at the bottom.


def _field(name, dtype):
    """Return a nullable StructField called *name* holding *dtype*."""
    return StructField(name, dtype, True)


def _text_fields(*names):
    """Return one nullable string StructField per name, in the given order."""
    return [_field(name, StringType()) for name in names]


def _text_struct(*names):
    """Return a StructType consisting solely of nullable string fields."""
    return StructType(_text_fields(*names))


# data.price_overview
_price_overview = StructType([
    _field("currency", StringType()),
    _field("initial", LongType()),
    _field("final", LongType()),
    _field("discount_percent", IntegerType()),
    *_text_fields("initial_formatted", "final_formatted"),
])

# Element type of data.package_groups[].subs
_package_sub = StructType([
    _field("packageid", LongType()),
    _field("percent_savings_text", StringType()),
    _field("percent_savings", IntegerType()),
    *_text_fields("option_text", "option_description", "can_get_free_license"),
    _field("is_free_license", BooleanType()),
    _field("price_in_cents_with_discount", LongType()),
])

# Element type of data.package_groups
_package_group = StructType([
    *_text_fields("name", "title", "description", "selection_text", "save_text"),
    _field("display_type", IntegerType()),
    _field("is_recurring_subscription", StringType()),
    _field("subs", ArrayType(_package_sub)),
])

# Element type of data.movies; webm and mp4 share the same two string fields.
_movie = StructType([
    _field("id", LongType()),
    *_text_fields("name", "thumbnail"),
    _field("webm", _text_struct("480", "max")),
    _field("mp4", _text_struct("480", "max")),
    _field("highlight", BooleanType()),
])

# data.achievements
_achievements = StructType([
    _field("total", IntegerType()),
    _field("highlighted", ArrayType(_text_struct("name", "path"))),
])

# data.ratings: esrb carries extra fields, crl only a rating, and every other
# board is a (rating, descriptors) pair of strings.
_ratings = StructType([
    _field(
        "esrb",
        _text_struct(
            "rating",
            "use_age_gate",
            "required_age",
            "descriptors",
            "interactive_elements",
        ),
    ),
    *[
        _field(board, _text_struct("rating", "descriptors"))
        for board in (
            "pegi", "usk", "oflc", "nzoflc", "kgrb", "dejus", "fpb", "csrr",
        )
    ],
    _field("crl", _text_struct("rating")),
])

# The "data" object of the payload.
_data = StructType([
    *_text_fields("type", "name"),
    _field("steam_appid", LongType()),
    _field("required_age", IntegerType()),
    _field("is_free", BooleanType()),
    *_text_fields(
        "detailed_description",
        "about_the_game",
        "short_description",
        "supported_languages",
        "header_image",
        "capsule_image",
        "capsule_imagev5",
        "website",
    ),
    # One (minimum, recommended) string pair per platform.
    *[
        _field(requirements, _text_struct("minimum", "recommended"))
        for requirements in (
            "pc_requirements", "mac_requirements", "linux_requirements",
        )
    ],
    *_text_fields("legal_notice", "ext_user_account_notice"),
    _field("developers", ArrayType(StringType())),
    _field("publishers", ArrayType(StringType())),
    _field("price_overview", _price_overview),
    _field("packages", ArrayType(LongType())),
    _field("package_groups", ArrayType(_package_group)),
    _field(
        "platforms",
        StructType([
            _field(platform, BooleanType())
            for platform in ("windows", "mac", "linux")
        ]),
    ),
    _field(
        "metacritic",
        StructType([
            _field("score", IntegerType()),
            _field("url", StringType()),
        ]),
    ),
    # Note: category ids are integers while genre ids are strings.
    _field(
        "categories",
        ArrayType(StructType([
            _field("id", IntegerType()),
            _field("description", StringType()),
        ])),
    ),
    _field("genres", ArrayType(_text_struct("id", "description"))),
    _field(
        "screenshots",
        ArrayType(StructType([
            _field("id", IntegerType()),
            *_text_fields("path_thumbnail", "path_full"),
        ])),
    ),
    _field("movies", ArrayType(_movie)),
    _field("recommendations", StructType([_field("total", LongType())])),
    _field("achievements", _achievements),
    _field(
        "release_date",
        StructType([
            _field("coming_soon", BooleanType()),
            _field("date", StringType()),
        ]),
    ),
    _field("support_info", _text_struct("url", "email")),
    *_text_fields("background", "background_raw"),
    _field(
        "content_descriptors",
        StructType([
            _field("ids", ArrayType(IntegerType())),
            _field("notes", StringType()),
        ]),
    ),
    _field("ratings", _ratings),
])

# Top-level document: a success flag plus the nested "data" object.
schema = StructType([
    _field("success", BooleanType()),
    _field("data", _data),
])



In [0]:
# Read every JSON file under the inbound "details" folder using the explicit
# schema. The multiline option lets a single JSON document span several lines.
inbound_details_path = (
    "abfss://steam@steamstorageaccount.dfs.core.windows.net/inbound/details/*.json"
)
details_reader = spark.read.schema(schema).option("multiline", "true")
df_new = details_reader.json(inbound_details_path)

# Mark the DataFrame for caching; it is reused by later cells.
df_new.cache()

In [0]:
# Load the rows already stored in the bronze table. If the lookup raises
# AnalysisException (presumably because the table does not exist yet),
# fall back to an empty DataFrame that has the same schema.
bronze_table_name = "steam.bronze.details_games"
try:
    df_old = spark.table(bronze_table_name)
except AnalysisException:
    df_old = spark.createDataFrame(data=[], schema=schema)

# Mark the DataFrame for caching; it is reused by later cells.
df_old.cache()

In [0]:
# Stack the freshly read rows on top of the previously stored ones,
# matching columns by name rather than by position.
df = df_new.unionByName(other=df_old)

In [0]:
# Keep only the freshly read rows that are not already stored in the table.
#
# A "union + groupBy over every column + count == 1" filter is deliberately
# not used here, because its result is appended to the very table that
# `df_old` was read from:
#   * rows present only in `df_old` also have count == 1, so they would be
#     written to the table a second time;
#   * rows repeated inside `df_new` have count > 1, so they would be dropped
#     and never ingested.
# `subtract` (EXCEPT DISTINCT) returns each distinct row of `df_new` that has
# no equal row in `df_old`. It matches columns by position, so `df_old` is
# first projected into `df_new`'s column order.
df_distinct = df_new.subtract(df_old.select(*df_new.columns))

In [0]:
# Make sure the target schema exists; the statement is a no-op when it
# already does (IF NOT EXISTS).
create_bronze_schema_sql = """
    CREATE SCHEMA IF NOT EXISTS steam.bronze
    """
spark.sql(create_bronze_schema_sql)

In [0]:
# Append the selected rows to the bronze Delta table.
bronze_writer = df_distinct.write.format("delta").mode("append")
bronze_writer.saveAsTable("steam.bronze.details_games")