In [0]:
from pyspark.sql.types import StructType, StructField, StringType
from pyspark.sql.utils import AnalysisException
from pyspark.sql import functions as F

# Schema for the inbound JSON, based on the structure of the target table.
# Every field is read as a nullable string.
schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in (
        "appid",
        "playtime_forever",
        "playtime_windows_forever",
        "playtime_mac_forever",
        "playtime_linux_forever",
        "playtime_deck_forever",
        "rtime_last_played",
        "playtime_disconnected",
    )
])

In [0]:
# Read every JSON file in the inbound user folder with the explicit schema.
# multiLine allows a single JSON document to span several lines of a file.
df_new = spark.read.json(
    "abfss://steam@steamstorageaccount.dfs.core.windows.net/inbound/user/*.json",
    schema=schema,
    multiLine=True,
)

# Mark the frame for caching; nothing is materialised until the first action.
df_new.cache()

In [0]:
# Load the rows already stored in the bronze table. When Spark cannot resolve
# the table (AnalysisException), fall back to an empty frame that has the
# same schema as the incoming data.
try:
    df_old = spark.read.table("steam.bronze.user_games")
except AnalysisException:
    df_old = spark.createDataFrame(data=[], schema=schema)

# Mark the frame for caching; nothing is materialised until the first action.
df_old.cache()

In [0]:
# Stack the newly ingested rows and the rows already in the bronze table,
# matching columns by name rather than by position.
df = (
    df_new
    .unionByName(df_old)
)

In [0]:
# Keep only the rows of the new load that are not already stored in the bronze
# table, so that appending df_distinct to that table never re-inserts a row.
#
# A "group by every column and keep count == 1" filter over the union of both
# frames is NOT equivalent to this:
#   * a row that exists only in the stored table also has count 1 and would be
#     appended again as a duplicate;
#   * a row that occurs more than once in the new files has count > 1 and
#     would be dropped even though the table does not contain it yet.
#
# subtract() is EXCEPT DISTINCT: it returns the distinct rows of df_new that do
# not appear in df_old, treating NULLs as equal. It resolves columns by
# position, so df_old is first projected into df_new's column order.
df_distinct = df_new.subtract(df_old.select(*df_new.columns))

In [0]:
# Ensure the target schema exists; the statement is a no-op when it already does.
create_schema_stmt = """
    CREATE SCHEMA IF NOT EXISTS steam.bronze
    """
spark.sql(create_schema_stmt)

In [0]:
# Append the selected rows to the bronze Delta table.
(
    df_distinct.write
    .mode("append")
    .format("delta")
    .saveAsTable("steam.bronze.user_games")
)