In [0]:
from pyspark.sql.functions import col, count

In [0]:
%sql
-- Make sure the `external` schema exists in the steamdatabricks_workspace catalog
-- before the tables below are created in it. No-op when the schema is already there.
CREATE SCHEMA IF NOT EXISTS
  steamdatabricks_workspace.external

In [0]:
%sql
-- External Delta table `external.playtime`: every column is declared as STRING,
-- the data lives under the bronze/playtime/ path of the `steam` container.
-- NOTE(review): the table is partitioned by appid, i.e. one partition per distinct
-- app id - confirm that this cardinality is intended for a Delta table.
CREATE EXTERNAL TABLE IF NOT EXISTS steamdatabricks_workspace.external.playtime (
  appid                     STRING,
  playtime_forever          STRING,
  playtime_windows_forever  STRING,
  playtime_mac_forever      STRING,
  playtime_linux_forever    STRING,
  playtime_deck_forever     STRING,
  rtime_last_played         STRING,
  playtime_disconnected     STRING
)
USING DELTA
PARTITIONED BY (appid)
LOCATION 'abfss://steam@steamstorageaccount.dfs.core.windows.net/bronze/playtime/'

In [0]:
%sql
-- External Delta table `external.details`: every column is declared as STRING,
-- the data lives under the bronze/details/ path of the `steam` container.
-- Column order is kept exactly as declared; only the layout is aligned.
-- NOTE(review): the table is partitioned by steam_appid, i.e. one partition per
-- distinct app id - confirm that this cardinality is intended for a Delta table.
CREATE EXTERNAL TABLE IF NOT EXISTS steamdatabricks_workspace.external.details (
  about_the_game           STRING,
  achievements             STRING,
  background               STRING,
  background_raw           STRING,
  capsule_image            STRING,
  capsule_imagev5          STRING,
  categories               STRING,
  content_descriptors      STRING,
  controller_support       STRING,
  demos                    STRING,
  detailed_description     STRING,
  developers               STRING,
  dlc                      STRING,
  drm_notice               STRING,
  ext_user_account_notice  STRING,
  genres                   STRING,
  header_image             STRING,
  is_free                  STRING,
  legal_notice             STRING,
  linux_requirements       STRING,
  mac_requirements         STRING,
  metacritic               STRING,
  movies                   STRING,
  name                     STRING,
  package_groups           STRING,
  packages                 STRING,
  pc_requirements          STRING,
  platforms                STRING,
  price_overview           STRING,
  publishers               STRING,
  ratings                  STRING,
  recommendations          STRING,
  release_date             STRING,
  required_age             STRING,
  reviews                  STRING,
  screenshots              STRING,
  short_description        STRING,
  steam_appid              STRING,
  support_info             STRING,
  supported_languages      STRING,
  type                     STRING,
  website                  STRING
)
USING DELTA
PARTITIONED BY (steam_appid)
LOCATION 'abfss://steam@steamstorageaccount.dfs.core.windows.net/bronze/details/'

In [0]:
# Incremental load: append to external.playtime only those rows of
# steam.playtime_b that the external table does not contain yet.
df_table = spark.table("steamdatabricks_workspace.steam.playtime_b")
df_external = spark.table("steamdatabricks_workspace.external.playtime")

# Fail fast on schema drift instead of silently dropping source columns in the
# select() below. Names are compared case-insensitively, which is how Spark
# resolves columns by default.
source_cols = {name.lower() for name in df_table.columns}
target_cols = {name.lower() for name in df_external.columns}
if source_cols != target_cols:
    raise ValueError(
        "Column mismatch between steam.playtime_b and external.playtime: "
        f"{sorted(source_cols ^ target_cols)}"
    )

# subtract() is EXCEPT DISTINCT: it keeps the rows that are present in the source
# but absent from the target (NULLs compare as equal) and de-duplicates them.
# It matches columns by position, hence the select() that puts the source columns
# into the target's order first.
# A union + groupBy + "count == 1" filter would instead yield the symmetric
# difference, re-appending rows that exist only in the target (duplicating them)
# and never loading rows that occur more than once in the source.
df_incremental = df_table.select(*df_external.columns).subtract(df_external)

(
    df_incremental.write
    .format("delta")
    .mode("append")
    .partitionBy("appid")
    .saveAsTable("steamdatabricks_workspace.external.playtime")
)

In [0]:
# Incremental load: flatten steam.details_b and append to external.details only
# those rows that the external table does not contain yet.
df_table = spark.table("steamdatabricks_workspace.steam.details_b")
df_external = spark.table("steamdatabricks_workspace.external.details")

# Promote every field of the nested `data` struct to a top-level column of the
# same name, then drop the struct itself together with the `success` column.
for field in df_table.schema["data"].dataType.fields:
    df_table = df_table.withColumn(field.name, col("data." + field.name))
df_table = df_table.drop("data", "success")

# Fail fast on schema drift instead of silently dropping source columns in the
# select() below. Names are compared case-insensitively, which is how Spark
# resolves columns by default.
source_cols = {name.lower() for name in df_table.columns}
target_cols = {name.lower() for name in df_external.columns}
if source_cols != target_cols:
    raise ValueError(
        "Column mismatch between flattened steam.details_b and external.details: "
        f"{sorted(source_cols ^ target_cols)}"
    )

# subtract() is EXCEPT DISTINCT: it keeps the rows that are present in the source
# but absent from the target (NULLs compare as equal) and de-duplicates them.
# It matches columns by position, hence the select() that puts the source columns
# into the target's order first.
# A union + groupBy + "count == 1" filter would instead yield the symmetric
# difference, re-appending rows that exist only in the target (duplicating them)
# and never loading rows that occur more than once in the source.
df_incremental = df_table.select(*df_external.columns).subtract(df_external)

(
    df_incremental.write
    .format("delta")
    .mode("append")
    .partitionBy("steam_appid")
    .saveAsTable("steamdatabricks_workspace.external.details")
)