In [0]:
import dlt
import requests
from pyspark.sql.functions import col, count, explode, collect_list, desc, first, round, sum as sum_spark

@dlt.table(
    name="categorias",
    comment="dados da bigtables gerando uma tabela categorias"
)
def playtime_silver():
    """Build the ``categorias`` table: game count and total playtime per category.

    Reads the ``bigtable`` and ``playtime`` gold datasets, reduces ``bigtable``
    to its distinct (appid, id_categories, description_categories) rows, and
    aggregates them by category description.

    Returns:
        A DataFrame with one row per category and the columns:
        ``categories`` (the category description),
        ``qtd_games`` (count of non-null appids in the category, computed
        before the playtime join),
        ``playtime_total`` (sum of ``playtime_forever`` rounded to 2 decimals),
        ``appids`` (list of appids collected after the playtime join),
        ordered by ``playtime_total`` descending.
    """
    # NOTE(review): dlt.read is given fully qualified three-part names here;
    # confirm these datasets are resolvable from this pipeline.
    bigtable = dlt.read("steamdatabricks_workspace.gold.bigtable")
    playtime = dlt.read("steamdatabricks_workspace.gold.playtime")

    categories = (
        bigtable.select("appid", "id_categories", "description_categories")
        .distinct()
        .withColumnRenamed("description_categories", "categories")
    )

    # Count games per category up front so the number is not inflated by the
    # join below. No orderBy at this stage: the explode/join/groupBy that
    # follow discard row order, so sorting here would only add a wasted shuffle.
    games_per_category = categories.groupBy("categories").agg(
        count("appid").alias("qtd_games"),
        collect_list("appid").alias("appids"),
    )

    # Back to one row per (category, appid), now carrying the category's count.
    category_apps = games_per_category.select(
        "categories",
        "qtd_games",
        explode("appids").alias("appid"),
    )

    # Left join keeps apps that have no playtime row (their playtime is null
    # and is ignored by the sum).
    # NOTE(review): if `playtime` holds several rows per appid, this join fans
    # out and `appids` will contain repeated ids — verify against the schema.
    with_playtime = category_apps.join(
        playtime.select("appid", "playtime_forever"),
        on="appid",
        how="left",
    )

    return (
        with_playtime.groupBy("categories")
        .agg(
            # qtd_games is constant within a category, so first() is safe.
            first("qtd_games").alias("qtd_games"),
            round(sum_spark("playtime_forever"), 2).alias("playtime_total"),
            collect_list("appid").alias("appids"),
        )
        .orderBy(desc("playtime_total"))
    )