In [0]:
# Gold schema settings: catalog / schema names and the base path used for file reads.
catalog_name, schema = "users", "yukiteru_koide"
base_path = "/Volumes/users/yukiteru_koide/yukiterumart_etl"

# Switch the session to the catalog and schema configured above.
spark.sql(f"USE CATALOG {catalog_name}")
spark.sql(f"USE SCHEMA {schema}")


In [0]:
# Load the Silver table of enriched transactions via its fully qualified name.
silver_table_name = f"{catalog_name}.{schema}.transactions_enriched"
df = spark.table(silver_table_name)

from pyspark.sql.functions import avg, count, sum as _sum

# Load the review data from CSV: the first row is the header and column
# types are inferred by Spark.
reviews_csv_path = f"{base_path}/reviews.csv"
df_reviews = spark.read.csv(reviews_csv_path, header=True, inferSchema=True)

# Aggregate reviews per product: average rating and number of reviews
# (count() only counts rows whose review_id is non-null).
review_metrics = [
    avg("rating").alias("avg_rating"),
    count("review_id").alias("num_reviews"),
]
review_stats = df_reviews.groupBy("product_id").agg(*review_metrics)

# Aggregate sales per product: summed quantity and summed total_price,
# grouped by product id, name and category.
product_keys = ["product_id", "product_name", "category"]
sales_totals = [
    _sum("quantity").alias("total_units_sold"),
    _sum("total_price").alias("total_sales"),
]
product_sales = df.groupBy(*product_keys).agg(*sales_totals)

# Join sales with review metrics. The left join keeps every product that has
# sales, so a product without any review would get NULL review columns.
# num_reviews is a count, so report 0 instead of NULL for those products;
# avg_rating is left NULL because there is no rating to average.
sales_vs_reviews = (
    product_sales
    .join(review_stats, "product_id", "left")
    .fillna(0, subset=["num_reviews"])
)

# Write the result as a Delta table, replacing any previous contents.
# The fully qualified name (same convention as the Silver table read) keeps
# the target independent of the session's current catalog/schema.
(
    sales_vs_reviews.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(f"{catalog_name}.{schema}.sales_vs_reviews")
)
