In [0]:
from pyspark.sql import functions as F
from pyspark.sql import Window

# Three-part table identifiers (catalog.schema.table) used by this cell:
# CITY_DAILY is the input table, CITY_OVERVIEW is the output table.
CATALOG = "workspace"
CITY_DAILY = CATALOG + ".airbnb_gold.city_daily_metrics"
CITY_OVERVIEW = CATALOG + ".airbnb_gold.city_overview"

# Load the per-city daily metrics table.
city_daily = spark.table(CITY_DAILY)

# Build the city overview by keeping, for each city, only the row with the
# most recent last_scraped_dt. No aggregation happens here: row_number()
# numbers each city's rows newest-first and only row 1 is kept.
# NOTE(review): if a city can have several rows sharing the same
# last_scraped_dt, which of them is numbered 1 is arbitrary -- confirm the
# source table holds one row per city per date.
_newest_first = Window.partitionBy("city").orderBy(F.desc("last_scraped_dt"))
_numbered = city_daily.withColumn("rank", F.row_number().over(_newest_first))

city_overview = _numbered.where(F.col("rank") == 1).select(
    F.col("city"),
    F.col("last_scraped_dt"),
    F.col("total_listings"),
    # Metrics are rounded for presentation; some are renamed on the way out.
    F.round(F.col("avg_price_per_night"), 2).alias("avg_price_per_night"),
    F.round(F.col("occupancy_rate_estimate"), 3).alias("avg_occupancy_rate"),
    F.round(F.col("avg_review_score"), 2).alias("avg_review_score"),
    F.col("unique_hosts").alias("host_count"),
)

# Persist the overview as a Delta table. "overwrite" mode replaces whatever
# the target table currently holds, and overwriteSchema lets the stored
# schema be replaced along with the data.
city_overview.write.saveAsTable(
    CITY_OVERVIEW,
    format="delta",
    mode="overwrite",
    overwriteSchema="true",
)
