-- In [0]:
-- ============================================================
-- 04 - AIRBNB MARTS (SQL ONLY)
--  - airbnb_mart_daily_city_metrics          (incremental)
--  - airbnb_mart_daily_neighbourhood_metrics (incremental)
--  - airbnb_mart_host_performance            (full rebuild)
-- ============================================================

---------------------------------------------
-- 0. Ensure schema
--    Create the airbnb_project schema if it does not exist yet and make it
--    the current schema, so the unqualified table names used by the rest of
--    this script resolve against it.
---------------------------------------------
CREATE SCHEMA IF NOT EXISTS airbnb_project;
USE airbnb_project;

-- ============================================================
-- 1) DAILY CITY METRICS MART (INCREMENTAL BY DATE)
--    Table: airbnb_mart_daily_city_metrics
-- ============================================================

-- 1.1 Target table for the city-wide daily mart. Created only when missing;
--     an existing table (and its rows) is left untouched.
--     Step 1.2 populates it with one row per (date_key, date).
CREATE TABLE IF NOT EXISTS airbnb_mart_daily_city_metrics (
    date_key INT,                    -- airbnb_dim_date.date_key of the date
    date DATE,                       -- calendar date from airbnb_fact_calendar
    active_listings BIGINT,          -- COUNT(DISTINCT listing_id) for the date
    avg_nightly_price DOUBLE,        -- AVG(nightly_price) over the date's calendar rows
    total_potential_revenue DOUBLE   -- SUM(nightly_price) over rows with is_available = 1
) USING DELTA;

-- 1.2 Incremental insert: append only dates later than the newest date
--     already stored in the mart.
--     * The date filter is applied to the fact rows BEFORE aggregating, so a
--       refresh aggregates only the new dates. `date` is a grouping key, so
--       this produces the same rows as filtering after the aggregation.
--     * The INSERT names its target columns explicitly so it does not depend
--       on the physical column order of the table.
--     NOTE(review): dates at or before the stored maximum are never
--     recomputed, so calendar rows arriving later for those dates are not
--     reflected in the mart; confirm this fits the upstream load pattern.
WITH max_existing AS (
  -- High-water mark. The 1900-01-01 sentinel applies when the mart is empty,
  -- in which case every later date in the fact table qualifies.
  SELECT COALESCE(MAX(date), DATE '1900-01-01') AS max_date
  FROM airbnb_mart_daily_city_metrics
),
city_daily_new AS (
  -- One row per calendar date that is not yet in the mart.
  SELECT
    d.date_key,
    f.date,
    COUNT(DISTINCT f.listing_id) AS active_listings,
    AVG(f.nightly_price)         AS avg_nightly_price,
    -- Only rows with is_available = 1 contribute their price; others add 0.
    SUM(
      CASE WHEN f.is_available = 1 THEN f.nightly_price ELSE 0 END
    ) AS total_potential_revenue
  FROM airbnb_fact_calendar f
  INNER JOIN airbnb_dim_date d
    ON f.date = d.date
  WHERE f.date > (SELECT m.max_date FROM max_existing m)
  GROUP BY d.date_key, f.date
)
INSERT INTO airbnb_mart_daily_city_metrics (
  date_key,
  date,
  active_listings,
  avg_nightly_price,
  total_potential_revenue
)
SELECT
  c.date_key,
  c.date,
  c.active_listings,
  c.avg_nightly_price,
  c.total_potential_revenue
FROM city_daily_new c;

-- ============================================================
-- 2) DAILY NEIGHBOURHOOD METRICS MART (INCREMENTAL BY DATE)
--    Table: airbnb_mart_daily_neighbourhood_metrics
-- ============================================================

-- 2.1 Target table for the per-neighbourhood daily mart. Created only when
--     missing; an existing table (and its rows) is left untouched.
--     Step 2.2 populates it with one row per
--     (date_key, date, neighbourhood, neighbourhood_group).
CREATE TABLE IF NOT EXISTS airbnb_mart_daily_neighbourhood_metrics (
    date_key INT,                    -- airbnb_dim_date.date_key of the date
    date DATE,                       -- calendar date from airbnb_fact_calendar
    neighbourhood STRING,            -- airbnb_dim_listing.neighbourhood
    neighbourhood_group STRING,      -- airbnb_dim_listing.neighbourhood_group
    active_listings BIGINT,          -- COUNT(DISTINCT listing_id) in the group
    avg_nightly_price DOUBLE,        -- AVG(nightly_price) over the group's calendar rows
    total_potential_revenue DOUBLE   -- SUM(nightly_price) over rows with is_available = 1
) USING DELTA;

-- 2.2 Incremental insert: append only dates later than the newest date
--     already stored in the mart (one high-water mark for the whole table,
--     not one per neighbourhood).
--     * The date filter is applied to the fact rows BEFORE aggregating, so a
--       refresh aggregates only the new dates. `date` is a grouping key, so
--       this produces the same rows as filtering after the aggregation.
--     * The INSERT names its target columns explicitly so it does not depend
--       on the physical column order of the table.
--     NOTE(review): dates at or before the stored maximum are never
--     recomputed, so calendar rows arriving later for those dates are not
--     reflected in the mart; confirm this fits the upstream load pattern.
WITH max_existing AS (
  -- High-water mark. The 1900-01-01 sentinel applies when the mart is empty,
  -- in which case every later date in the fact table qualifies.
  SELECT COALESCE(MAX(date), DATE '1900-01-01') AS max_date
  FROM airbnb_mart_daily_neighbourhood_metrics
),
neigh_daily_new AS (
  -- One row per (date, neighbourhood, neighbourhood_group) not yet in the
  -- mart. The inner join drops calendar rows whose listing_id has no match
  -- in airbnb_dim_listing.
  SELECT
    d.date_key,
    f.date,
    l.neighbourhood,
    l.neighbourhood_group,
    COUNT(DISTINCT f.listing_id) AS active_listings,
    AVG(f.nightly_price)         AS avg_nightly_price,
    -- Only rows with is_available = 1 contribute their price; others add 0.
    SUM(
      CASE WHEN f.is_available = 1 THEN f.nightly_price ELSE 0 END
    ) AS total_potential_revenue
  FROM airbnb_fact_calendar f
  INNER JOIN airbnb_dim_listing l
    ON f.listing_id = l.listing_id
  INNER JOIN airbnb_dim_date d
    ON f.date = d.date
  WHERE f.date > (SELECT m.max_date FROM max_existing m)
  GROUP BY
    d.date_key,
    f.date,
    l.neighbourhood,
    l.neighbourhood_group
)
INSERT INTO airbnb_mart_daily_neighbourhood_metrics (
  date_key,
  date,
  neighbourhood,
  neighbourhood_group,
  active_listings,
  avg_nightly_price,
  total_potential_revenue
)
SELECT
  n.date_key,
  n.date,
  n.neighbourhood,
  n.neighbourhood_group,
  n.active_listings,
  n.avg_nightly_price,
  n.total_potential_revenue
FROM neigh_daily_new n;

-- ============================================================
-- 3) HOST PERFORMANCE MART (FULL REBUILD)
--    Table: airbnb_mart_host_performance
-- ============================================================

-- Full rebuild: the table is replaced on every run with one row per
-- (host_id, host_name) that has at least one listing in airbnb_dim_listing.
CREATE OR REPLACE TABLE airbnb_mart_host_performance
USING DELTA
AS
WITH review_counts AS (
  -- Reviews are pre-aggregated to one row per listing, so joining them to
  -- listings below cannot multiply listing rows.
  SELECT
    fr.listing_id,
    COUNT(*) AS total_reviews
  FROM airbnb_fact_review fr
  GROUP BY fr.listing_id
),
listing_stats AS (
  -- Every listing with its review count; total_reviews is NULL for
  -- listings without any review (LEFT JOIN).
  SELECT
    dl.host_id,
    dl.listing_id,
    dl.base_price,
    rc.total_reviews
  FROM airbnb_dim_listing dl
  LEFT JOIN review_counts rc
    ON rc.listing_id = dl.listing_id
)
SELECT
  dh.host_id,
  dh.host_name,
  COUNT(DISTINCT ls.listing_id) AS num_listings,
  AVG(ls.base_price) AS avg_listing_price,
  -- SUM over only-NULL review counts is NULL; report 0 instead.
  COALESCE(SUM(ls.total_reviews), 0) AS total_reviews
FROM airbnb_dim_host dh
INNER JOIN listing_stats ls
  ON ls.host_id = dh.host_id
GROUP BY
  dh.host_id,
  dh.host_name;

-- ============================================================
-- DONE
-- ============================================================
-- Completion marker: returns a single-row, single-column result once all
-- statements above have run.
SELECT
  '04 marts refresh complete' AS status;
