# FACT_CONSUMPTION_DAILY - Daily Consumption Fact Table

Daily grain fact table containing consumption and revenue metrics for the Genie Space.

## 1. Create Table Schema

In [None]:
%sql
-- Drop table if exists (uncomment if needed, e.g. to pick up a changed column type,
-- because CREATE TABLE IF NOT EXISTS leaves an already-existing table untouched)
-- DROP TABLE IF EXISTS ankit_yadav.demo.fact_consumption_daily;

-- Daily-grain consumption fact table: one row per date / account / workspace / SKU.
-- Account and workspace attributes are denormalized onto each row.
-- Price, revenue and DBU measures use DECIMAL rather than FLOAT so that values
-- rounded to cents are stored exactly and aggregate without binary rounding drift.
CREATE TABLE IF NOT EXISTS ankit_yadav.demo.fact_consumption_daily (
  consumption_date DATE NOT NULL COMMENT 'Date of consumption',
  account_id STRING NOT NULL COMMENT 'Foreign key to dim_accounts',
  workspace_id STRING COMMENT 'Databricks workspace identifier',
  platform STRING COMMENT 'Cloud platform (AWS, Azure, GCP)',
  sku STRING COMMENT 'Product SKU (JOBS, SQL, SERVING, etc.)',
  account_name STRING COMMENT 'Customer account name',
  account_executive STRING COMMENT 'Account Executive name',
  account_executive_manager STRING COMMENT 'AE Manager name',
  workspace_name STRING COMMENT 'Workspace display name',
  list_price_per_dbu DECIMAL(18, 4) COMMENT 'List price per DBU',
  revenue_dollars DECIMAL(18, 2) COMMENT 'Revenue in dollars',
  dbus_consumed DECIMAL(18, 2) COMMENT 'Databricks Units consumed',
  load_timestamp TIMESTAMP COMMENT 'ETL load timestamp'
)
COMMENT 'Daily grain fact table containing consumption and revenue metrics';

## 2. Insert Demo Data

Generate 90 days of consumption data for all demo accounts across multiple SKUs and workspaces.

In [None]:
%sql
-- Clear existing demo data so this cell can be re-run without duplicating rows
DELETE FROM ankit_yadav.demo.fact_consumption_daily WHERE account_id LIKE 'DEMO%';

-- Generate 90 days of daily consumption data
-- Using CTEs to create the date range and cross join it with accounts and SKUs

WITH date_range AS (
  -- SEQUENCE is inclusive on both ends, so starting 89 days back yields exactly
  -- 90 calendar dates ending today
  SELECT EXPLODE(SEQUENCE(
    DATE_SUB(CURRENT_DATE(), 89),
    CURRENT_DATE(),
    INTERVAL 1 DAY
  )) AS consumption_date
),
accounts AS (
  -- Demo accounts only; list just the columns used below instead of SELECT *
  SELECT
    account_id,
    account_name,
    account_region,
    account_executive_name,
    account_executive_manager
  FROM ankit_yadav.demo.dim_accounts
  WHERE account_id LIKE 'DEMO%'
),
skus AS (
  -- One row per (SKU, workspace) combination with its list price per DBU
  SELECT 'JOBS_COMPUTE' AS sku, 0.40 AS list_price, 'Production' AS workspace_suffix UNION ALL
  SELECT 'JOBS_COMPUTE' AS sku, 0.40 AS list_price, 'Development' AS workspace_suffix UNION ALL
  SELECT 'SQL_COMPUTE' AS sku, 0.55 AS list_price, 'Analytics' AS workspace_suffix UNION ALL
  SELECT 'ALL_PURPOSE_COMPUTE' AS sku, 0.55 AS list_price, 'Development' AS workspace_suffix UNION ALL
  SELECT 'MODEL_SERVING' AS sku, 0.07 AS list_price, 'Production' AS workspace_suffix UNION ALL
  SELECT 'DELTA_LIVE_TABLES' AS sku, 0.36 AS list_price, 'Production' AS workspace_suffix
),
base_consumption AS (
  SELECT
    d.consumption_date,
    a.account_id,
    CONCAT('WS-', a.account_id, '-', s.workspace_suffix) AS workspace_id,
    -- Platform must be stable per workspace: the fallback hashes the workspace key
    -- instead of drawing RAND() per row, so a workspace does not switch cloud
    -- provider from one day (or one SKU) to the next
    CASE
      WHEN a.account_region LIKE '%California%' THEN 'AWS'
      WHEN a.account_region LIKE '%Pacific%' THEN 'AZURE'
      WHEN PMOD(HASH(a.account_id, s.workspace_suffix), 2) = 0 THEN 'AWS'
      ELSE 'AZURE'
    END AS platform,
    s.sku,
    a.account_name,
    a.account_executive_name AS account_executive,
    a.account_executive_manager,
    CONCAT(a.account_name, ' - ', s.workspace_suffix) AS workspace_name,
    s.list_price AS list_price_per_dbu,
    -- Base DBU consumption varies by account size and day of week
    ROUND(
      (
        -- Base consumption by account (larger accounts consume more)
        CASE
          WHEN a.account_id IN ('DEMO003', 'DEMO008') THEN 800  -- Large accounts
          WHEN a.account_id IN ('DEMO001', 'DEMO002', 'DEMO007', 'DEMO010') THEN 500  -- Medium
          ELSE 250  -- Smaller accounts
        END
        -- SKU multiplier (the ELSE branch covers DELTA_LIVE_TABLES)
        * CASE
          WHEN s.sku = 'JOBS_COMPUTE' THEN 1.5
          WHEN s.sku = 'SQL_COMPUTE' THEN 1.2
          WHEN s.sku = 'ALL_PURPOSE_COMPUTE' THEN 0.8
          WHEN s.sku = 'MODEL_SERVING' THEN 2.0
          ELSE 0.6
        END
        -- Day of week effect (lower on weekends; DAYOFWEEK: 1 = Sunday, 7 = Saturday)
        * CASE
          WHEN DAYOFWEEK(d.consumption_date) IN (1, 7) THEN 0.3
          ELSE 1.0
        END
        -- Random variation (+/- 30%)
        * (0.7 + RAND() * 0.6)
        -- Growth trend (newer dates have slightly more consumption), measured from
        -- the first date in date_range
        * (1 + (DATEDIFF(d.consumption_date, DATE_SUB(CURRENT_DATE(), 89)) / 300.0))
      ), 2
    ) AS dbus_consumed
  FROM date_range d
  CROSS JOIN accounts a
  CROSS JOIN skus s
  -- Not all accounts have all SKUs - filter to realistic combinations
  WHERE NOT (
    (a.account_id IN ('DEMO010', 'DEMO012') AND s.sku = 'MODEL_SERVING') OR
    (a.account_id IN ('DEMO005', 'DEMO011') AND s.sku = 'DELTA_LIVE_TABLES')
  )
)
-- Explicit target column list so the load does not depend on table column order
INSERT INTO ankit_yadav.demo.fact_consumption_daily (
  consumption_date,
  account_id,
  workspace_id,
  platform,
  sku,
  account_name,
  account_executive,
  account_executive_manager,
  workspace_name,
  list_price_per_dbu,
  revenue_dollars,
  dbus_consumed,
  load_timestamp
)
SELECT
  consumption_date,
  account_id,
  workspace_id,
  platform,
  sku,
  account_name,
  account_executive,
  account_executive_manager,
  workspace_name,
  list_price_per_dbu,
  -- Revenue = DBUs consumed x list price, rounded to cents
  ROUND(dbus_consumed * list_price_per_dbu, 2) AS revenue_dollars,
  dbus_consumed,
  CURRENT_TIMESTAMP() AS load_timestamp
FROM base_consumption;

## 3. Verify Data

In [None]:
%sql
-- Sample data: the 20 most recent rows for one demo account.
-- workspace_id is included in the ORDER BY because a SKU can appear in more than
-- one workspace on the same day; without it the LIMIT would cut ties arbitrarily.
SELECT
  consumption_date,
  account_id,
  workspace_id,
  platform,
  sku,
  account_name,
  account_executive,
  account_executive_manager,
  workspace_name,
  list_price_per_dbu,
  revenue_dollars,
  dbus_consumed,
  load_timestamp
FROM ankit_yadav.demo.fact_consumption_daily
WHERE account_id = 'DEMO001'
ORDER BY consumption_date DESC, sku, workspace_id
LIMIT 20;

In [None]:
%sql
-- Totals by account, with a true per-day average.
-- The fact table holds several rows per account per day (one per SKU/workspace),
-- so AVG(revenue_dollars) would be the average per row, not per day. The daily
-- average is therefore total revenue divided by the number of distinct days.
-- Every group has at least one row and consumption_date is NOT NULL, so the
-- divisor is never zero.
SELECT
  account_name,
  account_executive_manager,
  COUNT(DISTINCT consumption_date) AS days_with_consumption,
  ROUND(SUM(revenue_dollars), 2) AS total_revenue,
  ROUND(SUM(dbus_consumed), 2) AS total_dbus,
  ROUND(SUM(revenue_dollars) / COUNT(DISTINCT consumption_date), 2) AS avg_daily_revenue
FROM ankit_yadav.demo.fact_consumption_daily
GROUP BY account_name, account_executive_manager
ORDER BY total_revenue DESC;

In [None]:
%sql
-- Revenue by SKU: row count, revenue, DBUs and mean list price per product SKU,
-- highest-revenue SKU first.
WITH sku_rollup AS (
  -- Raw (unrounded) aggregates per SKU
  SELECT
    sku,
    COUNT(*) AS record_count,
    SUM(revenue_dollars) AS revenue_sum,
    SUM(dbus_consumed) AS dbu_sum,
    AVG(list_price_per_dbu) AS list_price_mean
  FROM ankit_yadav.demo.fact_consumption_daily
  GROUP BY sku
)
SELECT
  sku,
  record_count,
  ROUND(revenue_sum, 2) AS total_revenue,
  ROUND(dbu_sum, 2) AS total_dbus,
  ROUND(list_price_mean, 2) AS avg_list_price
FROM sku_rollup
ORDER BY total_revenue DESC;

In [None]:
%sql
-- Record count summary: a single row describing the whole fact table
-- (row count, distinct accounts / dates / SKUs, date span and total revenue).
WITH table_stats AS (
  -- Raw table-wide aggregates; revenue is rounded in the final projection
  SELECT
    COUNT(*) AS row_total,
    COUNT(DISTINCT account_id) AS account_total,
    COUNT(DISTINCT consumption_date) AS date_total,
    COUNT(DISTINCT sku) AS sku_total,
    MIN(consumption_date) AS first_date,
    MAX(consumption_date) AS last_date,
    SUM(revenue_dollars) AS revenue_sum
  FROM ankit_yadav.demo.fact_consumption_daily
)
SELECT
  row_total AS total_records,
  account_total AS unique_accounts,
  date_total AS unique_dates,
  sku_total AS unique_skus,
  first_date AS earliest_date,
  last_date AS latest_date,
  ROUND(revenue_sum, 2) AS total_revenue
FROM table_stats;