# AI/BI Dashboard & Genie Space

Programmatically creates a Lakeview (AI/BI) dashboard and a Genie Space
(natural-language Q&A for business users). If either API call fails, manual setup instructions are printed instead.

**Prereqs**: Run notebooks 00–07 first.

## 1 — Configuration

In [None]:
%pip install databricks-sdk --upgrade --quiet
# Restart Python after the install so later cells import the upgraded databricks-sdk.
dbutils.library.restartPython()

In [None]:
from databricks.sdk import WorkspaceClient
from pyspark.sql import functions as F

# Workspace API client; later cells use it to list SQL warehouses and create the Genie Space.
w = WorkspaceClient()

# Gold tables are addressed as <current catalog>.retail_gold.<table>.
CATALOG = spark.catalog.currentCatalog()
GOLD = f"{CATALOG}.retail_gold"

# The dashboard and Genie Space are saved under the calling user's home folder.
current_user = spark.sql("SELECT current_user()").first()[0]
DASHBOARD_PARENT = f"/Users/{current_user}"

# Echo the resolved settings, one aligned "label: value" line each.
for label, value in (
    ("Catalog    ", CATALOG),
    ("Gold schema", GOLD),
    ("Dashboard  ", DASHBOARD_PARENT),
):
    print(f"{label}: {value}")

---
## 2 — Define Dashboard Queries

Each query powers one widget in the AI/BI Dashboard.

In [None]:
# Maps dataset name -> SQL text. Each entry becomes one Lakeview dataset and
# feeds one widget. All queries read from the Gold schema resolved in GOLD.
#
# Ratio columns divide by NULLIF(<denominator>, 0): with ANSI SQL mode enabled
# a zero denominator would otherwise raise DIVIDE_BY_ZERO and fail the whole
# widget; with the guard the ratio is simply NULL for that row.
dashboard_queries = {
    # ── KPI Cards ──
    "kpi_total_revenue": f"""
        SELECT ROUND(SUM(net_revenue), 0) AS total_revenue,
               SUM(num_orders) AS total_orders,
               ROUND(SUM(total_profit), 0) AS total_profit,
               ROUND(SUM(total_profit) / NULLIF(SUM(net_revenue), 0) * 100, 1) AS overall_margin_pct
        FROM {GOLD}.gold_daily_sales
    """,

    # ── Revenue Trend (Line Chart) ──
    "revenue_trend_monthly": f"""
        SELECT year_month, region,
               ROUND(SUM(net_revenue), 0) AS net_revenue
        FROM {GOLD}.gold_monthly_sales
        GROUP BY year_month, region
        ORDER BY year_month
    """,

    # ── Revenue by Region (Bar Chart) ──
    "revenue_by_region": f"""
        SELECT region,
               ROUND(SUM(net_revenue), 0) AS net_revenue,
               ROUND(SUM(total_profit), 0) AS total_profit,
               ROUND(SUM(total_profit) / NULLIF(SUM(net_revenue), 0) * 100, 1) AS margin_pct
        FROM {GOLD}.gold_daily_sales
        GROUP BY region
        ORDER BY net_revenue DESC
    """,

    # ── Customer Segment Distribution (Pie) ──
    "customer_segments": f"""
        SELECT rfm_segment,
               COUNT(*) AS customers,
               ROUND(SUM(monetary), 0) AS total_ltv
        FROM {GOLD}.gold_customer_rfm
        GROUP BY rfm_segment
        ORDER BY total_ltv DESC
    """,

    # ── Top Brands (Horizontal Bar) ──
    "top_brands": f"""
        SELECT brand,
               ROUND(SUM(net_revenue), 0) AS net_revenue,
               ROUND(AVG(profit_margin_pct), 1) AS avg_margin_pct
        FROM {GOLD}.gold_product_performance
        GROUP BY brand
        ORDER BY net_revenue DESC
        LIMIT 15
    """,

    # ── Shipping Performance (Table) ──
    # on_time_pct is a shipment-weighted average of the per-row percentages.
    "shipping_performance": f"""
        SELECT ship_mode,
               SUM(total_shipments) AS shipments,
               ROUND(AVG(avg_delivery_delay_days), 1) AS avg_delay,
               ROUND(SUM(total_shipments * on_time_pct) / NULLIF(SUM(total_shipments), 0), 1) AS on_time_pct,
               ROUND(SUM(net_revenue), 0) AS revenue
        FROM {GOLD}.gold_shipping_analysis
        GROUP BY ship_mode
        ORDER BY revenue DESC
    """,

    # ── Quarterly Executive Trend ──
    "executive_quarterly": f"""
        SELECT year_quarter,
               total_orders,
               active_customers,
               ROUND(gross_order_value, 0) AS gross_order_value,
               ROUND(revenue_per_customer, 0) AS rev_per_customer,
               qoq_revenue_growth_pct
        FROM {GOLD}.gold_executive_summary
        ORDER BY year_quarter
    """,

    # ── Churn Risk Distribution ──
    "churn_risk_dist": f"""
        SELECT risk_tier,
               COUNT(*) AS customers,
               ROUND(AVG(churn_probability), 3) AS avg_churn_prob,
               ROUND(AVG(lifetime_value), 0) AS avg_ltv,
               ROUND(SUM(lifetime_value), 0) AS total_ltv_at_risk
        FROM {GOLD}.gold_churn_scores
        GROUP BY risk_tier
        ORDER BY avg_churn_prob DESC
    """,

    # ── YoY Growth Heatmap ──
    "yoy_growth": f"""
        SELECT year_month, region,
               ROUND(AVG(yoy_growth_pct), 1) AS yoy_growth_pct
        FROM {GOLD}.gold_monthly_sales
        WHERE yoy_growth_pct IS NOT NULL
        GROUP BY year_month, region
        ORDER BY year_month, region
    """,
}

# Summarise what was defined so the cell output doubles as a checklist.
print(f"Defined {len(dashboard_queries)} dashboard queries.")
for name in dashboard_queries:
    print(f"  • {name}")

## 3 — Validate All Queries Execute Successfully

In [None]:
# Smoke-test every dashboard query: run it, count the rows, and print one
# result line per query. A failure is reported (first 60 chars of the error)
# but does not stop the remaining queries from being checked.
header = f"{'Query':<30} {'Rows':>8}  {'Cols':>5}  Status"
print(header)
print("=" * 60)

blank = ""
failed_queries = []
for query_name, query_text in dashboard_queries.items():
    try:
        result = spark.sql(query_text)
        row_count = result.count()
        col_count = len(result.columns)
        print(f"{query_name:<30} {row_count:>8}  {col_count:>5}  ✓")
    except Exception as err:
        print(f"{query_name:<30} {blank:>8}  {blank:>5}  ✗ {str(err)[:60]}")
        failed_queries.append(query_name)

# all_ok is True only when no query raised.
all_ok = not failed_queries
if all_ok:
    verdict = "All queries passed!"
else:
    verdict = "Some queries failed — check Gold layer tables."
print(f"\n{verdict}")

## 4 — Create AI/BI Dashboard via Lakeview API

Databricks AI/BI Dashboards (Lakeview) can be created programmatically. This notebook builds the dashboard definition in Python and posts it to the Lakeview REST API.

In [None]:
import json

# Build Lakeview dashboard definition using the correct lvdash.json schema.
# Key: each widget query needs explicit `fields` with `expression`, and
# each encoding needs a `scale` with `type`.

# One Lakeview dataset per dashboard query: the dict key is the dataset name,
# a title-cased version of it is the display name, and the SQL is trimmed of
# the surrounding whitespace left by the triple-quoted literals.
datasets = [
    {
        "name": query_name,
        "displayName": query_name.replace("_", " ").title(),
        "query": query_sql.strip(),
    }
    for query_name, query_sql in dashboard_queries.items()
]

def _w(name, dataset, wtype, fields, encodings, pos, title, version=3, disagg=False):
    """Assemble one layout entry (position + widget) for a dashboard page.

    Args:
        name: Widget name.
        dataset: Name of the dataset the widget queries.
        wtype: Widget type string placed in the spec (e.g. "bar", "table").
        fields: Sequence of (field name, expression) pairs for the widget query.
        encodings: Encodings dict, stored in the spec as-is (not copied).
        pos: Indexable (x, y, width, height) placement.
        title: Title shown in the widget frame.
        version: Spec version number.
        disagg: Value for the query's "disaggregated" flag.

    Returns:
        A dict with "position" and "widget" keys.
    """
    position = {"x": pos[0], "y": pos[1], "width": pos[2], "height": pos[3]}

    field_specs = []
    for field in fields:
        field_specs.append({"name": field[0], "expression": field[1]})

    query = {
        "datasetName": dataset,
        "fields": field_specs,
        "disaggregated": disagg,
    }
    spec = {
        "version": version,
        "widgetType": wtype,
        "encodings": encodings,
        "frame": {"showTitle": True, "title": title},
    }
    widget = {
        "name": name,
        "queries": [{"name": "main_query", "query": query}],
        "spec": spec,
    }
    return {"position": position, "widget": widget}

def _plain(*columns):
    """Return (name, expression) pairs that select each column as-is, backtick-quoted."""
    return [(col, f"`{col}`") for col in columns]


def _enc(field, scale, label, **scale_extra):
    """Return one chart encoding: field reference, scale (type plus extras), display label."""
    return {"fieldName": field, "scale": {"type": scale, **scale_extra}, "displayName": label}


# Positions are (x, y, width, height). Widgets on both pages sit in two
# 3-wide columns, except the KPI counter which spans all 6.

# ── Page 1: Executive Overview ──
overview_layout = [
    # KPI counter — full width
    _w("kpi_cards", "kpi_total_revenue", "counter",
       fields=_plain("total_revenue", "total_orders"),
       encodings={"value": {"fieldName": "total_revenue", "displayName": "Total Revenue"}},
       pos=(0, 0, 6, 2), title="Revenue KPIs", version=2, disagg=True),

    # Revenue trend line — left half
    _w("rev_trend", "revenue_trend_monthly", "line",
       fields=_plain("year_month") + [("net_revenue", "SUM(`net_revenue`)")] + _plain("region"),
       encodings={
           "x": _enc("year_month", "categorical", "Month"),
           "y": _enc("net_revenue", "quantitative", "Net Revenue"),
           "color": _enc("region", "categorical", "Region"),
       },
       pos=(0, 2, 3, 5), title="Monthly Revenue Trend"),

    # Revenue by region bar — right half
    _w("rev_by_region", "revenue_by_region", "bar",
       fields=_plain("region", "net_revenue", "margin_pct"),
       encodings={
           "x": _enc("region", "categorical", "Region"),
           "y": _enc("net_revenue", "quantitative", "Net Revenue"),
       },
       pos=(3, 2, 3, 5), title="Revenue by Region", disagg=True),

    # Customer segments pie — left
    _w("cust_segments", "customer_segments", "pie",
       fields=_plain("rfm_segment", "customers", "total_ltv"),
       encodings={
           "color": _enc("rfm_segment", "categorical", "Segment"),
           "angle": _enc("customers", "quantitative", "Customers"),
       },
       pos=(0, 7, 3, 5), title="Customer Segments (RFM)", disagg=True),

    # Executive quarterly table — right
    _w("exec_quarterly", "executive_quarterly", "table",
       fields=_plain("year_quarter", "total_orders", "gross_order_value",
                     "rev_per_customer", "qoq_revenue_growth_pct"),
       encodings={},
       pos=(3, 7, 3, 5), title="Quarterly Executive Summary", version=2, disagg=True),
]

# ── Page 2: Products & Suppliers ──
products_layout = [
    # Top brands bar — left (horizontal: quantitative x, categorical y)
    _w("top_brands_chart", "top_brands", "bar",
       fields=_plain("brand", "net_revenue", "avg_margin_pct"),
       encodings={
           "x": _enc("net_revenue", "quantitative", "Revenue"),
           "y": _enc("brand", "categorical", "Brand", sort={"by": "x-reversed"}),
       },
       pos=(0, 0, 3, 6), title="Top 15 Brands by Revenue", disagg=True),

    # Shipping table — right
    _w("shipping_table", "shipping_performance", "table",
       fields=_plain("ship_mode", "shipments", "avg_delay", "on_time_pct", "revenue"),
       encodings={},
       pos=(3, 0, 3, 6), title="Shipping Mode Performance", version=2, disagg=True),

    # Churn risk bar — left
    _w("churn_dist", "churn_risk_dist", "bar",
       fields=_plain("risk_tier", "total_ltv_at_risk", "customers"),
       encodings={
           "x": _enc("risk_tier", "categorical", "Risk Tier"),
           "y": _enc("total_ltv_at_risk", "quantitative", "LTV at Risk"),
       },
       pos=(0, 6, 3, 5), title="Churn Risk — LTV at Risk", disagg=True),

    # YoY growth line — right
    _w("yoy_chart", "yoy_growth", "line",
       fields=_plain("year_month") + [("yoy_growth_pct", "SUM(`yoy_growth_pct`)")] + _plain("region"),
       encodings={
           "x": _enc("year_month", "categorical", "Month"),
           "y": _enc("yoy_growth_pct", "quantitative", "YoY Growth %"),
           "color": _enc("region", "categorical", "Region"),
       },
       pos=(3, 6, 3, 5), title="YoY Revenue Growth by Region"),
]

# Full dashboard document: the datasets plus the two pages of positioned widgets.
dashboard_def = {
    "datasets": datasets,
    "pages": [
        {
            "name": "overview",
            "displayName": "Executive Overview",
            "layout": overview_layout,
        },
        {
            "name": "products_suppliers",
            "displayName": "Products & Suppliers",
            "layout": products_layout,
        },
    ],
}

print(f"Dashboard definition: {len(dashboard_def['pages'])} pages, {len(datasets)} datasets")

In [None]:
# Create the dashboard via REST API (more reliable than SDK across versions)
import requests

# Credentials and host are taken from the running notebook's own context.
db_token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
db_host  = spark.conf.get("spark.databricks.workspaceUrl")
headers  = {"Authorization": f"Bearer {db_token}", "Content-Type": "application/json"}

# The dashboard definition travels as a JSON string inside the JSON request body.
payload = {
    "display_name": "Retail Analytics — Executive Dashboard",
    "parent_path": DASHBOARD_PARENT,
    "serialized_dashboard": json.dumps(dashboard_def),
}

# A timeout keeps a stalled connection from hanging the cell forever, and
# network-level errors are routed to the same manual-fallback instructions as
# non-200 responses instead of aborting the notebook with a traceback.
try:
    resp = requests.post(
        f"https://{db_host}/api/2.0/lakeview/dashboards",
        headers=headers,
        json=payload,
        timeout=60,
    )
    failure = None if resp.status_code == 200 else f"({resp.status_code}): {resp.text[:300]}"
except requests.exceptions.RequestException as exc:
    resp = None
    failure = f"(request failed): {exc}"

if failure is None:
    data = resp.json()
    dash_id = data.get("dashboard_id", "")
    print(f"✓ Dashboard created: {dash_id}")
    print(f"  URL: https://{db_host}/sql/dashboardsv3/{dash_id}")
else:
    print(f"⚠ Dashboard creation {failure}")
    print(f"\nManual alternative:")
    print(f"  1. SQL → Dashboards → Create Dashboard")
    print(f"  2. Add each query from dashboard_queries above")
    print(f"  3. Choose chart types matching the widget names")

---
## 5 — Create Genie Space

**Genie** lets business users ask questions in plain English and get answers from the Gold layer.
We programmatically configure a Genie Space with curated instructions and table access.

In [None]:
# Fully-qualified Gold tables the Genie Space is allowed to query.
genie_tables = [
    f"{GOLD}.{table_name}"
    for table_name in (
        "gold_daily_sales",
        "gold_monthly_sales",
        "gold_customer_rfm",
        "gold_product_performance",
        "gold_supplier_scorecard",
        "gold_shipping_analysis",
        "gold_executive_summary",
        "gold_churn_scores",
    )
]

# General instructions for the space: table descriptions, business vocabulary,
# and answer-formatting rules. Leading/trailing whitespace is stripped on use.
genie_instructions = """
You are a retail analytics assistant. The data comes from a TPC-H retail dataset with the
following Gold-layer tables:

- gold_daily_sales: Daily revenue by region and market segment
- gold_monthly_sales: Monthly revenue with MoM and YoY growth
- gold_customer_rfm: Customer segmentation (Champions, Loyal, At Risk, Lost, etc.)
- gold_product_performance: Product revenue, margin, and return rates by brand/type
- gold_supplier_scorecard: Supplier reliability, on-time delivery, return rates
- gold_shipping_analysis: Shipping mode performance across regions
- gold_executive_summary: Quarterly KPI roll-up for executives
- gold_churn_scores: ML-predicted churn risk per customer

Key business terms:
- "Revenue" means net_revenue (after discounts, before tax)
- "Margin" means profit_margin_pct
- "Churn risk" uses the risk_tier column: Critical, High, Medium, Low
- Regions are: AMERICA, EUROPE, ASIA, AFRICA, MIDDLE EAST
- Market segments: AUTOMOBILE, BUILDING, FURNITURE, HOUSEHOLD, MACHINERY
- Time range: 1992 to 1998

When answering questions:
- Format currency with $ and commas
- Format percentages with %
- Use the most specific table available
"""

# Starter questions offered to business users in the space.
sample_questions = [
    "What was total revenue by region in 1997?",
    "Which market segment has the highest profit margin?",
    "How many customers are Champions vs At Risk?",
    "What brands have the highest return rate?",
    "Show me the quarterly revenue trend",
    "Which shipping mode is most reliable?",
    "How many customers are at critical churn risk?",
    "Compare YoY revenue growth across regions",
]

table_count = len(genie_tables)
question_count = len(sample_questions)
print(f"Genie config: {table_count} tables, {question_count} sample questions")

In [None]:
# ── Find a SQL warehouse ─────────────────────────────────────────────────────
import json as _json

# Pick a SQL warehouse for the Genie Space. The scan stops at the first RUNNING
# warehouse; if none is running, the last STARTING/STOPPED one seen is kept.
# Listing errors are reported rather than raised, so the manual instructions
# printed later in this cell still appear.
warehouse_id = None
try:
    for candidate in w.warehouses.list():
        status = candidate.state.value if candidate.state else ""
        if status not in ("RUNNING", "STARTING", "STOPPED"):
            continue
        warehouse_id = candidate.id
        if status == "RUNNING":
            break
    if not warehouse_id:
        print("⚠ No SQL warehouse found.")
    else:
        print(f"✓ Using SQL warehouse: {warehouse_id}")
except Exception as err:
    print(f"⚠ Could not list warehouses: {err}")

# ── Create Genie Space via SDK with serialized_space ─────────────────────────
# Try to create the Genie Space through the SDK. genie_created stays False when
# there is no warehouse or the call fails, which triggers the manual
# instructions printed at the end of this cell.
genie_created = False
if warehouse_id:
    space_config = {
        "table_identifiers": genie_tables,
        "curated_questions": sample_questions,
        "instructions": genie_instructions.strip(),
    }
    serialized = _json.dumps(space_config)
    try:
        genie_space = w.genie.create_space(
            warehouse_id=warehouse_id,
            serialized_space=serialized,
            title="Retail Analytics Genie",
            description="Ask questions about retail sales, customers, products, and suppliers in plain English.",
            parent_path=DASHBOARD_PARENT,
        )
        space_id = genie_space.space_id
        db_host = spark.conf.get("spark.databricks.workspaceUrl")
        genie_url = f"https://{db_host}/genie/rooms/{space_id}"

        # Success report, followed by the first five sample questions as prompts.
        for line in (
            "✓ Genie Space created!",
            "  Name: Retail Analytics Genie",
            f"  ID:   {space_id}",
            f"  URL:  {genie_url}",
            "\n  Try asking:",
        ):
            print(line)
        for question in sample_questions[:5]:
            print(f"    → {question}")
        genie_created = True
    except Exception as err:
        print(f"⚠ SDK Genie Space creation failed: {err}")

# Fallback: when the space was not created programmatically, print step-by-step
# instructions for building the same space in the UI.
if not genie_created:
    rule = "─" * 60
    warehouse_hint = warehouse_id or "(any Pro/Serverless warehouse)"

    steps = [
        "",
        rule,
        "MANUAL GENIE SPACE SETUP (takes ~2 minutes)",
        rule,
        "  1. Open: AI/BI → Genie Spaces → '+ New Genie Space'",
        "  2. Title: Retail Analytics Genie",
        f"  3. Attach SQL warehouse: {warehouse_hint}",
        "  4. Under 'Tables', click '+ Add tables' and add:",
    ]
    steps.extend(f"       {table}" for table in genie_tables)
    steps.extend([
        "  5. Under 'General instructions', paste the following:",
        rule,
        genie_instructions.strip(),
        rule,
        "  6. Under 'Sample questions', add:",
    ])
    steps.extend(f"       • {question}" for question in sample_questions)
    steps.extend(["", "  Then click 'Save'. Your Genie Space is ready!"])

    print("\n".join(steps))

## 6 — Preview Dashboard Queries

In [None]:
# Run the monthly revenue-trend dataset query and render the result.
print("Revenue Trend by Region (Monthly)")
trend_preview = spark.sql(dashboard_queries["revenue_trend_monthly"])
display(trend_preview)

In [None]:
# Run the customer-segment dataset query and render the result.
print("Customer Segment Distribution")
segments_preview = spark.sql(dashboard_queries["customer_segments"])
display(segments_preview)

In [None]:
# Run the churn-risk dataset query and render the result.
print("Churn Risk Distribution")
churn_preview = spark.sql(dashboard_queries["churn_risk_dist"])
display(churn_preview)

In [None]:
# Run the quarterly executive dataset query and render the result.
print("Executive Quarterly Summary")
quarterly_preview = spark.sql(dashboard_queries["executive_quarterly"])
display(quarterly_preview)

---
The dashboard and Genie Space were created above, or manual setup instructions were printed for any step whose API call failed.

Continue with `09_databricks_app.ipynb`.