# Deploy Frontend App with Databricks Lakehouse Applications

Writes and deploys a Gradio application as a
[Databricks App](https://docs.databricks.com/en/dev-tools/databricks-apps/).
The app queries Gold and Silver-layer Delta tables through the Statement
Execution API and renders five interactive tabs: Customer 360, Revenue Trends,
Product Analytics, Churn Risk, and Executive Summary.

**Prerequisites** -- run notebooks `00` through `08` first so the Gold tables
and ML scores exist.

In [None]:
%pip install --quiet -U databricks-sdk>=0.59.0
dbutils.library.restartPython()

## Configuration

In [None]:
import os
from databricks.sdk import WorkspaceClient

# Unity Catalog names shared by the rest of the notebook.
CATALOG = spark.catalog.currentCatalog()
GOLD    = f"{CATALOG}.retail_gold"
SILVER  = f"{CATALOG}.retail_silver"

w = WorkspaceClient()

# Choose the SQL warehouse the app will be bound to. A RUNNING warehouse is
# preferred so the app's first queries do not have to wait for a cold start;
# the first STOPPED warehouse is only used when nothing is running.
WAREHOUSE_ID = ""
_stopped_warehouse_id = ""
for wh in w.warehouses.list():
    state_str = str(wh.state).upper()
    print(f"  Found warehouse: {wh.name} (id={wh.id}, state={wh.state})")
    if "RUNNING" in state_str:
        if not WAREHOUSE_ID:
            WAREHOUSE_ID = wh.id
    elif "STOPPED" in state_str and not _stopped_warehouse_id:
        _stopped_warehouse_id = wh.id

if not WAREHOUSE_ID:
    WAREHOUSE_ID = _stopped_warehouse_id

if not WAREHOUSE_ID:
    print("No running/stopped warehouse detected. Set WAREHOUSE_ID manually below:")
    # WAREHOUSE_ID = "paste-your-warehouse-id-here"

# The app's source files are written under the current user's workspace folder.
current_user = spark.sql("SELECT current_user()").collect()[0][0]
APP_NAME = "retail-analytics-app"
APP_DIR  = f"/Workspace/Users/{current_user}/apps/retail_analytics"

print(f"\nCatalog   : {CATALOG}")
print(f"Warehouse : {WAREHOUSE_ID}")
print(f"App dir   : {APP_DIR}")
print(f"App name  : {APP_NAME}")

## Write App Files

The application lives in three files written to `APP_DIR`:

| File | Purpose |
|---|---|
| `app.py` | Gradio frontend with five tabs, backed by SQL queries |
| `app.yaml` | Runtime config: start command + environment variables |
| `requirements.txt` | Python dependencies installed at deploy time |

`DATABRICKS_WAREHOUSE_ID` is resolved from the app resource binding (`valueFrom`),
following the pattern from [databricks/app-templates](https://github.com/databricks/app-templates).

In [None]:
# Make sure the app folder exists before any file is written into it.
os.makedirs(APP_DIR, exist_ok=True)

# app.yaml: start command plus the environment variables the app reads.
# The warehouse id comes from the "sql-warehouse" resource binding.
yaml_lines = [
    "command:",
    "  - python",
    "  - app.py",
    "env:",
    "  - name: DATABRICKS_WAREHOUSE_ID",
    "    valueFrom: sql-warehouse",
    "  - name: CATALOG",
    f"    value: {CATALOG}",
]
yaml_content = "\n".join(yaml_lines) + "\n"

# requirements.txt: one dependency per line.
reqs = "\n".join(["gradio>=4.0", "databricks-sdk", "pandas"]) + "\n"

# Write both files the same way and report each path once it is on disk.
for filename, content in (("app.yaml", yaml_content), ("requirements.txt", reqs)):
    target = f"{APP_DIR}/{filename}"
    with open(target, "w") as f:
        f.write(content)
    print(f"Wrote {target}")

In [None]:
# Source of the Gradio app that gets written to APP_DIR/app.py.
# It is a plain (non-f) string so the braces of the embedded f-strings reach
# app.py verbatim; keep backslashes and triple single quotes out of it.
app_source = '''"""Retail Analytics Dashboard -- Gradio app backed by Gold-layer Delta tables."""

import os
import logging

import gradio as gr
import pandas as pd
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import StatementParameterListItem

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# -- Configuration ----------------------------------------------------------------
CATALOG      = os.getenv("CATALOG", "main")
GOLD         = f"{CATALOG}.retail_gold"
SILVER       = f"{CATALOG}.retail_silver"
WAREHOUSE_ID = os.getenv("DATABRICKS_WAREHOUSE_ID", "")

# Raise rather than assert: assertions are stripped when Python runs with -O.
if not WAREHOUSE_ID:
    raise RuntimeError("DATABRICKS_WAREHOUSE_ID must be set in app.yaml.")

# Single source of truth for the choices shown in the UI and accepted by the queries.
REGIONS      = ["ALL", "AMERICA", "EUROPE", "ASIA", "AFRICA", "MIDDLE EAST"]
SORT_COLUMNS = ["revenue", "margin_pct", "return_rate", "orders"]

w = WorkspaceClient()


def run_query(sql: str, params=None) -> pd.DataFrame:
    """Execute a SQL query via the Statement Execution API.

    Args:
        sql: Statement text. User-supplied values should appear as named
            parameter markers (``:name``) instead of being interpolated.
        params: Optional mapping of marker name to value; values are bound
            server-side as strings.

    Returns:
        The result set as a DataFrame (numeric-looking columns converted),
        an empty DataFrame when the response carries no result, or a
        one-column ``error`` frame when the statement did not succeed.
    """
    try:
        bound = [
            StatementParameterListItem(name=key, value=str(val))
            for key, val in (params or {}).items()
        ]
        resp = w.statement_execution.execute_statement(
            warehouse_id=WAREHOUSE_ID,
            statement=sql,
            wait_timeout="30s",
            parameters=bound or None,
        )

        # Failed, cancelled, or still-running statements are reported through
        # the response status instead of an exception, so check it explicitly;
        # otherwise a permission error would look like an empty result.
        status = getattr(resp, "status", None)
        state_name = getattr(getattr(status, "state", None), "name", None)
        if state_name is not None and state_name != "SUCCEEDED":
            detail = getattr(getattr(status, "error", None), "message", None)
            message = f"Statement {state_name}: {detail or 'no result within 30s'}"
            logger.error("Query failed: %s", message)
            return pd.DataFrame({"error": [message]})

        if resp.result and resp.manifest:
            cols = [c.name for c in resp.manifest.schema.columns]
            rows = resp.result.data_array or []
            df = pd.DataFrame(rows, columns=cols)
            for col in df.columns:
                try:
                    df[col] = pd.to_numeric(df[col])
                except (ValueError, TypeError):
                    # Not a numeric column (names, dates, ...): keep it as returned.
                    pass
            return df
        return pd.DataFrame()
    except Exception as exc:
        logger.error("Query failed: %s", exc)
        return pd.DataFrame({"error": [str(exc)]})


def _text(value) -> str:
    """Render a result cell for display, showing N/A for SQL NULLs."""
    return "N/A" if pd.isna(value) else str(value)


def _money(value, decimals: int = 2) -> str:
    """Render a numeric cell as a dollar amount, showing N/A for SQL NULLs."""
    if pd.isna(value):
        return "N/A"
    return f"${float(value):,.{decimals}f}"


# -- Tab: Customer 360 ------------------------------------------------------------
def customer_lookup(customer_id):
    """Look up a single customer by key.

    Returns a (markdown summary, DataFrame) pair; the DataFrame is None when
    the input is invalid, the query fails, or no customer matches.
    """
    if not customer_id or not str(customer_id).strip():
        return "Enter a customer ID to look up.", None
    try:
        cid = int(customer_id)
    except (ValueError, TypeError):
        return "Enter a valid integer customer ID.", None

    # cid is a validated int, so interpolating it cannot alter the statement.
    df = run_query(f"""
        SELECT c.customer_key, c.customer_name, c.market_segment,
               c.nation_name, c.region_name, c.balance_tier,
               r.rfm_segment, r.rfm_score,
               ROUND(r.monetary, 2) AS lifetime_value,
               r.frequency AS total_orders, r.recency_days,
               ROUND(r.avg_order_value, 2) AS avg_order_value
        FROM {SILVER}.dim_customer c
        LEFT JOIN {GOLD}.gold_customer_rfm r ON c.customer_key = r.customer_key
        WHERE c.customer_key = {cid}
    """)

    if "error" in df.columns:
        return f"Query error: {df['error'].iloc[0]}", None
    if df.empty:
        return f"No customer found with ID {cid}.", None

    # The RFM columns come from a LEFT JOIN and are NULL for customers without
    # an RFM row, so they are rendered through the NULL-safe helpers.
    r = df.iloc[0]
    summary = f"""## Customer #{r['customer_key']} -- {r['customer_name']}

| Attribute | Value |
|---|---|
| Segment | {r['market_segment']} |
| Region | {r['region_name']} ({r['nation_name']}) |
| Balance Tier | {r.get('balance_tier', 'N/A')} |
| RFM Segment | {_text(r['rfm_segment'])} |
| RFM Score | {_text(r['rfm_score'])} |
| Lifetime Value | {_money(r['lifetime_value'])} |
| Total Orders | {_text(r['total_orders'])} |
| Avg Order Value | {_money(r['avg_order_value'])} |
| Recency (days) | {_text(r['recency_days'])} |
"""
    return summary, df


# -- Tab: Revenue ------------------------------------------------------------------
def revenue_data(region, start_month, end_month):
    """Monthly revenue by region with optional filters.

    Filter values are sent as bound parameters, never interpolated into the
    statement, because the month fields are free-text inputs.
    Returns a (markdown header, DataFrame) pair.
    """
    start_month = (start_month or "").strip()
    end_month = (end_month or "").strip()

    clauses = []
    params = {}
    if region and region != "ALL":
        clauses.append("region = :region")
        params["region"] = region
    if start_month:
        clauses.append("year_month >= :start_month")
        params["start_month"] = start_month
    if end_month:
        clauses.append("year_month <= :end_month")
        params["end_month"] = end_month
    where = ("WHERE " + " AND ".join(clauses)) if clauses else ""

    df = run_query(f"""
        SELECT year_month, region,
               ROUND(SUM(net_revenue), 0) AS revenue,
               SUM(num_orders) AS orders,
               ROUND(AVG(profit_margin_pct), 1) AS margin_pct
        FROM {GOLD}.gold_monthly_sales
        {where}
        GROUP BY year_month, region ORDER BY year_month, region
    """, params)

    if "error" in df.columns:
        return f"Query error: {df['error'].iloc[0]}", df

    # run_query may return a frame without columns when no result came back.
    total = df["revenue"].sum() if "revenue" in df.columns else 0
    header = f"**Total Revenue**: ${total:,.0f}  |  **Rows**: {len(df)}"
    return header, df


# -- Tab: Products -----------------------------------------------------------------
def product_data(sort_by, top_n):
    """Top products by brand and price band.

    ``sort_by`` must be one of SORT_COLUMNS; an identifier cannot be sent as a
    bound parameter, so it is checked against the allow-list before use.
    """
    if sort_by not in SORT_COLUMNS:
        return pd.DataFrame({"error": [f"Unsupported sort column: {sort_by}"]})

    return run_query(f"""
        SELECT brand, price_band,
               ROUND(SUM(net_revenue), 0) AS revenue,
               ROUND(AVG(profit_margin_pct), 1) AS margin_pct,
               ROUND(AVG(return_rate_pct), 1) AS return_rate,
               SUM(num_orders) AS orders
        FROM {GOLD}.gold_product_performance
        GROUP BY brand, price_band ORDER BY {sort_by} DESC LIMIT {int(top_n)}
    """)


# -- Tab: Churn Risk ---------------------------------------------------------------
def churn_data():
    """Churn risk tier breakdown and the 50 highest-risk customers."""
    summary_df = run_query(f"""
        SELECT risk_tier, COUNT(*) AS customers,
               ROUND(AVG(churn_probability), 3) AS avg_probability,
               ROUND(AVG(lifetime_value), 0) AS avg_ltv
        FROM {GOLD}.gold_churn_scores
        GROUP BY risk_tier
        ORDER BY CASE risk_tier
            WHEN 'Critical' THEN 1 WHEN 'High' THEN 2
            WHEN 'Medium' THEN 3 ELSE 4 END
    """)

    detail_df = run_query(f"""
        SELECT customer_key, risk_tier,
               ROUND(churn_probability, 3) AS churn_prob,
               ROUND(lifetime_value, 0) AS ltv, market_segment
        FROM {GOLD}.gold_churn_scores
        ORDER BY churn_probability DESC LIMIT 50
    """)

    return summary_df, detail_df


# -- Tab: Executive Summary --------------------------------------------------------
def exec_data():
    """Quarterly KPI roll-up with latest-quarter highlight."""
    df = run_query(f"""
        SELECT year_quarter, total_orders, active_customers,
               ROUND(gross_order_value, 0) AS gross_revenue,
               ROUND(avg_order_value, 0) AS avg_order_value,
               ROUND(revenue_per_customer, 0) AS rev_per_customer,
               qoq_revenue_growth_pct AS qoq_growth
        FROM {GOLD}.gold_executive_summary ORDER BY year_quarter
    """)

    if "error" in df.columns:
        return "Error loading data.", df

    if not df.empty:
        # Rows are ordered by quarter, so the last row is the latest quarter.
        q = df.iloc[-1]
        header = f"""### Latest Quarter: {q['year_quarter']}

| KPI | Value |
|---|---|
| Orders | {int(q['total_orders']):,} |
| Active Customers | {int(q['active_customers']):,} |
| Gross Revenue | ${float(q['gross_revenue']):,.0f} |
| Avg Order Value | ${float(q['avg_order_value']):,.0f} |
| Rev / Customer | ${float(q['rev_per_customer']):,.0f} |
| QoQ Growth | {q['qoq_growth']}% |
"""
    else:
        header = "No data available."

    return header, df


# -- Gradio UI ---------------------------------------------------------------------
with gr.Blocks(title="Retail Analytics", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Retail Analytics Dashboard")
    gr.Markdown("Gold-layer Delta tables served through Databricks SQL")

    with gr.Tab("Customer 360"):
        gr.Markdown(
            "Look up any customer to see their profile, RFM segment, and lifetime value."
        )
        with gr.Row():
            c_id = gr.Textbox(label="Customer ID", placeholder="e.g. 42", scale=2)
            c_btn = gr.Button("Look Up", variant="primary", scale=1)
        c_md = gr.Markdown()
        c_df = gr.Dataframe(label="Profile")
        c_btn.click(customer_lookup, inputs=[c_id], outputs=[c_md, c_df])

    with gr.Tab("Revenue"):
        gr.Markdown("Monthly revenue trends. Filter by region and date range.")
        with gr.Row():
            r_reg = gr.Dropdown(REGIONS, value="ALL", label="Region")
            r_s = gr.Textbox(label="Start (yyyy-MM)", value="1995-01")
            r_e = gr.Textbox(label="End (yyyy-MM)", value="1997-12")
            r_btn = gr.Button("Query", variant="primary")
        r_hdr = gr.Markdown()
        r_tbl = gr.Dataframe(label="Revenue Data")
        r_btn.click(revenue_data, inputs=[r_reg, r_s, r_e], outputs=[r_hdr, r_tbl])

    with gr.Tab("Products"):
        gr.Markdown("Product performance aggregated by brand and price band.")
        with gr.Row():
            p_sort = gr.Dropdown(SORT_COLUMNS, value="revenue", label="Sort By")
            p_n = gr.Slider(5, 50, value=20, step=5, label="Top N")
            p_btn = gr.Button("Query", variant="primary")
        p_tbl = gr.Dataframe(label="Product Data")
        p_btn.click(product_data, inputs=[p_sort, p_n], outputs=[p_tbl])

    with gr.Tab("Churn Risk"):
        gr.Markdown(
            "ML-predicted churn risk tiers and the 50 highest-risk customers."
        )
        ch_btn = gr.Button("Load Churn Data", variant="primary")
        ch_summary = gr.Dataframe(label="Risk Tier Summary")
        ch_detail = gr.Dataframe(label="Top 50 At-Risk Customers")
        ch_btn.click(churn_data, outputs=[ch_summary, ch_detail])

    with gr.Tab("Executive Summary"):
        gr.Markdown("Quarterly KPI roll-up across the full business.")
        ex_btn = gr.Button("Load KPIs", variant="primary")
        ex_md = gr.Markdown()
        ex_tbl = gr.Dataframe(label="All Quarters")
        ex_btn.click(exec_data, outputs=[ex_md, ex_tbl])

demo.queue(default_concurrency_limit=100)

if __name__ == "__main__":
    logger.info("Starting Retail Analytics (catalog=%s)", CATALOG)
    demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "8000")))
'''

# Write the app next to app.yaml and requirements.txt; the encoding is pinned
# so the file does not depend on the driver's locale.
with open(f"{APP_DIR}/app.py", "w", encoding="utf-8") as f:
    f.write(app_source)
print(f"Wrote {APP_DIR}/app.py  ({len(app_source):,} chars)")

## Create and Deploy

This section:
1. Creates the Databricks App with a SQL warehouse resource
2. Grants the app's auto-provisioned service principal `USE CATALOG` on the catalog, plus `USE SCHEMA` and `SELECT` on the Gold and Silver schemas
3. Deploys the source code

The service principal gets `CAN_USE` on the warehouse automatically through
the resource binding.

In [None]:
from databricks.sdk.service.apps import (
    App, AppDeployment, AppResource,
    AppResourceSqlWarehouse, AppResourceSqlWarehouseSqlWarehousePermission,
)

# Warehouse resource: binds the chosen SQL warehouse to the app with CAN_USE.
sql_wh = AppResourceSqlWarehouse(
    permission=AppResourceSqlWarehouseSqlWarehousePermission.CAN_USE,
    id=WAREHOUSE_ID,
)

# The resource name "sql-warehouse" is the key app.yaml refers to via valueFrom.
warehouse_resource = AppResource(name="sql-warehouse", sql_warehouse=sql_wh)

app_def = App(
    name=APP_NAME,
    description="Retail analytics dashboard -- Gold-layer Delta tables",
    default_source_code_path=APP_DIR,
    resources=[warehouse_resource],
)

# Create the app; an "already exists" failure means a previous run created it,
# so that case is reported and tolerated while every other error propagates.
try:
    w.apps.create_and_wait(app=app_def)
except Exception as err:
    if "already exists" not in str(err):
        raise
    print(f"App '{APP_NAME}' already exists -- reusing")
else:
    print(f"App created: {APP_NAME}")

In [None]:
# Look up the app to find the service principal it runs as.
app_info = w.apps.get(name=APP_NAME)
sp_id = getattr(app_info, "service_principal_id", None)
sp_name = getattr(app_info, "service_principal_name", "") or ""
print(f"Service principal name: {sp_name}")
print(f"Service principal id  : {sp_id}")

# The GRANT statements below address the principal by its application (client) ID.
# Read it from the app record when the SDK exposes it there; only fall back to
# a service-principal lookup (which can require extra privileges) when it is missing.
grant_principal = getattr(app_info, "service_principal_client_id", None)
if grant_principal:
    print(f"Application ID        : {grant_principal}")
elif sp_id:
    try:
        sp_details = w.service_principals.get(id=sp_id)
        grant_principal = sp_details.application_id
        print(f"Application ID        : {grant_principal}")
    except Exception as lookup_err:
        # Still best-effort, but report why the lookup failed instead of hiding it.
        print(f"  Could not look up service principal {sp_id}: {str(lookup_err)[:80]}")

# Last resort: try the numeric id as the principal name.
if not grant_principal and sp_id:
    grant_principal = str(sp_id)

if grant_principal:
    # Each grant is attempted independently so one failure does not block the rest.
    for stmt in [
        f"GRANT USE CATALOG ON CATALOG {CATALOG} TO `{grant_principal}`",
        f"GRANT USE SCHEMA ON SCHEMA {GOLD} TO `{grant_principal}`",
        f"GRANT USE SCHEMA ON SCHEMA {SILVER} TO `{grant_principal}`",
        f"GRANT SELECT ON SCHEMA {GOLD} TO `{grant_principal}`",
        f"GRANT SELECT ON SCHEMA {SILVER} TO `{grant_principal}`",
    ]:
        try:
            spark.sql(stmt)
            tag = stmt.split("GRANT ")[1][:60]
            print(f"  Granted: {tag}")
        except Exception as ex:
            print(f"  Skipped: {str(ex)[:80]}")
else:
    print("Could not resolve app SP. Grant permissions manually if queries fail.")

In [None]:
# Deployment descriptor: the app is deployed from the files written to APP_DIR.
deployment = AppDeployment(source_code_path=APP_DIR)

# Deployment is best-effort: failures are reported with guidance rather than
# raised, and the app URL is printed either way.
try:
    w.apps.deploy_and_wait(app_name=APP_NAME, app_deployment=deployment)
except Exception as err:
    already_deploying = "active deployment in progress" in str(err).lower()
    if not already_deploying:
        print(f"Deployment issue: {err}")
        print(f"Manual fallback: open '{APP_NAME}' in Compute > Apps and click Deploy")
    else:
        print("A deployment is already running. Wait for it to finish, then re-run this cell.")

app_record = w.apps.get(name=APP_NAME)
app_url = app_record.url
print(f"App URL: {app_url}")

## Smoke Test

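Run simplified versions of the app's queries to verify that the Gold- and Silver-layer data is accessible. These run in the notebook under your own identity, so they confirm the tables exist and are populated, not that the app's service principal can read them.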

In [None]:
# Each smoke check is a printed title plus the SQL to run; the loop at the end
# runs them in order and displays each result.
customer_sql = f"""
    SELECT c.customer_key, c.customer_name, c.market_segment, c.region_name,
           r.rfm_segment, r.rfm_score, ROUND(r.monetary, 2) AS ltv
    FROM {SILVER}.dim_customer c
    LEFT JOIN {GOLD}.gold_customer_rfm r ON c.customer_key = r.customer_key
    WHERE c.customer_key = 42
"""

revenue_sql = f"""
    SELECT year_month, region, ROUND(SUM(net_revenue), 0) AS revenue
    FROM {GOLD}.gold_monthly_sales
    WHERE region = 'AMERICA' AND year_month >= '1996-01' AND year_month <= '1996-12'
    GROUP BY year_month, region ORDER BY year_month
"""

brands_sql = f"""
    SELECT brand, ROUND(SUM(net_revenue), 0) AS revenue
    FROM {GOLD}.gold_product_performance
    GROUP BY brand ORDER BY revenue DESC LIMIT 5
"""

churn_sql = f"""
    SELECT risk_tier, COUNT(*) AS customers,
           ROUND(AVG(churn_probability), 3) AS avg_prob
    FROM {GOLD}.gold_churn_scores
    GROUP BY risk_tier
    ORDER BY CASE risk_tier
        WHEN 'Critical' THEN 1 WHEN 'High' THEN 2
        WHEN 'Medium' THEN 3 ELSE 4 END
"""

smoke_checks = [
    ("1. Customer lookup (ID=42):", customer_sql),
    ("\n2. Revenue (AMERICA, 1996):", revenue_sql),
    ("\n3. Top 5 brands:", brands_sql),
    ("\n4. Churn risk tiers:", churn_sql),
]

for title, query in smoke_checks:
    print(title)
    display(spark.sql(query))

---
App deployed. Open the URL printed above to explore the dashboard.

This is the last notebook -- the full pipeline is now live.