# Overview

This notebook runs the setup for the agent. It guides you through the following steps:

1. **Schema and Volume Setup:** Creates a dedicated schema and volume for your data.
2. **Permissions:** Grants necessary permissions to all account users for collaborative access.
3. **Data Upload:** Uploads sample CSV files to the Databricks volume.
4. **Delta Table Creation:** Reads the uploaded CSV files and creates Delta tables, including enabling Change Data Feed for the `supplier_sops` table.
5. **Vector Search Endpoint:** Initializes and waits for a Databricks Vector Search endpoint.
6. **Index Creation:** Not performed in this notebook. The Vector Search index is created in `/uc_tools` during tool setup.

By the end of this workflow, all the data dependencies for the agent are set up.

In [0]:
%pip install -U -qqqq unitycatalog-ai[databricks] mlflow-skinny[databricks] langgraph==0.3.4 databricks-langchain databricks-agents python-dotenv uv
%restart_python

In [0]:
# Clear every widget left over from a previous run so the widget definitions
# that follow start from a clean slate.
# NOTE(review): Databricks documents that widgets cannot be created in the same
# cell that calls removeAll(), which is presumably why this call lives in its
# own cell — confirm against the widgets documentation before merging cells.
dbutils.widgets.removeAll()

In [0]:
# Widgets expose these settings to %sql cells as named parameters
# (e.g. :TARGET_CATALOG). The same values are also bound to Python variables
# so the Python cells can use them.
_WIDGET_DEFAULTS = (
    ("TARGET_CATALOG", "workshop"),
    ("TARGET_SCHEMA", "supply_chain"),
    ("VS_INDEX_BASE_TABLE", "supplier_sops"),
    ("VS_INDEX", "supplier_sops_vs_index"),
)
for _widget_name, _widget_default in _WIDGET_DEFAULTS:
    dbutils.widgets.text(_widget_name, _widget_default)

# Read the values back: a widget keeps whatever the user typed, which may
# differ from the default supplied above.
TARGET_CATALOG = dbutils.widgets.get("TARGET_CATALOG")
TARGET_SCHEMA = dbutils.widgets.get("TARGET_SCHEMA")
VS_INDEX_BASE_TABLE = dbutils.widgets.get("VS_INDEX_BASE_TABLE")
VS_INDEX = dbutils.widgets.get("VS_INDEX")

# Echo the effective configuration, one "NAME: value" line per setting.
print(
    "\n".join(
        (
            f"TARGET_CATALOG: {TARGET_CATALOG}",
            f"TARGET_SCHEMA: {TARGET_SCHEMA}",
            f"VS_INDEX_BASE_TABLE: {VS_INDEX_BASE_TABLE}",
            f"VS_INDEX: {VS_INDEX}",
        )
    )
)

In [0]:
%sql

-- Create the catalog, schema and "data" volume named by the widgets, then make
-- that schema the session default. Every statement uses IF NOT EXISTS, so the
-- cell can be rerun.
-- Depending on your Unity Catalog setup you may not be allowed to create
-- catalogs from a notebook. If the statement below fails, create the catalog
-- manually (or comment the statement out if the catalog already exists) and
-- rerun this cell.
CREATE CATALOG IF NOT EXISTS IDENTIFIER(:TARGET_CATALOG);

-- IDENTIFIER(...) turns the concatenated string into an object name; the
-- :NAME markers are filled from the notebook widgets of the same name.
CREATE SCHEMA IF NOT EXISTS IDENTIFIER(:TARGET_CATALOG || "." || :TARGET_SCHEMA);
CREATE VOLUME IF NOT EXISTS IDENTIFIER(:TARGET_CATALOG || "." || :TARGET_SCHEMA || ".data");
-- Unqualified table names in later %sql cells resolve against this schema.
USE IDENTIFIER(:TARGET_CATALOG || "." || :TARGET_SCHEMA);

In [0]:
# Give every account user read-level access to the schema: use the schema,
# query its tables, execute its functions and read files from its volumes.
_schema_privileges = ", ".join(["USE SCHEMA", "SELECT", "EXECUTE", "READ VOLUME"])
_schema_fqn = f"{TARGET_CATALOG}.{TARGET_SCHEMA}"
_grant_statement = f"GRANT {_schema_privileges} ON SCHEMA {_schema_fqn} TO `account users`"

spark.sql(_grant_statement)

In [0]:
import glob
import os
import shutil

# Source: the "data" folder next to this notebook (relative to the working directory).
DATA_PATH = os.path.join(os.getcwd(), "data")
# Destination: the "data" volume of the target catalog/schema.
VOLUME_PATH = f"/Volumes/{TARGET_CATALOG}/{TARGET_SCHEMA}/data"

# Resolve the source files first so an empty or missing source folder aborts
# the cell before anything is deleted from the volume.
# glob.escape keeps special characters in the directory path from being
# interpreted as wildcards.
source_csvs = sorted(
    path
    for path in glob.glob(os.path.join(glob.escape(DATA_PATH), "*.csv"))
    if os.path.isfile(path)
)
if not source_csvs:
    raise FileNotFoundError(f"No CSV files found in {DATA_PATH}")

# Pure-Python file operations raise on failure, unlike os.system, whose exit
# code was previously ignored. They also avoid passing widget-derived paths
# through a shell.
print(f"Removing existing CSVs from {VOLUME_PATH}...")
for stale_csv in glob.glob(os.path.join(glob.escape(VOLUME_PATH), "*.csv")):
    if os.path.isfile(stale_csv):
        os.remove(stale_csv)

print(f"Copying CSVs from {DATA_PATH} to {VOLUME_PATH}...")
for source_csv in source_csvs:
    # copyfile copies contents only (no permission bits), like a plain `cp`.
    shutil.copyfile(source_csv, os.path.join(VOLUME_PATH, os.path.basename(source_csv)))

In [0]:
%sql
-- Build one table per uploaded CSV file. Table names are unqualified, so they
-- are created in the session's current schema; this cell therefore relies on
-- the earlier `USE IDENTIFIER(...)` statement having run in the same session.
-- Each query drops the _rescued_data column from the read_files output so it
-- does not end up in the table.
-- CREATE OR REPLACE makes the cell safe to rerun: existing tables are rebuilt.

-- shipments.csv -> table: shipments
CREATE OR REPLACE TABLE shipments
SELECT * EXCEPT (_rescued_data)
FROM read_files(
  "/Volumes/" || :TARGET_CATALOG || "/" || :TARGET_SCHEMA || "/data/shipments.csv",
  FORMAT => "csv",
  HEADER => true
);

-- suppliers.csv -> table: suppliers
CREATE OR REPLACE TABLE suppliers
SELECT * EXCEPT (_rescued_data)
FROM read_files(
  "/Volumes/" || :TARGET_CATALOG || "/" || :TARGET_SCHEMA || "/data/suppliers.csv",
  FORMAT => "csv",
  HEADER => true
);

-- inventory.csv -> table: inventory
CREATE OR REPLACE TABLE inventory
SELECT * EXCEPT (_rescued_data)
FROM read_files(
  "/Volumes/" || :TARGET_CATALOG || "/" || :TARGET_SCHEMA || "/data/inventory.csv",
  FORMAT => "csv",
  HEADER => true
);

-- supplier_sops.csv -> table: supplier_sops  (multiline text with embedded quotes)
-- The table name comes from the VS_INDEX_BASE_TABLE widget, while the source
-- file name is fixed to supplier_sops.csv.
-- Change Data Feed is enabled on this table only.
-- NOTE(review): presumably needed by the Vector Search index built on this
-- table in a later notebook — confirm.
-- MULTILINE lets a quoted field span several lines; ESCAPE => '"' treats a
-- double quote as the escape character for quotes embedded in a field.
CREATE OR REPLACE TABLE IDENTIFIER(:VS_INDEX_BASE_TABLE)
TBLPROPERTIES (delta.enableChangeDataFeed = true)
SELECT * EXCEPT (_rescued_data)
FROM read_files(
  "/Volumes/" || :TARGET_CATALOG || "/" || :TARGET_SCHEMA || "/data/supplier_sops.csv",
  FORMAT => "csv",
  HEADER => true,
  MULTILINE => true,
  ESCAPE => '"'
);

In [0]:
from databricks.vector_search.client import VectorSearchClient

client = VectorSearchClient()

# The Vector Search endpoint is named after the target catalog.
# Create it only if no endpoint with that name exists, so the cell can be rerun.
existing_endpoint_names = {
    endpoint["name"] for endpoint in client.list_endpoints().get("endpoints", [])
}
if TARGET_CATALOG in existing_endpoint_names:
    print(f"Endpoint {TARGET_CATALOG} already exists")
else:
    print(f"Creating endpoint {TARGET_CATALOG}")
    client.create_endpoint(name=TARGET_CATALOG, endpoint_type="STANDARD")

# Block until the endpoint can be used, whether it was just created or was
# already there. The closing message no longer claims the endpoint was created
# when it already existed.
client.wait_for_endpoint(TARGET_CATALOG)
print(f"Endpoint {TARGET_CATALOG} is ready")