# Sample examples for calling the Genie Space API
[Genie Space SDK References](https://openapi.dev.databricks.com/api/workspace/genie/getspace)

## Parameterize notebook with Genie Space ID

In [0]:
# Expose the Genie space ID as a notebook widget so the notebook can be re-run
# against a different space without editing code.
dbutils.widgets.text("space_id", "01f0bf82c140100bba2e5103692bbee3")

# Widget values are free text: trim whitespace introduced by copy/paste and
# fail fast on an empty value, which would otherwise end up as a malformed
# path segment in the Genie API URL built further down.
space_id = dbutils.widgets.get("space_id").strip()
if not space_id:
    raise ValueError("The 'space_id' widget is empty; provide a Genie space ID.")
print(space_id)

## Initialize workspace client

In [0]:
from databricks.sdk import WorkspaceClient

# Read the API URL and API token of the current notebook session from the
# notebook context (looked up once and reused for both values).
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
workspace_url = notebook_context.apiUrl().get()
workspace_token = notebook_context.apiToken().get()

# SDK client used later to obtain the OpenAI-compatible serving client.
w = WorkspaceClient(
    host=workspace_url,
    token=workspace_token
)

# Print only the workspace URL. The token is a credential: printing it would
# persist it in the notebook output, where anyone who can view the notebook
# (or an exported copy of it) could read and reuse it.
print(workspace_url)

## Define a function to get genie space metadata

In [0]:
import requests

def get_genie_space(space_id: str, workspace_url: str, token: str, include_serialized_space: bool = True, timeout: int = 30):
    """
    Fetch Genie space details via the Databricks Workspace REST API.

    Issues a GET to ``/api/2.0/genie/spaces/{space_id}`` on the given workspace
    and returns the decoded JSON body. The final request URL is printed for
    traceability (it does not contain the token).

    Args:
        space_id: The Genie space ID. It is percent-encoded before being placed
            in the URL path, so reserved characters cannot alter the endpoint.
        workspace_url: Databricks workspace base URL (e.g., https://my-workspace.cloud.databricks.com).
            A trailing slash is tolerated.
        token: OAuth/PAT bearer token.
        include_serialized_space: Whether to include the serialized space definition.
        timeout: Request timeout in seconds.

    Returns:
        Parsed JSON response (dict).

    Raises:
        ValueError: if ``space_id`` is empty.
        requests.HTTPError: on non-2xx responses.
    """
    # Function-scope import keeps this cell runnable on its own.
    from urllib.parse import quote

    if not space_id:
        # An empty ID would yield ".../genie/spaces/?...", i.e. a different endpoint.
        raise ValueError("space_id must be a non-empty string")

    base_url = workspace_url.rstrip("/")
    # safe="" also encodes "/", so the ID always stays a single path segment.
    encoded_space_id = quote(space_id, safe="")
    include_flag = "true" if include_serialized_space else "false"
    url = f"{base_url}/api/2.0/genie/spaces/{encoded_space_id}?include_serialized_space={include_flag}"
    print(url)

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    # Surface HTTP errors as exceptions instead of returning an error payload.
    resp.raise_for_status()
    return resp.json()

## Get full genie space metadata

In [0]:
# Fetch the space and explicitly ask for its serialized definition.
details = get_genie_space(space_id, workspace_url, workspace_token, include_serialized_space=True)

# Fail with an actionable message when the field is absent or null, instead of
# a bare KeyError here or a TypeError later when the value is JSON-decoded.
# NOTE(review): the API presumably omits this field when the caller lacks
# sufficient permission on the space - confirm against the Genie API docs.
serialized_blob = details.get("serialized_space")
if serialized_blob is None:
    raise KeyError(
        "Response for Genie space "
        f"{space_id!r} has no 'serialized_space'; returned keys: {sorted(details)}"
    )
print(serialized_blob)

## Generate Genie Space description based on its full metadata

In [0]:
# OpenAI-compatible client bound to this workspace's serving endpoints.
openai_client = w.serving_endpoints.get_open_ai_client()

# Databricks-hosted chat endpoint used for the summary. Alternative endpoint names:
#   databricks-meta-llama-3-3-70b-instruct, databricks-claude-sonnet-4-5,
#   databricks-gpt-oss-120b, databricks-gemini-2-5-flash
summary_model = "databricks-gpt-5-1"

# System prompt: sets the assistant's role and the desired output style.
system_prompt = "You are an assistant that summarizes the databricks genie space json metadata.  Remove separation between sections. Try to be concise"

# User prompts, sent as separate consecutive user messages in this order.
# The first one embeds the full serialized space JSON.
user_prompts = [
    f"summarize the metadata in this json - {serialized_blob}",
    "Identify the purpose of the Databricks Genie Space based on the JSON string specified",
    "The JSON string contains list of tables, sql queries, english and SQL instructions",
    "summarize the result as much as possible, and show the result in the following sections: Purpose, Data Sources, Industry Domains, Key Capabilities, and Business Use Cases",
]

summary_messages = [{"role": "system", "content": system_prompt}]
summary_messages.extend({"role": "user", "content": prompt} for prompt in user_prompts)

# Low temperature keeps the summary close to the metadata; the reply is capped at 1024 tokens.
resp = openai_client.chat.completions.create(
    model=summary_model,
    messages=summary_messages,
    max_tokens=1024,
    temperature=0.2,
)

# Only the first choice is used.
content = resp.choices[0].message.content
print(content)

# INCOMPLETE: Summarize each section of the metadata separately

## 1. Summarize the tables
Get each table's description and its column names and descriptions, then summarize them.

In [0]:
import json

def escape_quotes(value: str) -> str:
    """Escape ``value`` so it can be embedded between double quotes.

    Backslashes are doubled and double quotes are prefixed with a backslash.
    Escaping backslashes matters: without it, a value ending in a backslash
    would swallow the closing quote, and an existing ``\\"`` sequence would be
    indistinguishable from an escaped quote.

    Args:
        value: The raw text to embed.

    Returns:
        The text with ``\\`` and ``"`` escaped.
    """
    # Backslashes first; otherwise the backslashes added for the quotes would
    # themselves be doubled.
    return value.replace("\\", "\\\\").replace('"', '\\"')


def _description_text(raw) -> str:
    """Collapse a description (list of fragments, string, or anything else) into one string."""
    if isinstance(raw, list):
        return " ".join(str(part) for part in raw)
    if isinstance(raw, str):
        return raw
    # Missing or unsupported type -> empty description.
    return ""


def build_yaml_like_output(genie_json: dict) -> str:
    """Render the tables of a parsed Genie space definition as YAML-like text.

    Reads ``genie_json["data_sources"]["tables"]``. For each table it emits the
    identifier, a description, and one entry per item in ``column_configs``
    (column name and description). Keys that are missing or explicitly null
    are treated as empty rather than raising.

    The table description is taken from the table's ``description`` value when
    it is a non-empty string or list; otherwise the placeholder
    ``This is the description of table <name>`` is used, where ``<name>`` is the
    last dot-separated part of the identifier.
    NOTE(review): a table-level ``description`` key is assumed to exist in the
    serialized space format - confirm; when absent the placeholder is emitted.

    Args:
        genie_json: The serialized space, already decoded into a dict.

    Returns:
        The rendered lines joined with newlines ("" when there are no tables).
    """
    lines = []

    # `or` (rather than .get defaults) also covers keys present with a null value.
    tables = (genie_json.get("data_sources") or {}).get("tables") or []
    for table in tables:
        identifier = table.get("identifier") or ""
        table_name = identifier.split(".")[-1]  # "" when the identifier is empty

        table_desc = (
            _description_text(table.get("description"))
            or f"This is the description of table {table_name}"
        )

        # Table-level info
        lines.append('- table:')
        lines.append(f'  - name: "{escape_quotes(identifier)}"')
        lines.append(f'  - description: "{escape_quotes(table_desc)}"')
        lines.append('  columns:')

        # Columns
        for col in table.get("column_configs") or []:
            col_name = col.get("column_name") or ""
            col_desc = _description_text(col.get("description"))

            lines.append('    - column:')
            lines.append(f'      - name: "{escape_quotes(col_name)}"')
            lines.append(f'      - description: "{escape_quotes(col_desc)}"')

    return "\n".join(lines)


In [0]:
import json

# Decode the serialized space (the JSON string printed earlier) into a dictionary.
genie_dict = json.loads(serialized_blob)
# Render the tables and their columns as YAML-like text and display it.
out = build_yaml_like_output(genie_dict)
print(out)