In [0]:
%pip install databricks-langchain langgraph langchain mlflow python-dotenv
%restart_python

Collecting databricks-langchain
  Downloading databricks_langchain-0.5.1-py3-none-any.whl.metadata (2.7 kB)
Collecting langgraph
  Downloading langgraph-0.4.8-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting mlflow
  Downloading mlflow-2.22.1-py3-none-any.whl.metadata (30 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting databricks-ai-bridge>=0.4.2 (from databricks-langchain)
  Downloading databricks_ai_bridge-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Collecting databricks-connect>=16.1.1 (from databricks-langchain)
  Downloading databricks_connect-16.1.5-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting databricks-vectorsearch>=0.50 (from databricks-langchain)
  Downloading databricks_vectorsearch-0.56-py3-none-any.whl.metadata (2.8 kB)
Collecting pydantic>2.10.0 (from databricks-langchain)
  Downloading pydantic-2.11.5-py3-none-any.whl.metadata 

In [0]:
query = """
SELECT
  `data_id`,
  `date`,
 `accessibility_info`.`description`,
 `accessibility_info`.`is_available` AS `is_accessible`,
  SPLIT(`address`, ',')[0] AS `street_address`,
  get(SPLIT(`address`, ','), 1) AS `city`,
  get(SPLIT(SPLIT(`address`, ',')[2], ' '), 1) AS `state`,
  get(SPLIT(SPLIT(`address`, ',')[2], ' '), 2) AS `zip_code`
FROM
  (
    SELECT
      *, EXPLODE(`accessibility`) AS `accessibility_info`
    FROM
      `dais-hackathon`.`nimble`.`dbx_google_maps_place_daily`
    WHERE
      `country` = 'US'
  ) AS `filtered_data`
  """

In [0]:
df = spark.sql(query).toPandas()

# Best family friendly locations

In [0]:
%python
import pandas as pd
from langchain.chat_models import ChatDatabricks
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import json
import os
import mlflow
from databricks.sdk import WorkspaceClient

from dotenv import load_dotenv

mlflow.langchain.autolog()

load_dotenv()

w = WorkspaceClient()

os.environ["DATABRICKS_HOST"] = w.config.host
os.environ["DATABRICKS_TOKEN"] = w.tokens.create(comment="for model serving", lifetime_seconds=1200).token_value

class Place(BaseModel):
    date: str
    description: str
    is_accessible: str
    street_address: str
    city: str
    state: str
    zip_code: int

class RecList(BaseModel):
    recommendations: list[Place] = Field(..., description='List of recommended places')

llm = ChatDatabricks(endpoint="databricks-llama-4-maverick", max_tokens=4048)

parser = PydanticOutputParser(pydantic_object=RecList)

prompt = ChatPromptTemplate.from_messages([
    ("system", "Return which locations are family friendly. The description column tells us if the location is family friendly or not. Return JSON only:\n{format_instructions}"),
    ("human", "Data:\n{data}")
])

chain = prompt | llm | parser

json_fragment = df.head(100).to_json(orient="records")
response = chain.invoke({
    "data": json_fragment,
    "format_instructions": parser.get_format_instructions()
})

# Ensure the output is valid JSON
try:
    output = json.loads(response.json())
    print(json.dumps(output, indent=4))
except json.JSONDecodeError as e:
    print(f"Invalid JSON output: {e}")

  llm = ChatDatabricks(endpoint="databricks-llama-4-maverick", max_tokens=4048)


{
    "recommendations": [
        {
            "date": "2025-02-02T00:00:00.000Z",
            "description": "Has wheelchair accessible seating",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "date": "2025-02-02T00:00:00.000Z",
            "description": "Has wheelchair accessible restroom",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "date": "2025-02-02T00:00:00.000Z",
            "description": "Has wheelchair accessible parking lot",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "

/home/spark-e7ea95ae-4fcd-461a-8ba7-2e/.ipykernel/17917/command-8879970974882473-3223657831:53: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  output = json.loads(response.json())


Trace(request_id=tr-5f3b56782640458cad00aff7348f7ed0)

In [0]:
output

{'recommendations': [{'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible seating',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible restroom',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible parking lot',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible entrance',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-03T00:00:00.000Z',
   '