In [0]:
%pip install databricks-langchain langgraph langchain mlflow python-dotenv
%restart_python

Collecting databricks-langchain
  Downloading databricks_langchain-0.5.1-py3-none-any.whl.metadata (2.7 kB)
Collecting langgraph
  Downloading langgraph-0.4.8-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting mlflow
  Downloading mlflow-2.22.1-py3-none-any.whl.metadata (30 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting databricks-ai-bridge>=0.4.2 (from databricks-langchain)
  Downloading databricks_ai_bridge-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Collecting databricks-connect>=16.1.1 (from databricks-langchain)
  Downloading databricks_connect-16.1.5-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting databricks-vectorsearch>=0.50 (from databricks-langchain)
  Downloading databricks_vectorsearch-0.56-py3-none-any.whl.metadata (2.8 kB)
Collecting pydantic>2.10.0 (from databricks-langchain)
  Downloading pydantic-2.11.5-py3-none-any.whl.metadata 

In [0]:
query = """
SELECT
  `data_id`,
  `date`,
 `accessibility_info`.`description`,
 `accessibility_info`.`is_available` AS `is_accessible`,
  SPLIT(`address`, ',')[0] AS `street_address`,
  get(SPLIT(`address`, ','), 1) AS `city`,
  get(SPLIT(SPLIT(`address`, ',')[2], ' '), 1) AS `state`,
  get(SPLIT(SPLIT(`address`, ',')[2], ' '), 2) AS `zip_code`
FROM
  (
    SELECT
      *, EXPLODE(`accessibility`) AS `accessibility_info`
    FROM
      `dais-hackathon`.`nimble`.`dbx_google_maps_place_daily`
    WHERE
      `country` = 'US'
  ) AS `filtered_data`
  """

In [0]:
df = spark.sql(query).toPandas()

In [0]:
df[['description', 'is_accessible', 'street_address', 'city', 'state', 'zip_code']]

Unnamed: 0,description,is_accessible,street_address,city,state,zip_code
0,Has wheelchair accessible entrance,True,1431 Plymouth St,Mountain View,CA,94043
1,Has wheelchair accessible parking lot,True,1431 Plymouth St,Mountain View,CA,94043
2,Has wheelchair accessible restroom,True,1431 Plymouth St,Mountain View,CA,94043
3,Has wheelchair accessible seating,True,1431 Plymouth St,Mountain View,CA,94043
4,Has wheelchair-accessible entrance,True,20 W 34th St.,New York,NY,10001
5,Has wheelchair-accessible toilet,True,20 W 34th St.,New York,NY,10001
6,No wheelchair-accessible car park,False,20 W 34th St.,New York,NY,10001
7,Has wheelchair-accessible entrance,True,340 Lafayette St,New York,NY,10012
8,No wheelchair-accessible parking lot,False,340 Lafayette St,New York,NY,10012
9,No wheelchair accessible parking lot,False,51 E Houston St,New York,NY,10012


# Best family friendly locations

In [0]:
%python
import pandas as pd
from langchain.chat_models import ChatDatabricks
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import json
import os
import mlflow
from databricks.sdk import WorkspaceClient

from dotenv import load_dotenv

mlflow.langchain.autolog()

load_dotenv()

w = WorkspaceClient()

os.environ["DATABRICKS_HOST"] = w.config.host
os.environ["DATABRICKS_TOKEN"] = w.tokens.create(comment="for model serving", lifetime_seconds=1200).token_value

class Place(BaseModel):
    date: str
    description: str
    is_accessible: str
    street_address: str
    city: str
    state: str
    zip_code: int

class RecList(BaseModel):
    recommendations: list[Place] = Field(..., description='List of recommended places')

llm = ChatDatabricks(endpoint="databricks-llama-4-maverick", max_tokens=4048)

parser = PydanticOutputParser(pydantic_object=RecList)

prompt = ChatPromptTemplate.from_messages([
    ("system", "Provide the locations that are family friendly toliets. The description column tells us if the location is family friendly or not. Return JSON only:\n{format_instructions}"),
    ("human", "Data:\n{data}")
])

chain = prompt | llm | parser

#json_fragment = df.head(100).to_json(orient="records")
json_fragment = df.to_json(orient="records")
response = chain.invoke({
    "data": json_fragment,
    "format_instructions": parser.get_format_instructions()
})

# Ensure the output is valid JSON
try:
    output = json.loads(response.json())
    print(json.dumps(output, indent=4))
except json.JSONDecodeError as e:
    print(f"Invalid JSON output: {e}")

{
    "recommendations": [
        {
            "date": "2025-02-02T16:00:00.000Z",
            "description": "Has wheelchair accessible restroom",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "date": "2025-02-02T16:00:00.000Z",
            "description": "Has wheelchair-accessible toilet",
            "is_accessible": "true",
            "street_address": "20 W 34th St.",
            "city": " New York",
            "state": "NY",
            "zip_code": 10001
        },
        {
            "date": "2025-02-04T16:00:00.000Z",
            "description": "Has wheelchair accessible restroom",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "date": "2025

/home/spark-e7ea95ae-4fcd-461a-8ba7-2e/.ipykernel/17917/command-8879970974882473-4027960414:54: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  output = json.loads(response.json())


Trace(request_id=tr-404adee66c724f908855dee723b9f3ce)

In [0]:
output

{'recommendations': [{'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible seating',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible restroom',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible parking lot',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-02T00:00:00.000Z',
   'description': 'Has wheelchair accessible entrance',
   'is_accessible': 'true',
   'street_address': '1431 Plymouth St',
   'city': ' Mountain View',
   'state': 'CA',
   'zip_code': 94043},
  {'date': '2025-02-03T00:00:00.000Z',
   '

In [0]:
%python
import pandas as pd
from langchain.chat_models import ChatDatabricks
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
import json
import os
import mlflow
from databricks.sdk import WorkspaceClient

from dotenv import load_dotenv

mlflow.langchain.autolog()

load_dotenv()

w = WorkspaceClient()

os.environ["DATABRICKS_HOST"] = w.config.host
os.environ["DATABRICKS_TOKEN"] = w.tokens.create(comment="for model serving", lifetime_seconds=1200).token_value

class Place(BaseModel):
    date: str
    description: str
    is_accessible: str
    street_address: str
    city: str
    state: str
    zip_code: int

class RecList(BaseModel):
    recommendations: list[Place] = Field(..., description='List of recommended places')

llm = ChatDatabricks(endpoint="databricks-llama-4-maverick", max_tokens=4048)

parser = PydanticOutputParser(pydantic_object=RecList)

prompt = ChatPromptTemplate.from_messages([
    ("system", "Provide the locations that have accessible entrances. The description column tells us if the location is family friendly or not. Return JSON only:\n{format_instructions}"),
    ("human", "Data:\n{data}")
])

chain = prompt | llm | parser

#json_fragment = df.head(100).to_json(orient="records")
json_fragment = df.to_json(orient="records")
response = chain.invoke({
    "data": json_fragment,
    "format_instructions": parser.get_format_instructions()
})

# Ensure the output is valid JSON
try:
    output = json.loads(response.json())
    print(json.dumps(output, indent=4))
except json.JSONDecodeError as e:
    print(f"Invalid JSON output: {e}")

{
    "recommendations": [
        {
            "date": "2025-02-02T16:00:00.000Z",
            "description": "Has wheelchair accessible entrance",
            "is_accessible": "true",
            "street_address": "1431 Plymouth St",
            "city": " Mountain View",
            "state": "CA",
            "zip_code": 94043
        },
        {
            "date": "2025-02-02T16:00:00.000Z",
            "description": "Has wheelchair-accessible entrance",
            "is_accessible": "true",
            "street_address": "20 W 34th St.",
            "city": " New York",
            "state": "NY",
            "zip_code": 10001
        },
        {
            "date": "2025-02-02T16:00:00.000Z",
            "description": "Has wheelchair-accessible entrance",
            "is_accessible": "true",
            "street_address": "340 Lafayette St",
            "city": " New York",
            "state": "NY",
            "zip_code": 10012
        },
        {
            "date": "2025-02

/home/spark-e7ea95ae-4fcd-461a-8ba7-2e/.ipykernel/17917/command-8879970974882529-4263953928:54: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  output = json.loads(response.json())


Trace(request_id=tr-7aa25b54f6a0476a99cd5c8326af9279)

# Validation framework

In [0]:
%python
import pandas as pd
import json
import re

rec_obj: RecList = response
recs_df = pd.DataFrame(json.loads(rec_obj.json())['recommendations'])

id_cols = ["street_address", "city", "state", "zip_code"]

missing_cols = [col for col in id_cols if col not in recs_df.columns]
if missing_cols:
    raise KeyError(f"Columns missing in recs_df: {missing_cols}")

if 'description' not in candidate_df.columns:
    raise KeyError("Column 'description' missing in candidate_df")

if 'description' not in recs_df.columns:
    raise KeyError("Column 'description' missing in recs_df")

candidate_df = df.head(100).copy()

for col in id_cols:
    candidate_df[col] = candidate_df[col].astype(str).str.strip().str.lower()
    recs_df[col]      = recs_df[col].astype(str).str.strip().str.lower()

recs_df = recs_df.rename(columns={"description": "recs_description"})

merged = recs_df.merge(candidate_df[id_cols + ["description"]],  
                       on=id_cols, how="left", indicator=True)

merged['prediction_correct'] = merged['_merge'].apply(lambda x: 0 if x == 'both' else 1)

display(merged)

/home/spark-e7ea95ae-4fcd-461a-8ba7-2e/.ipykernel/17917/command-8879970974882522-1660190286:9: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  recs_df = pd.DataFrame(json.loads(rec_obj.json())['recommendations'])


date,recs_description,is_accessible,street_address,city,state,zip_code,description,_merge,prediction_correct
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible entrance,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible parking lot,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible restroom,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible seating,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible entrance,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible parking lot,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible restroom,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible seating,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible entrance,both,0
2025-02-02T16:00:00.000Z,Has wheelchair accessible entrance,True,1431 plymouth st,mountain view,ca,94043,Has wheelchair accessible parking lot,both,0
