In [None]:
from langsmith import Client
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools import BaseTool, StructuredTool, tool
from langchain.chat_models import init_chat_model
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory

import sys
from pathlib import Path

PROJECT_ROOT = Path("/Users/yuqiugan/Desktop/Personal_DS_Projects/house_rental_agent")
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src.GoogleMaps_tools import query_google_maps
from src.Zillow_tools import submit_zillow_query_via_brightData, retrieve_snapshot_from_brightData
from src.Database_tools import bulk_upsert_listings
from src.Query_database_tool import agent_query_listing_table_tool
from src.settings import GOOGLE_GEMINI_API_KEY
from src.Helper_tools import get_date_time_now
from langchain_google_genai import ChatGoogleGenerativeAI


### Build Agent via LangChain

In [None]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=GOOGLE_GEMINI_API_KEY,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

tools = [query_google_maps, submit_zillow_query_via_brightData, 
         retrieve_snapshot_from_brightData, bulk_upsert_listings, 
         agent_query_listing_table_tool, get_date_time_now]

system_prompt = """
You are a professional real estate broker. Use tools iteratively to gather *real* rental listings.

You have 6 tools:
- query_google_maps: transit route information from origin to destination.
- submit_zillow_query_via_brightData: submit a Zillow search. Returns {{\"snapshot_id\": \"...\"}} on success.
- retrieve_snapshot_from_brightData: given snapshot_id, poll until data is ready (the tool handles waiting/retries internally). Returns listing data or {{\"status\": \"not_ready\"}} if it truly timed out.
- bulk_upsert_listings: upsert the listing information to the database listing table.
- agent_query_listing_table_tool: query listing information from local database, this will not cost any expense but may get some outdated listings. In the database's listing table, "updated_at" indicate the last time the record was updated,
"created_at" indicates the first time that this record logged in the table. IMPORTANT: Pass spec as a JSON object, not a string or code block. Do not wrap in backticks.
- get_date_time_now: No input parameters, directly return the date time of now.

CRITICAL EXECUTION RULES
- After calling submit_zillow_query_via_brightData, you MUST immediately call retrieve_snapshot_from_brightData with the snapshot_id returned.
- Submit exactly once per user request. If you already have a snapshot_id in this conversation, DO NOT call submit_zillow_query_via_brightData again. Wait until retrieve_snapshot_from_brightData get valid results.
- By default, if retrieve_snapshot_from_brightData return valid data, ALWAYS save to the local database for future reference.
- Do NOT produce a final answer until retrieve_snapshot_from_brightData returns real listing data (non-empty) or definitively reports no data (e.g., timed out after internal retries).
- If retrieve_snapshot_from_brightData returns {{\"status\":\"not_ready\"}} or similar, you may try it again. However, the tool itself already waits and retries; only re-call if it explicitly tells you it's safe to try again later.
- By default, when using query_google_maps to plan the route, the desination should always be user's request destination, the origin should be the address of the properties you got from retrieve_snapshot_from_brightData.
- By default, when user requests to check listings in a certain area, ALWAYS check local database first. If you find the records are out dated (no records in recent 5 days), search online then.
- When user ask about the details about the transit plan, alway generate transit map (set generate_route_map=True when calling query_google_maps)
- Keep your intermediate reasoning in the scratchpad; the user should only see the final JSON.


FINAL OUTPUT FORMAT (ONLY WHEN DONE)
Return a JSON object with:
- "Tool call history": String - brief tool call log.
- "Reasoning history": String - brief description of what you did and why.
- Return JSON only, do not write ``` json at the beginging or end of the JSON structure.
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}")
    ]
)

_session_store = {}

def get_session_history(session_id: str) -> ChatMessageHistory:
    if session_id not in _session_store:
        _session_store[session_id] = ChatMessageHistory()
    return _session_store[session_id]


rental_agent = create_tool_calling_agent(llm, tools, prompt)
rental_agent_executor = AgentExecutor(
    agent = rental_agent, tools = tools, verbose=False, return_intermediate_steps=True
)

conversational_executor = RunnableWithMessageHistory(
    rental_agent_executor,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history"
)

session_cfg = {"configurable": {"session_id": "yuqiu-local-dev"}}

In [6]:
query_message = "What's the date now?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

Key 'additionalProperties' is not supported in schema, ignoring


Key 'additionalProperties' is not supported in schema, ignoring


In [7]:
response.get('output')

'{"Tool call history": "default_api.get_date_time_now()", "Reasoning history": "The user asked for the current date, so I used the `get_date_time_now` tool to retrieve it."}\n2025-09-09 01:31:12.903294'

In [9]:
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

'{"Tool call history": "default_api.get_date_time_now()", "Reasoning history": "The user asked for the current date, so I used the `get_date_time_now` tool to retrieve it."}\n2025-09-09 01:31:12.903294'

In [10]:
query_message = "Have you helped me before?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

Key 'additionalProperties' is not supported in schema, ignoring


In [11]:
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

"As a large language model, I don't have memory of past conversations. Each interaction is fresh, and I don't retain personal information or history from previous exchanges."

In [12]:
query_message = "My name is Yuqiu Gan."
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

Key 'additionalProperties' is not supported in schema, ignoring


In [13]:
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

"It's nice to meet you, Yuqiu Gan! How can I help you today?"

In [14]:
query_message = "What is my name?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

Key 'additionalProperties' is not supported in schema, ignoring


'Your name is Yuqiu Gan.'

In [15]:
query_message = "Which date is today?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


'{"Tool call history": "default_api.get_date_time_now()", "Reasoning history": "The user asked for the current date, so I used the `get_date_time_now` tool to retrieve it."}\nToday\'s date is 2025-09-09.'

In [16]:
query_message = "I want to find apartment in weehawken, NJ area. My buget is $4500 and hope to find at least 2 bedrooms' unit. Can you list some available options?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


In [19]:
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

# import json
# json.loads(output2)
output2

'{"Tool call history": "default_api.get_date_time_now()\\ndefault_api.agent_query_listing_table(spec={\'limit\': 25, \'order_by\': [{\'direction\': \'asc\', \'field\': \'listing_price\'}], \'select\': [\'external_id\', \'address_street\', \'address_city\', \'bedrooms\', \'bathrooms\', \'listing_price\', \'home_type\', \'hdp_url\', \'updated_at\'], \'where\': [{\'field\': \'address_city\', \'op\': \'ilike\', \'value\': \'%Weehawken%\'}, {\'field\': \'bedrooms\', \'op\': \'>=\', \'value\': 2}, {\'field\': \'listing_price\', \'op\': \'<=\', \'value\': 4500}, {\'field\': \'home_type\', \'op\': \'ilike\', \'value\': \'%APARTMENT%\'}, {\'field\': \'updated_at\', \'op\': \'>=\', \'value\': \'2025-09-04\'}]})", "Reasoning history": "The user requested apartment listings in Weehawken, NJ, with a budget of $4500 and at least 2 bedrooms. I first checked the current date using `get_date_time_now` to ensure I could filter for recent listings. Then, I queried the local database using `agent_query_li

In [20]:
query_message = "Can you give me the detail of the last listing?"
# response = rental_agent_executor.invoke({"input": query_message})
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

output2

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


'{"Tool call history": "default_api.agent_query_listing_table(spec={\'select\': [], \'where\': [{\'field\': \'external_id\', \'op\': \'=\', \'value\': \'349542017\'}]})", "Reasoning history": "The user asked for details about a specific listing. I used the `agent_query_listing_table` tool to retrieve all available information for the listing with the provided external ID from the local database. I am now presenting the detailed information to the user."}\nHere are the details for the listing at 1300 Avenue At Port, Weehawken, NJ (Riverparc at Port Imperial) with an external ID of 349542017:\n\n*   **Address:** 1300 Avenue At Port, Weehawken, NJ\n*   **Bedrooms:** 2\n*   **Bathrooms:** 2\n*   **Listing Price:** $4416\n*   **Home Type:** Apartment\n*   **Availability Date:** 2025-09-18\n*   **Listing URL:** https://www.zillow.com/apartments/weehawken-nj/riverparc-at-port-imperial/ChVBh3/#unit-349542017\n*   **Unit Amenities:** Wood, Dryer, View, Washer, Walk-In Closet(s)\n*   **Photos:**

In [21]:
query_message = "My company is at '277 Park Ave, New York, NY 10172', if I move to this place how should I transit to my company?"
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

output2

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


'{"Tool call history": "default_api.query_google_maps(destination=\'277 Park Ave, New York, NY 10172\', origin=\'1300 Avenue At Port, Weehawken, NJ\', origin_name=\'1300 Avenue At Port\')", "Reasoning history": "The user asked for transit information from the previously discussed listing to their company address. I used the `query_google_maps` tool to get public transit directions, providing the listing address as the origin and the company address as the destination. I am now presenting the transit details to the user."}\nHere\'s a public transit route from 1300 Avenue At Port, Weehawken, NJ to 277 Park Ave, New York, NY 10172:\n\nThe total trip duration is approximately **34 minutes** and covers a distance of **4.2 miles**.\n\nHere are the steps:\n\n1.  **Bus:** Take the 158 Bus towards New York via River Road from Lincoln Harbor to Port Authority Bus Terminal. (Approx. 10 minutes)\n2.  **Walk:** Walk from Port Authority Bus Terminal to Times Sq–42 St subway station.\n3.  **Subway:**

In [22]:
query_message = "That takes a lot of time. In the listings you provided me ealier, is there any one I can control my transit time in 20 mins?"
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

output2

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


'{"Tool call history": "default_api.query_google_maps(destination=\'277 Park Ave, New York, NY 10172\', origin=\'4612 Park Ave #101A, Weehawken, NJ 07086\', origin_name=\'4612 Park Ave #101A\')\\ndefault_api.query_google_maps(destination=\'277 Park Ave, New York, NY 10172\', origin=\'61 46th St #2, Weehawken, NJ 07086\', origin_name=\'61 46th St #2\')\\ndefault_api.query_google_maps(destination=\'277 Park Ave, New York, NY 10172\', origin=\'4916 Park Ave, Weehawken, NJ 07086\', origin_name=\'4916 Park Ave\')\\ndefault_api.query_google_maps(destination=\'277 Park Ave, New York, NY 10172\', origin=\'1300 Avenue At Port, Weehawken, NJ\', origin_name=\'1300 Avenue At Port\')", "Reasoning history": "The user asked to find a listing with a transit time of 20 minutes or less to their company. I have already retrieved transit information for all the provided listings in the previous turn. I am now analyzing the results to see if any meet the criteria."}\nI have checked the transit times for al

In [23]:
query_message = "I see, in that case, which apartment you think suit me best? I want to take both the convinience of transit and the price into consideration. Give me reasons."
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

output2

Key 'additionalProperties' is not supported in schema, ignoring


"Considering both transit convenience and price, I would recommend one of the **2-bedroom, 2-bathroom units at 1300 Avenue At Port, Weehawken, NJ (Riverparc at Port Imperial)**.\n\nHere's why:\n\n*   **Transit Convenience:** These units offer the shortest transit time to your company at 277 Park Ave, New York, NY 10172, at approximately **34 minutes**. This is a significant improvement compared to the other listings, which have commute times closer to 48-49 minutes. While it's still more than your desired 20 minutes, it's the most convenient option among the available listings.\n\n*   **Price:** While these units are at the higher end of your $4500 budget (starting from $4138), they still fall within your specified range. The other listings are more affordable, but the trade-off is a much longer commute.\n\n**Recommendation:**\n\nSpecifically, the unit at **1300 Avenue At Port, Weehawken, NJ** with an external ID of **2063201591** is priced at **$4138**. This unit offers the best balan

In [24]:
query_message = "I would love to live in a place that has natural lights. Is there any in your result?"
response = conversational_executor.invoke({"input": query_message}, config=session_cfg)
output = response.get('output')
output2 = output.strip()

import re 
if output2.startswith("```"):
    output2 = re.sub(r"^```[a-zA-Z0-9]*\n", "", output2)
    output2 = re.sub(r"\n```$", "", output2)

output2

Key 'additionalProperties' is not supported in schema, ignoring
Key 'additionalProperties' is not supported in schema, ignoring


'Yes, I found two listings that explicitly mention having natural light:\n\n1.  **4612 Park Ave #101A, Weehawken, NJ 07086**\n    *   The general description states: "Tons of sunlight Plenty of windows"\n\n2.  **61 46th St #2, Weehawken, NJ 07086**\n    *   The general description states: "Tons of natural light"\n\nFor the other listings, particularly those at 1300 Avenue At Port (Riverparc at Port Imperial), the detailed descriptions regarding natural light are not available in my current records.'

In [26]:
_session_store

{'yuqiu-local-dev': InMemoryChatMessageHistory(messages=[HumanMessage(content="What's the date now?", additional_kwargs={}, response_metadata={}), AIMessage(content='{"Tool call history": "default_api.get_date_time_now()", "Reasoning history": "The user asked for the current date, so I used the `get_date_time_now` tool to retrieve it."}\n2025-09-09 01:31:12.903294', additional_kwargs={}, response_metadata={}), HumanMessage(content='Have you helped me before?', additional_kwargs={}, response_metadata={}), AIMessage(content="As a large language model, I don't have memory of past conversations. Each interaction is fresh, and I don't retain personal information or history from previous exchanges.", additional_kwargs={}, response_metadata={}), HumanMessage(content='My name is Yuqiu Gan.', additional_kwargs={}, response_metadata={}), AIMessage(content="It's nice to meet you, Yuqiu Gan! How can I help you today?", additional_kwargs={}, response_metadata={}), HumanMessage(content='What is my n