In [None]:
!pip install langchain-google-genai langchain langchain_community beautifulsoup4 requests pandas chromadb

Collecting langchain_community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting chromadb
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Using cached google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1

# API Keys

In [None]:
import os, getpass

# Ask for the key only when it is not already present in the environment, so that
# re-running this cell (or running the notebook with GOOGLE_API_KEY pre-set) does
# not block on an interactive prompt. getpass keeps the typed key out of the output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("API Key:")

API Key:··········


# Web Scraping

In [None]:
import requests, json, re, time, random
from bs4 import BeautifulSoup

# Browser-like User-Agent header passed to every requests.get() call made by the scrapers.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def scrape_magicbricks(city="Visakhapatnam", pages=5):
    """Scrape rental listing cards from Magicbricks search-result pages.

    Args:
        city: City name placed in the ``cityName`` query parameter (URL-encoded).
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``sqft``, ``locality``
        and ``url``. A value is ``None`` when it cannot be found in the card.

    A page whose request fails (network error, timeout, or non-2xx status) is
    reported with ``print`` and skipped, so one bad page does not discard the
    rows already collected.
    """
    from urllib.parse import quote, urljoin

    site = "https://www.magicbricks.com"

    def text_of(node, selector):
        # Stripped text of the first match, or None when the selector finds nothing.
        found = node.select_one(selector)
        return found.text.strip() if found is not None else None

    def area_of(card):
        # A card carries several summary values (furnishing, status, area, ...).
        # Taking the first one yields values such as "Furnished", so pick the
        # first value that actually looks like an area ("1200 sqft", "95 sq.m").
        for value in card.select(".mb-srp__card__summary--value"):
            text = value.text.strip()
            if re.search(r"\d[\d,.]*\s*sq", text, flags=re.IGNORECASE):
                return text
        return None

    def locality_from_title(title):
        # Heuristic fallback: titles look like "... for Rent in <locality>, <city>"
        # or "... in <project>, <locality>, <city>"; the part before the city is
        # taken as the locality. Returns None when the title names only the city.
        if not title or " in " not in title:
            return None
        parts = [part.strip() for part in title.rsplit(" in ", 1)[1].split(",")]
        return parts[-2] if len(parts) >= 2 and parts[-2] else None

    listings = []
    for page in range(1, pages + 1):
        url = (
            f"{site}/property-for-rent/residential-real-estate"
            "?&proptype=Multistorey-Apartment,Builder-Floor-Apartment,Penthouse,Studio-Apartment"
            f"&cityName={quote(str(city))}&page={page}"
        )
        try:
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"scrape_magicbricks: skipping page {page}: {exc}")
        else:
            soup = BeautifulSoup(response.text, "lxml")
            for card in soup.select(".mb-srp__list"):
                title = text_of(card, ".mb-srp__card--title")
                locality_text = text_of(card, ".mb-srp__card--title+div")
                link = card.select_one("a[href]")
                listings.append({
                    "title": title,
                    "price": text_of(card, ".mb-srp__card__price--amount"),
                    "sqft": area_of(card),
                    "locality": (
                        locality_text.split(",")[0]
                        if locality_text
                        else locality_from_title(title)
                    ),
                    # urljoin leaves absolute hrefs untouched and joins relative ones.
                    "url": urljoin(site, link["href"]) if link is not None else None,
                })
        # Polite random pause between page requests; not needed after the last one.
        if page < pages:
            time.sleep(random.uniform(1, 3))
    return listings

# Scrape 5 result pages for the default city; later cells read and extend this list.
vizag_listings = scrape_magicbricks(pages=5)

# Scraped Data to JSON

In [None]:
# Save the scraped rows as indented UTF-8 JSON; non-ASCII characters are written as-is.
with open("vizag_rentals.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(vizag_listings, ensure_ascii=False, indent=2))

# Building VectorDB with Scraped Data

In [None]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.schema import Document

import hashlib

# One Document per listing, keyed by a hash of its text, url and source.
# Passing these keys as ids means re-running this cell overwrites the same
# entries in the "vizag_rentals" collection instead of appending copies, and
# the dict also drops rows that are exact repeats within the scrape.
docs_by_id = {
    hashlib.sha1(
        f"{doc.page_content}|{doc.metadata.get('url')}|{doc.metadata.get('source')}".encode("utf-8")
    ).hexdigest(): doc
    for doc in (
        Document(
            page_content=f"{row['title']} in {row['locality']} priced at {row['price']} covering {row['sqft']}",
            metadata=dict(row),  # copy, so later edits to the row do not alter the document
        )
        for row in vizag_listings
    )
}
docs = list(docs_by_id.values())

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    ids=list(docs_by_id),
    collection_name="vizag_rentals"
)

# LangChain Agent

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import Tool, initialize_agent, AgentType
from langchain.tools.retriever import create_retriever_tool

# Retriever over the vector store: each lookup returns the 5 closest listings.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

# Expose the retriever to the agent as a named tool.
retriever_tool = create_retriever_tool(
    retriever,
    description="Search for residential rental properties in Visakhapatnam with price, area and locality filters.",
    name="vizag_rental_search",
)

tools = [retriever_tool]

# Chat model that drives the agent; temperature 0 is passed for the model call.
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-pro")

# Zero-shot ReAct agent built with the initialize_agent helper.
agent = initialize_agent(
    tools,
    llm,
    handle_parsing_errors=True,
    verbose=True,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [None]:
# 1. Re-build the vector store with correct Document structure
import hashlib

from langchain_core.documents import Document   # NEW import

# Key every Document by a hash of its text, url and source. Because this store is
# persisted to ./chroma_db, adding documents without stable ids would append a
# fresh copy of every listing each time the cell runs; with ids the same entries
# are overwritten. The dict also drops exact repeats within the scrape.
docs_by_id = {
    hashlib.sha1(
        f"{doc.page_content}|{doc.metadata.get('url')}|{doc.metadata.get('source')}".encode("utf-8")
    ).hexdigest(): doc
    for doc in (
        Document(
            page_content=f"{row['title']} in {row['locality']} priced at {row['price']} covering {row['sqft']}",
            metadata=dict(row),   # flat copy of the row
        )
        for row in vizag_listings
    )
}
docs = list(docs_by_id.values())

# 2. Recreate vectorstore
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    ids=list(docs_by_id),
    collection_name="vizag_rentals",
    persist_directory="./chroma_db"   # optional persistence
)

# 3. Wrap the rebuilt store in a retriever tool (top-5 matches per lookup)
from langchain.tools.retriever import create_retriever_tool

retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))
vizag_tool = create_retriever_tool(
    retriever,
    description="Search for residential rental properties in Visakhapatnam with price, area and locality filters.",
    name="vizag_rental_search",
)

# 4. Build the ReAct agent and the executor that runs it
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent

llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-flash")
prompt = hub.pull("hwchase17/react")   # ReAct prompt template fetched from the hub

agent = create_react_agent(llm, [vizag_tool], prompt)

# The executor loops over the agent's steps, calling the tool as requested.
agent_executor = AgentExecutor(
    tools=[vizag_tool],
    agent=agent,
    handle_parsing_errors=True,
    verbose=True,
)





In [None]:
# 5. Ask the agent for the average rent in one locality; invoke() takes a dict
#    with the question under "input" and returns a dict with "input" and "output".
agent_executor.invoke({"input": "What is average rent of Dwaraka Nagar?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to search for rental properties in Dwaraka Nagar, Visakhapatnam to get an idea of the average rent.  I'll use the `vizag_rental_search` function to find relevant listings.

Action: vizag_rental_search
Action Input: query='rental properties in Dwaraka Nagar Visakhapatnam'
[0m[36;1m[1;3m3 BHK Flat for Rent in Dwarakanagar, Visakhapatnam in None priced at ₹18,000 covering Furnished

3 BHK Flat for Rent in Dwarakanagar, Visakhapatnam in None priced at ₹18,000 covering Furnished

3 BHK Flat for Rent in Dwarakanagar, Visakhapatnam in None priced at ₹18,000 covering Furnished

2 BHK Flat for Rent in Dwarakanagar, Visakhapatnam in None priced at ₹26,000 covering Semi-Furnished

2 BHK Flat for Rent in Dwarakanagar, Visakhapatnam in None priced at ₹26,000 covering Semi-Furnished[0m[32;1m[1;3mQuestion: What is average rent of Dwaraka Nagar?
Thought: I need to search for rental properties in Dwaraka Nagar, Visakhapa

{'input': 'What is average rent of Dwaraka Nagar?',
 'output': 'The average rent in Dwaraka Nagar, based on the limited data, is approximately ₹21,200.  This is a rough estimate and may not be representative of the entire area.'}

In [None]:
# Counting question: the retriever tool returns only 5 matches per search, so
# the agent cannot produce a true total from it.
agent_executor.invoke({"input": "How many apartments are there in Visakhapatnam, with 2BHK?"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to use the `vizag_rental_search` function to find apartments in Visakhapatnam that are 2BHK.  I don't have a way to get an exact count, but I can search and see how many results I get.  This will give me an estimate.  The search will likely return a limited number of results, not a complete count of all 2BHK apartments in the city.

Action: vizag_rental_search
Action Input: query='2BHK apartments in Visakhapatnam'
[0m[36;1m[1;3m2 BHK Flat for Rent in Visakhapatnam in None priced at ₹21,000 covering Semi-Furnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹21,000 covering Semi-Furnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹21,000 covering Semi-Furnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹12,000 covering Semi-Furnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹12,000 covering Semi-Furnished[0m[32;1m[1;3mThought: The search returned 5 re

{'input': 'How many apartments are there in Visakhapatnam, with 2BHK?',
 'output': 'Based on a limited search, at least 5 2BHK apartments are available for rent in Visakhapatnam.  However, this is likely a small fraction of the total number.  A precise count is not possible with the current tools.'}

In [None]:
# Cheapest-area question: answered from the 5 retrieved listings only.
agent_executor.invoke({"input": "Can you say what is the cheapest place to live in Visakhapatnam"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To find the cheapest place to live in Visakhapatnam, I need to search for rental properties with a focus on the lowest price.  I'll use the `vizag_rental_search` function to retrieve rental listings and then analyze the results to identify the lowest price.

Action: vizag_rental_search
Action Input: query='cheap rental apartments Visakhapatnam'
[0m[36;1m[1;3m2 BHK Flat for Rent in Visakhapatnam in None priced at ₹7,500 covering Unfurnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹7,500 covering Unfurnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹7,500 covering Unfurnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹15,000 covering Semi-Furnished

2 BHK Flat for Rent in Visakhapatnam in None priced at ₹15,000 covering Semi-Furnished[0m[32;1m[1;3mQuestion: Can you say what is the cheapest place to live in Visakhapatnam
Thought:To find the cheapest place to live in Vis

{'input': 'Can you say what is the cheapest place to live in Visakhapatnam',
 'output': 'Based on the limited data from the search, the cheapest rental listing found in Visakhapatnam was ₹7,500.  However, the locality was not specified, so this does not definitively identify the cheapest *place* to live.  More data is needed for a complete answer.'}

In [None]:
# Lookup of one specific listing by its title. The recorded run of this cell hit
# the Gemini free-tier request quota and was interrupted, so it has no answer.
agent_executor.invoke({"input": "What is the Rent of 3 BHK Flat for Rent in Vaisakhi SkyPark, Yendada, Visakhapatnam"})



[1m> Entering new AgentExecutor chain...[0m


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 32
}
].
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-1.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 50
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 29
}
].
  quota_metric: "generativelanguage.googleapis.com/generate_conten

KeyboardInterrupt: 

# Additional Data Scraping

In [None]:
def scrape_housing(city="visakhapatnam", pages=3):
    """
    Quick scraper for https://housing.com (no login needed)

    Args:
        city: City name placed in the ``f`` query parameter (URL-encoded).
        pages: Number of result pages to request, starting at page 1.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``sqft``,
        ``locality`` and ``source`` (always ``"housing.com"``). A text field is
        ``None`` when the matching element is missing from the card.

    A page whose request fails (network error, timeout, or non-2xx status) is
    reported with ``print`` and skipped.
    """
    from urllib.parse import quote

    def text_or_none(node):
        # Stripped text of an element, or None when select_one found nothing.
        return node.text.strip() if node is not None else None

    base = f"https://housing.com/in/buy/rent/search?f={quote(str(city))}&page="
    data = []
    for page in range(1, pages + 1):
        url = base + str(page)
        try:
            # Same 10-second timeout as the Magicbricks scraper, so a stalled
            # connection cannot hang the cell.
            response = requests.get(url, headers=HEADERS, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"scrape_housing: skipping page {page}: {exc}")
        else:
            soup = BeautifulSoup(response.text, "lxml")
            for card in soup.select("[data-testid='listing-card']"):
                data.append({
                    # A card without an <h2> gets title None instead of raising.
                    "title": text_or_none(card.select_one("h2")),
                    "price": text_or_none(card.select_one("[data-testid='price']")),
                    "sqft": text_or_none(card.select_one("[data-testid='area']")),
                    "locality": text_or_none(card.select_one("[data-testid='locality']")),
                    "source": "housing.com"
                })
        # Polite random pause between page requests; not needed after the last one.
        if page < pages:
            time.sleep(random.uniform(1, 3))
    return data

# Append the housing.com rows to the in-memory list, then rewrite the JSON file.
# Note: every run of this cell appends the housing.com rows again.
more = scrape_housing()
vizag_listings += more

with open("vizag_rentals.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(vizag_listings, ensure_ascii=False, indent=2))

In [None]:
import re

def parse_number(txt):
    """Extract the first number from a price or area string as an int.

    Thousands separators are ignored, a decimal part is honoured, and an
    Indian-style magnitude suffix directly after the number is applied:
    ``k`` (thousand), ``lac``/``lakh`` (100,000), ``cr``/``crore`` (10,000,000).

    Args:
        txt: Any value; it is converted with ``str``. ``None`` is accepted.

    Returns:
        The number rounded to an int, or ``None`` when ``txt`` is ``None`` or
        contains no digits.

    Examples:
        "₹18,000" -> 18000, "1,200 sqft" -> 1200, "₹1.2 Lac" -> 120000,
        "1200-1500 sqft" -> 1200, "Furnished" -> None
    """
    if txt is None:
        return None
    # Only the FIRST number is read, so a range or trailing text cannot be glued
    # into one long digit string. The suffix must not be followed by another
    # letter, which keeps "1200 sqft" or "15km" from being read as a magnitude.
    match = re.search(
        r"(\d[\d,]*(?:\.\d+)?)(?:\s*(k|lacs?|lakhs?|cr|crores?)(?![a-z]))?",
        str(txt),
        flags=re.IGNORECASE,
    )
    if match is None:
        return None
    value = float(match.group(1).replace(",", ""))
    suffix = (match.group(2) or "").lower()
    if suffix.startswith("k"):
        value *= 1_000
    elif suffix.startswith("l"):
        value *= 100_000
    elif suffix.startswith("c"):
        value *= 10_000_000
    return int(round(value))

# Add numeric versions of the price and area text to every listing (rows are edited in place).
for listing in vizag_listings:
    listing.update(
        price_num=parse_number(listing.get("price")),
        sqft_num=parse_number(listing.get("sqft")),
    )

In [None]:
import hashlib

# Key every Document by a hash of its text, url and source and pass the keys as
# ids. The collection name "vizag_rentals" was already used by earlier builds, so
# without stable ids this build (and every re-run of it) would add another copy
# of each listing. The dict also drops exact repeats within the list.
docs_by_id = {
    hashlib.sha1(
        f"{doc.page_content}|{doc.metadata.get('url')}|{doc.metadata.get('source')}".encode("utf-8")
    ).hexdigest(): doc
    for doc in (
        Document(
            page_content=f"{row['title']} in {row['locality']} priced at {row['price']} covering {row['sqft']}",
            metadata=dict(row),   # now includes price_num & sqft_num
        )
        for row in vizag_listings
    )
}
docs = list(docs_by_id.values())

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
    ids=list(docs_by_id),
    collection_name="vizag_rentals"
)

In [None]:
import pandas as pd

# Table of listings used by the rent/area statistics.
# Only a missing price disqualifies a row. Rows with an unparsed area are kept
# (pandas skips NaN when averaging): the recorded scraper output shows furnishing
# text in the area field, so also requiring sqft_num would discard those rows.
# to_numeric gives both columns a numeric dtype even when values are missing.
df = (
    pd.DataFrame(vizag_listings)
    .assign(
        price_num=lambda frame: pd.to_numeric(frame["price_num"], errors="coerce"),
        sqft_num=lambda frame: pd.to_numeric(frame["sqft_num"], errors="coerce"),
    )
    .dropna(subset=["price_num"])
)

def avg_rent_area(locality: str, max_rent: int):
    """Summarise listings in a locality whose rent does not exceed ``max_rent``.

    Reads the module-level ``df`` (columns ``locality``, ``price_num``,
    ``sqft_num``).

    Args:
        locality: Text to look for inside the ``locality`` column. It is matched
            literally (not as a regular expression), ignoring case and
            whitespace, so "Dwaraka Nagar" also finds "Dwarakanagar".
        max_rent: Upper bound (inclusive) on ``price_num``.

    Returns:
        A multi-line string with the number of matches, the average rent and the
        average area, or "No matching listings found." when nothing matches.
    """
    # Remove whitespace on both sides so spelling variants of a locality match.
    wanted = "".join(str(locality).split())
    names = df["locality"].astype("string").str.replace(r"\s+", "", regex=True)
    mask = (
        names.str.contains(wanted, case=False, regex=False, na=False).astype(bool) &
        (df["price_num"] <= max_rent)
    )
    subset = df[mask]
    if subset.empty:
        return "No matching listings found."

    # The area may be missing for some or all matches; average what is there.
    area_mean = pd.to_numeric(subset["sqft_num"], errors="coerce").mean()
    if pd.notna(area_mean):
        area_line = f"Average area : {area_mean:.0f} sqft"
    else:
        area_line = "Average area : not available"
    return (
        f"Found {len(subset)} listings in {locality} ≤ ₹{max_rent:,}.\n"
        f"Average rent : ₹{subset['price_num'].mean():,.0f}\n"
        + area_line
    )

from langchain.tools import StructuredTool


def _avg_rent_area_from_text(query: str) -> str:
    """Single-string adapter around ``avg_rent_area`` for the ReAct agent.

    The ReAct agent hands a tool one free-text "Action Input", so a tool with
    two required arguments cannot be called by it. This adapter accepts inputs
    such as ``Dwaraka Nagar, 15000``, ``Dwaraka Nagar, ₹15,000``, ``Dwaraka
    Nagar, 15k`` or ``locality='Dwaraka Nagar', max_rent=15000``: the last
    number is read as the rent cap and the text before it as the locality.

    Returns the text produced by ``avg_rent_area``, or a usage hint when the
    input has no locality or no number.
    """
    # Drop argument names the model may echo back, e.g. locality=... / "max_rent": ...
    text = re.sub(r"\b(?:locality|max_rent)[\"']?\s*[=:]\s*", "", str(query), flags=re.IGNORECASE)
    found = re.search(r"(\d[\d,]*)\s*(k\b)?\D*$", text, flags=re.IGNORECASE)
    locality = (text[: found.start()] if found else text).strip(" \t\r\n,;:'\"₹{}()[]")
    if found is None or not locality:
        return "Input must look like '<locality>, <max rent in rupees>', for example 'Dwaraka Nagar, 15000'."
    max_rent = int(found.group(1).replace(",", ""))
    if found.group(2):
        max_rent *= 1000
    return avg_rent_area(locality, max_rent)


avg_tool = StructuredTool.from_function(
    func=_avg_rent_area_from_text,
    name="average_rent_area",
    description=(
        "Compute average rent & area given locality & max rent (₹). "
        "Input is ONE string '<locality>, <max rent>', for example 'Dwaraka Nagar, 15000'."
    )
)

# Build the search tool from the CURRENT vectorstore: `retriever_tool` was created
# from the first store, before the extra listings and numeric fields were added.
tools = [
    create_retriever_tool(
        vectorstore.as_retriever(search_kwargs={"k": 5}),
        name="vizag_rental_search",
        description="Search for residential rental properties in Visakhapatnam with price, area and locality filters."
    ),
    avg_tool,
]

In [None]:
# Executor over a new ReAct agent that can use every entry in `tools`.
agent_executor = AgentExecutor(
    tools=tools,
    agent=create_react_agent(llm, tools, hub.pull("hwchase17/react")),
    handle_parsing_errors=True,
    verbose=True,
)

# Example request that needs both a locality filter and a rent cap.
agent_executor.invoke(
    {"input": "I need a 2BHK in Dwaraka Nagar with rent below ₹15k. Show average rent and area."}
)

# Streamlit