In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
import re
import os
import json
import aiofiles
import asyncio
from typing import Dict, Any
from datetime import datetime
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
import os
from dotenv import load_dotenv
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
# Load key/value pairs from a .env file into the process environment so the
# os.environ.get(...) lookups for API keys in later cells can find them.
load_dotenv()

  from tqdm.autonotebook import tqdm


True

In [3]:
import torch

# LangChain-side BGE embedding wrapper.
# NOTE(review): `embeddings` is not referenced by any later cell visible in
# this notebook (the LlamaIndex `embed_model` is used instead) - confirm it is
# still needed before paying for a second model load.
model_name = "BAAI/bge-small-en"

# Prefer the GPU when torch can see one, otherwise fall back to the CPU so the
# cell also runs on hosts without CUDA instead of failing at model load.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
)

In [4]:
# Pinecone connection settings shared by the cells below: the index to open
# and the namespace inside it that holds the document vectors.
index_name = "realestatebot"
namespace = "docs-weblinks"

# The API key comes from the environment (populated by load_dotenv above).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [5]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.core import Settings, VectorStoreIndex, get_response_synthesizer  # noqa: F401
from llama_index.core.chat_engine.context import ContextChatEngine
from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.postprocessor import LongContextReorder
from llama_index.core.retrievers import VectorIndexRetriever
# from llama_index.llms.gemini import Gemini
from llama_index.llms.groq import Groq

In [6]:
from llama_index.core.query_engine import RetrieverQueryEngine
from crewai import Agent, Task, Crew, LLM
from crewai_tools import LlamaIndexTool

In [7]:
# LlamaIndex-side embedding model, used to embed incoming queries.
# NOTE(review): presumably the same model that was used when the Pinecone
# index was populated - verify, since mismatched embeddings degrade retrieval.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en",
)

In [53]:
# Register the process-wide LlamaIndex defaults: the embedding model defined
# above and a Groq-hosted chat model keyed from the environment.
Settings.embed_model = embed_model
Settings.llm = Groq(
    api_key=os.getenv("GROQ_API_KEY"),
    model="llama3-70b-8192",
)
# Alternative backend (disabled):
# Settings.llm = Gemini(api_key=os.environ.get("GEMINI_API_KEY"), model ='models/gemini-1.5-flash')

In [54]:
from llama_index.core import PromptTemplate

In [55]:
# Single-string question-answering prompt. It carries the {query_str} and
# {context_str} placeholders and instructs the model to answer only from the
# supplied context.
template = """ You are an assistant for answering real estate-related queries. Use the following pieces of retrieved information to answer questions about properties, such as available listings, property details (e.g., number of BHKs, facilities), and other relevant information. Provide clear and direct answers as if you're responding to a user's inquiry, without mentioning any documents or contexts. If you don't know the answer, just say that you don't know.

You should always answer using the provided context and only the context from the document.

Question: {query_str} Context: {context_str} Answer: """

# Wrap the raw string as a LlamaIndex prompt object.
# NOTE(review): `qa_template` is not referenced by any later cell visible here;
# the chat-style `text_qa_template` is what the synthesizer receives.
qa_template = PromptTemplate(template)

In [91]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core import ChatPromptTemplate

In [92]:
# System turn: sets the real-estate assistant persona and restricts answers to
# the retrieved context.
_qa_system_message = ChatMessage(
    role=MessageRole.SYSTEM,
    content=""" You are an assistant for answering real estate-related queries. Use the following pieces of retrieved information to answer questions about properties, such as available listings, property details (e.g., number of BHKs, facilities), and other relevant information. Provide clear and direct answers as if you're responding to a user's inquiry, without mentioning any documents or contexts. If you don't know the answer, just say that you don't know.

You should always answer using the provided context and only the context from the document.""",
)

# User turn: carries the {context_str} and {query_str} placeholders.
_qa_user_message = ChatMessage(
    role=MessageRole.USER,
    content=(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the query.\n"
        "Query: {query_str}\n"
        "Answer: "
    ),
)

# Message list and the chat prompt built from it; the prompt is handed to the
# response synthesizer as its text-QA template in a later cell.
chat_text_qa_msgs = [_qa_system_message, _qa_user_message]
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

In [94]:
# System turn for the refine step: the model may either rewrite the previous
# answer with the new context or repeat it unchanged.
_refine_system_message = ChatMessage(
    role=MessageRole.SYSTEM,
    content=(
        "You are an expert Q&A system that strictly operates in two modes "
        "when refining existing answers:\n"
        "1. **Rewrite** an original answer using the new context.\n"
        "2. **Repeat** the original answer if the new context isn't useful.\n"
        "Never reference the original answer or context directly in your answer.\n"
        "When in doubt, just repeat the original answer."
    ),
)

# User turn: carries the {context_msg}, {query_str} and {existing_answer}
# placeholders.
_refine_user_message = ChatMessage(
    role=MessageRole.USER,
    content=(
        "New Context: {context_msg}\n"
        "Query: {query_str}\n"
        "Original Answer: {existing_answer}\n"
        "New Answer: "
    ),
)

# Message list and the chat prompt built from it; the prompt is handed to the
# response synthesizer as its refine template in a later cell.
chat_refine_msgs = [_refine_system_message, _refine_user_message]
refine_template = ChatPromptTemplate(chat_refine_msgs)

In [95]:
def pinecone_vectorstore():
    index = pc.Index(index_name)
    pinecone_vector_store = PineconeVectorStore(pinecone_index=index, namespace='docs-weblinks')

    return VectorStoreIndex.from_vector_store(vector_store=pinecone_vector_store)

reorder = LongContextReorder()
vector_index_retriever = VectorIndexRetriever(
index=pinecone_vectorstore(),
namespace=namespace,
postprocessors=[reorder],
similarity_top_k=15),

index=pinecone_vectorstore()
namespace=namespace,
response_synthesizer = get_response_synthesizer(response_mode='refine', text_qa_template=text_qa_template, refine_template=refine_template)
postprocessors = [reorder]

In [96]:
query_engine = RetrieverQueryEngine(
        retriever=vector_index_retriever[0],
        response_synthesizer=response_synthesizer,
        node_postprocessors=postprocessors,
)

In [99]:
# Smoke-test the query engine with a sample question about the legal documents.
result = query_engine.query(
    "What are the indian laws mentioned"
)

In [100]:
# Display the full Response object (answer text plus the source nodes it was
# synthesized from) so the retrieved chunks can be inspected.
result

Response(response='There is only one Indian law mentioned in the provided text: the Registration Act, 1908.', source_nodes=[NodeWithScore(node=TextNode(id_='ImportantLegalPropertyDocuments.txt#chunk_2', embedding=None, metadata={'creation_date': '2024-09-29', 'last_modified_date': '2024-09-29', 'file_name': 'IMPORTANT LEGAL PROPERTY DOCUMENTS.txt', 'file_size': 30649.0, 'file_path': 'E:\\Codes\\Data Sciene\\AI\\Real_Estate_Chatbot\\app\\db\\extracted_output\\pdf-documents\\IMPORTANT LEGAL PROPERTY DOCUMENTS.txt'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Applicable Laws\nIn India, basically the following laws govern property matters –\n•\nThe Transfer of Property Act, 1882\n•\nThe Special Relief Act, 1963\n•\nThe Indian Contract Act, 1872\n•\nThe Registration Act, 1908\n•\nThe Indian Stamp Act, 1899\n•\nThe Indian Succession Act, 1925\n•\nThe Indian Easements Act, 1882\n•\nThe Real Estate (Regulation and Development) Act, 2016\nINDIAN PROP

In [42]:
# Expose the LlamaIndex query engine as a CrewAI tool; the name and
# description are what the agent sees when deciding whether to call it.
query_tool = LlamaIndexTool.from_query_engine(
    query_engine,
    name="Real Estate Bot",
    description=(
        "This tool is a real estate chatbot that helps answer property-related queries, "
        "including available listings, BHK details, facilities, and other relevant "
        "real estate information using context from the retrieved documents."
    ),
)

In [43]:
# LLM used by the CrewAI agent (configured separately from Settings.llm, which
# serves the LlamaIndex query engine).
llm = LLM(
    api_key=os.getenv("GROQ_API_KEY"),
    model="groq/llama-3.1-70b-versatile",
)

In [44]:
# Agent: answers the user's question by calling the query tool. The {input}
# placeholder in the goal is filled from the inputs passed to kickoff().
real_estate_agent = Agent(
    llm=llm,
    tools=[query_tool],
    verbose=True,
    role="Real Estate Information Provider",
    goal="Provide tailored responses and retrieve relevant information from the document according to the user's query: {input}",
    backstory="""An expert in finding and filtering relevant information from the provided documents or databases.
    Your goal is to give concise and accurate information based on the user's query.""",
)

# Task: the single unit of work given to the agent; its final answer is also
# written to ./output_crew_ai.txt.
real_estate_task = Task(
    description="""Your task is to filter and retrieve the most relevant information from the document based on the user's query: {input}.
    Never provide any information outside of the document. If you don't know the answer, simply say that you don't know. Never give incorrect information.""",
    expected_output="The output should be a single paragraph.",
    output_file='./output_crew_ai.txt',
    agent=real_estate_agent,
)

# Crew: one agent, one task.
my_crew = Crew(
    agents=[real_estate_agent],
    tasks=[real_estate_task],
)



In [51]:
# Run the crew. The "input" value replaces the {input} placeholder in the
# agent goal and task description. Note that `crew` holds the result of the
# run (its `.raw` text is read in the next cell), not the Crew object itself.
crew = my_crew.kickoff(
    inputs={"input": "Are there any properties in jaipur"},
)

[1m[95m# Agent:[00m [1m[92mReal Estate Information Provider[00m
[95m## Task:[00m [92mYour task is to filter and retrieve the most relevant information from the document based on the user's query: Are there any properties in jaipur.
    Never provide any information outside of the document. If you don't know the answer, simply say that you don't know. Never give incorrect information.[00m


[1m[95m# Agent:[00m [1m[92mReal Estate Information Provider[00m
[95m## Thought:[00m [92mThought: The user is searching for properties in Jaipur, I should use the Real Estate Bot tool to query the available properties.[00m
[95m## Using tool:[00m [92mReal Estate Bot[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"properties in Jaipur\"}"[00m
[95m## Tool Output:[00m [92m
You can find various properties for sale in Jaipur, including Flats, across different localities such as Bhankrota, with different configurations and varying sizes, with prices starting from different range

In [53]:
# Show the raw text of the crew's final answer.
crew.raw

'Yes, there are various properties available in Jaipur, including but not limited to: 1. Office Space for Rent 2. Flats / Apartments for Rent 3. Warehouse / Godown for Rent 4. Showrooms for Rent 5. Independent House for Rent 6. Commercial Shops for Rent 7. Builder Floor for Rent 8. Commercial Plots for Rent 9. Factory for Rent 10. Industrial Land for Rent 11. Hotel & Restaurant for Rent 12. Residential Land / Plots for Rent 13. Farm / Agricultural Land for Rent 14. Business Center for Rent 15. Guest House for Rent 16. Villa for Rent 17. Farm House for Rent 18. Studio Apartments for Rent'