In [1]:
#IMPORTS
import json
import os
from uuid import uuid4

from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings

# from langchain.memory import ConversationSummaryMemory
# from langchain.chains import ConversationChain
# from langchain.chains import RetrievalQA
from langchain.chains import create_history_aware_retriever
from langchain.output_parsers import PydanticOutputParser
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import ConfigurableField
from langchain_core.messages import HumanMessage, AIMessage


from pydantic import BaseModel, Field, validator

In [2]:
#SETUP OPEN AI CREDENTIALS
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"
with open("voc_key") as f:
    key = f.read()
    assert len(key) > 0
    os.environ["OPENAI_API_KEY"] = key

MODEL_NAME = "gpt-3.5-turbo"

# Generate real estate property listings with an LLM

In [5]:
# Setup base LLM. 
# Note how temperature is configurable.
llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0).configurable_fields(
    temperature=ConfigurableField(
        id="llm_temperature",
        name="LLM Temperature",
        description="The measure of creativity the LLM allowed as a decimal number between 0 and 1. With 1 being the most creative in its responses."
    )   
)

In [57]:
# Define listings
class Listing(BaseModel):
    neighborhood: str = Field(description="Name of a neighborhood in city or village.")
    price: str = Field(description="Price of a property in US Dollars ($).")
    bedrooms: int = Field(description="Number of bedrooms the property has.")
    bathrooms: int = Field(description="Number of bathrooms the property has.")
    house_size: str = Field(description="The number of squarefeet (sqft) of liveable space the property has.")
    description: str = Field(description="A short description of the property meant to entice potential buyers.")
    neighborhood_description: str = Field(description="A short description of the neighborhood the property is located in. Meant to attract potential buyers.")

    def to_text(self):
        return """
Neighborhood: {neighborhood} 
Price: {price}
Bedrooms: {bedrooms}
Bathrooms: {bathrooms}
House Size: {house_size}

Description: {description}

Neighborhood Description: {neighborhood_description}  
        """.format(
            neighborhood=self.neighborhood, 
            price=self.price, 
            bedrooms=self.bedrooms, 
            bathrooms=self.bathrooms, 
            house_size=self.house_size, 
            description=self.description,
            neighborhood_description=self.neighborhood_description
        )

example_1 = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

example_2 = """
Neighborhood: The Ghetto
Price: $150,000
Bedrooms: 2
Bathrooms: 1
House Size: 1,500 sqft

Description: An old house in a rather poor condition. It needs a lot of work before you can call this your home. A real fixer upper. But still good value for the asking price.

Neighborhood Description: Welcome to this family friendly neighborhood, set in the outskirts of the city. Close to schools, supermarkets, parks and public transport. The ideal neighborhood for a young starting families.
"""

parser = PydanticOutputParser(pydantic_object=Listing)

print(parser.get_format_instructions())

listing_prompt = PromptTemplate(
    template="""
Generate a listing of a random real estate property for a given city.
You can be creative with names and descriptions. 
The number of bedrooms is typically somewhere between 1 and 6 and the number of bathrooms between 1 and 3. 
This usually depends on the size of the house, bigger houses tend to have more rooms.
The size of a house is usually between 1000 and 3000 sqft.
The price of the house should range somewhere between $100,000 and $1,000,000, and is heavily influenced by the other details.
Bigger houses, with more rooms and in better neighborhoods are generally more expensive.
Smaller houses, with fewer rooms or in impoverished neighborhoods go for much lower prices.

Here are a few examples:
###
{examples}
###

{format_instructions}
""",
    input_variables=["city", "examples"],
    partial_variables={"format_instructions": parser.get_format_instructions}
)

query = listing_prompt.format(city="New York", examples="\n--\n".join([example_1, example_2]))
print(query)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"neighborhood": {"description": "Name of a neighborhood in city or village.", "title": "Neighborhood", "type": "string"}, "price": {"description": "Price of a property in US Dollars ($).", "title": "Price", "type": "string"}, "bedrooms": {"description": "Number of bedrooms the property has.", "title": "Bedrooms", "type": "integer"}, "bathrooms": {"description": "Number of bathrooms the property has.", "title": "Bathrooms", "type": "integer"}, "house_size": {"description": "The number of squarefeet (sqft) of liveable space the p

In [58]:
# Generate / fetch listings.
def generate_listings(n):
    listings = []
    for i in range(0, n):
        output = llm.with_config(configurable={"llm_temperature": 1}).predict(query)
        result = parser.parse(output)
        # print(output)
        listings.append(result)

    with open("data.json", "w") as f:
        json.dump([l.model_dump() for l in listings], f)
        
    return listings

def fetch_listings():
    with open("data.json") as f:
        listings = json.load(f)
        return [Listing(**l) for l in listings]
        
listings = generate_listings(30)
# listings = fetch_listings()
len(listings)

30

In [64]:
#Setup vector DB and store the listings.
embeddings = OpenAIEmbeddings()

db = Chroma(
    collection_name="listings",
    embedding_function=embeddings,
    persist_directory="./chroma_db"
)

def store_listings():
    docs = [Document(page_content=l.to_text(), metadata={"source": "llm_generated"}, id=i) for i, l in enumerate(listings)]
    uuids = [str(uuid4()) for _ in range(len(docs))]
    
    return db.add_documents(documents=docs, ids=uuids)

store_listings()

['9ec34271-8023-4c8c-8baf-f31c630c06e4',
 'c0d88da7-e41d-46be-8019-c1263b4914cd',
 '830a2170-cc4c-4618-8e34-99c58d285152',
 '6e35c059-7650-4a94-af0e-ee1c327e693d',
 '02a93f23-91bf-442b-91ce-6d1c3fd792a7',
 '3dc5e703-87d9-44d0-9397-4d30edb167f1',
 '3103f3d6-ae25-4857-8d86-217e6e5a92d4',
 '9da1b933-fc0c-427a-87c9-5700a5305219',
 'daf56ac4-ffa7-4b27-989c-5cf0ccccbcee',
 '52f4aca5-6980-4a47-b29a-203dc630228c',
 '7cef1f7e-3f1d-4a37-8669-4300e1c99fb7',
 'a4c5e1b7-3ce6-4104-a3ac-f686b79a61b8',
 'bccfce81-54d5-4ee7-be5e-d5cd63d4315b',
 'b6cd4b9a-746e-4bed-af36-a60d3e07c85f',
 'c7a7aaf6-8af4-498d-9215-6049e8f247e5',
 '1e003cda-b3f3-4864-bcbd-ebf1c28af15a',
 '11d5b044-406e-4442-b773-d7b7acf292fb',
 'de9f1f4c-04c1-435e-b8c5-a000de2528fc',
 '17449444-4dc0-4382-895e-1e977767d9ff',
 '1e17559b-08ec-4550-8776-e988cf63d35d',
 'b1aa3028-322f-4912-ac03-a61780795fb8',
 'c4f36ed9-37b9-41b8-b67a-3a8b928ef2bd',
 '202818af-01a0-4675-93ec-c11d0429e5a3',
 'a37b2299-5331-42a4-8941-054a3412dd5a',
 '2ccd84c7-6c41-

# Setup the AI Real Estate Agent that has access to the stored listings

In [65]:
# Setup llm chain
system_prompt = """
You are an AI Real Estate Agent that will recommend the most suited listings based on the client's persona.
Do not invent any new listings, use only the ones provided in the context.
If the client wants a three bedroom house, don't suggest a one bedroom one.
Or if the client has a budget of $250,000, don't suggest a house that is a lot more expensive, like over $300,000.
"""

question_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
history_aware_retriever = create_history_aware_retriever(
    llm, db.as_retriever(), question_prompt
)

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

system_prompt_2 = """
You are an AI Real Estate Agent giving recommendations to a client.
Use the persona you've built from the chat history and the listings from the context.
If you don't know the answer, say you can't find any listings that match their wishes.

{context}.
"""

question_prompt_2 = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt_2),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

qa_chain = create_stuff_documents_chain(llm, question_prompt_2)
rag_chain = create_retrieval_chain(history_aware_retriever, qa_chain)

In [70]:
# BUILD FAKE CHAT HISTORY
## Note that we're building a fake chat history.
## In reality this would be done through a Q&A chain.

chat_history = []

questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
    "What is your family situation like?",
    "Do you intend to do any remodelling yourself?"
]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
    "Two adults with a toddler of 18 months. Hopefully with another one underway within the next year.",
    "We prefer not to have to do any remodelling. But we open to it if it means we can realise our dream home."
]

for q,a in zip(questions, answers):
    chat_history.extend([AIMessage(content=q), HumanMessage(content=a)])
chat_history

[AIMessage(content='How big do you want your house to be?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='A comfortable three-bedroom house with a spacious kitchen and a cozy living room.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='What are 3 most important things for you in choosing this property?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='A quiet neighborhood, good local schools, and convenient shopping options.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Which amenities would you like?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Which transportation options are important to you?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Easy access to a reliable bus line, proximity to a major highw

In [71]:
q_1 = "Could you list me the top 5 properties that best fit our wishes? We have a maximum budget of $100,000"
a_1 = rag_chain.invoke({
    "input": q_1,
    "chat_history": chat_history
})

a_1["answer"]

"I'm sorry, I couldn't find any listings that match your criteria within your budget. Would you like me to adjust the search parameters or provide more options?"

In [72]:
chat_history.extend(
    [HumanMessage(content=q_1), AIMessage(content=a_1["answer"])]
)
q_2 = "We can raise our budget to $600,000. Can you find the top 5 listings for that budget?"
a_2 = rag_chain.invoke({
    "input": q_2,
    "chat_history": chat_history
})

print(a_2["answer"])

Based on your preferences for a quiet neighborhood, good local schools, convenient shopping options, a backyard for gardening, a two-car garage, a modern, energy-efficient heating system, easy access to a reliable bus line, proximity to a major highway, bike-friendly roads, and a balance between suburban tranquility and urban amenities, here are the top 5 listings that fit your criteria within your new budget of $600,000:

1. **Neighborhood: Oak Park**
   - Price: $550,000
   - Bedrooms: 4
   - Bathrooms: 2
   - House Size: 2,200 sqft
   - Description: Beautifully renovated 4-bedroom, 2-bathroom home in a family-friendly neighborhood with tree-lined streets and charming cafes. Modern kitchen, master suite with en-suite bathroom, and lush backyard. Easy access to parks, schools, and shopping centers.

2. **Neighborhood: Sunset Hills**
   - Price: $650,000
   - Bedrooms: 4
   - Bathrooms: 3
   - House Size: 2,500 sqft
   - Description: Exquisite 4-bedroom, 3-bathroom home in a prestigiou

In [73]:
chat_history.extend(
    [HumanMessage(content=q_2), AIMessage(content=a_2["answer"])]
)
q_3 = "I really like the house on Sunset Hills. It's a bit over budget though. Do you think we could negotiate a lower price with owner?"
a_3 = rag_chain.invoke({
    "input": q_3,
    "chat_history": chat_history
})

a_3["answer"]

'I can definitely reach out to the listing agent and inquire about the possibility of negotiating a lower price for the house in Sunset Hills. Would you like me to proceed with that and keep you updated on any developments?'

# Conclusion

This application could be improved in a couple of ways.

The most obvious being an interactive app, like Gradio, where the client can answer real questions instead of the hardcoded ones.

Some other noteable areas of improvement I found while testing:

- *For the generation of listings*:
    - The creativity of the names, descriptions and pricing of the listings isn't very diverse.
 
I've played around with the temperature but that didn't help me much.
If we needed to improve on this, we'd have to make the prompt a bit more elaborate.
Or we could alter the architecture altogether and have the LLM generate each key feature separately for more control.

- *For the conversational llm chain*:
    - Sometimes the agent returns fewer than 5 listings when it could return more
    - In follow up questions, the agent sometimes returns a single listing instead of a new top 5

These could probably be improved with a better system prompt.

In [81]:
for l in chat_history:
    l.pretty_print()


How big do you want your house to be?

A comfortable three-bedroom house with a spacious kitchen and a cozy living room.

What are 3 most important things for you in choosing this property?

A quiet neighborhood, good local schools, and convenient shopping options.

Which amenities would you like?

A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.

Which transportation options are important to you?

Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.

How urban do you want your neighborhood to be?

A balance between suburban tranquility and access to urban amenities like restaurants and theaters.

What is your family situation like?

Two adults with a toddler of 18 months. Hopefully with another one underway within the next year.

Do you intend to do any remodelling yourself?

We prefer not to have to do any remodelling. But we open to it if it means we can realise our dream home.

Could you list me the to