In [1]:
import getpass
import os

os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key:  ········


In [2]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory, CombinedMemory, ChatMessageHistory
from langchain.chains import ConversationChain
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_chroma import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA

In [3]:
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = ChatOpenAI(
    model=model_name,
    temperature=0,
    max_tokens=1024,
    max_retries=2
)

## Step 2: Generating Real Estate Listings

In [4]:
system_prompt = "You are a helpful assistant that generates real estate listings"
human_prompt = """
Create 2 listings as CSV:
Listing Example:

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

messages = [
    ("system", system_prompt),
    ("human", human_prompt),
]
ai_msg = llm.invoke(messages)
ai_msg.content

'Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description\nGreen Oaks,$800,000,3,2,2000 sqft,"Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.","Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."\nRiverfront Retreat,$1,200,000,

## Step 2.1: I generated the Real Estate Listings using the free version of ChatGPT 4o and put them in a listings.csv file for convenience

In [6]:
# Load CSV file using pandas for visualization purposes
import pandas as pd 

df = pd.read_csv("listings.csv")
df

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description
0,Sunny Meadows,"$750,000",3,2,"1,800 sqft",Enjoy suburban tranquility in this beautiful 3...,Sunny Meadows is known for its friendly commun...
1,Cedar Grove,"$850,000",4,3,"2,200 sqft",Step into luxury in this elegantly designed 4-...,Cedar Grove is a vibrant neighborhood celebrat...
2,Maple Park,"$1,100,000",5,4,"3,000 sqft",Experience the grandeur of this spacious 5-bed...,"Maple Park boasts tree-lined streets, historic..."
3,Pine Ridge,"$900,000",4,3,"2,400 sqft","This charming 4-bedroom, 3-bathroom home in Pi...",Pine Ridge is celebrated for its scenic beauty...
4,Oceanview Estates,"$1,500,000",4,3,"2,800 sqft","Welcome to Oceanview Estates, where luxury mee...",Oceanview Estates is a prestigious community o...
5,Mountain Vista,"$1,300,000",4,3,"2,600 sqft",Set against the backdrop of stunning mountain ...,Mountain Vista is a picturesque community know...
6,Green Oaks,"$800,000",3,2,"2,000 sqft",Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
7,Sunset Ridge,"$950,000",4,3,"2,500 sqft",Experience luxury living at Sunset Ridge. This...,Sunset Ridge is an upscale neighborhood known ...
8,Lakeside Village,"$1,200,000",5,4,"3,200 sqft",Discover lakeside elegance in this exquisite 5...,Lakeside Village is a vibrant community built ...
9,Urban Heights,"$650,000",2,2,"1,400 sqft","Welcome to Urban Heights, a chic and contempor...",Urban Heights is located in the heart of the c...


### Load CSV with CSVLoader

In [7]:
loader = CSVLoader(file_path="./listings.csv")
listings = loader.load()

## Step 3: Storing Listings in a Vector Database

### Step 3.1 Vector Database Setup:
- Vector Database Setup: Initialize and configure ChromaDB or a similar vector database to store real estate listings.
- Generating and Storing Embeddings: Convert the LLM-generated listings into suitable embeddings that capture the semantic content of each listing, and store these embeddings in the vector database.

We will use a CharacterTextSplitter just for demonstration purposes

In [8]:
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
split_listings = splitter.split_documents(listings)

embeddings = OpenAIEmbeddings()

db = Chroma.from_documents(split_listings, embeddings)

In [10]:
# Checking the splits
for split in split_listings:
    print(split)
    print("\n")

page_content='Neighborhood: Sunny Meadows
Price: $750,000
Bedrooms: 3
Bathrooms: 2
House Size: 1,800 sqft
Description: Enjoy suburban tranquility in this beautiful 3-bedroom home in Sunny Meadows. The house features an open layout, a modern kitchen with energy-efficient appliances, and a cozy living room filled with natural light. A well-manicured garden and spacious backyard provide the perfect space for family gatherings.
Neighborhood Description: Sunny Meadows is known for its friendly community, lush parks, and top-rated schools. Residents enjoy a peaceful atmosphere with easy access to local amenities and shopping centers.' metadata={'source': './listings.csv', 'row': 0}


page_content='Neighborhood: Cedar Grove
Price: $850,000
Bedrooms: 4
Bathrooms: 3
House Size: 2,200 sqft
Description: Step into luxury in this elegantly designed 4-bedroom home in Cedar Grove. Featuring a gourmet kitchen, spacious bedrooms with ample closet space, and a master suite with a spa-inspired bath, this

#### Testing the database

In [12]:
query = """
    List as bullet points the name of the neighborhoods with 4 bedroom houses. 
    explain the steps you took to get to this result
"""

use_chain_helper = False
if use_chain_helper:
    rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
    print(rag.run(query))
else:
    similar_docs = db.similarity_search(query, k=5)
    # similar_docs = db.similarity_search_by_vector(embedding=embeddings.embed_query(query), k=5)
    # similar_docs = db.similarity_search_with_score(query, k=5)
    # Standard similarity search was returning overly similar results. 
    # Trying MMR to increase diversity:
    # similar_docs = db.max_marginal_relevance_search(query, k=5, fetch_k=50)
    
    for doc in similar_docs:
        print(doc.page_content)
        print("\n######")
    
    prompt = PromptTemplate(
        template="{query}\nContext: {context}",
        input_variables=["query", "context"],
    )
    chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
    print(chain.run(input_documents=similar_docs, query = query))

Neighborhood: Oceanview Estates
Price: $1,500,000
Bedrooms: 4
Bathrooms: 3
House Size: 2,800 sqft
Description: Welcome to Oceanview Estates, where luxury meets coastal living. This exquisite 4-bedroom, 3-bathroom property offers panoramic ocean views, a contemporary open layout, and high-end finishes throughout. Step outside to your private terrace and experience breathtaking sunsets.
Neighborhood Description: Oceanview Estates is a prestigious community offering exclusive beach access, upscale dining, and boutique shopping. Residents enjoy a blend of coastal serenity and modern amenities in this highly sought-after neighborhood.

######
Neighborhood: Mountain Vista
Price: $1,300,000
Bedrooms: 4
Bathrooms: 3
House Size: 2,600 sqft
Description: Set against the backdrop of stunning mountain ranges, this 4-bedroom, 3-bathroom home in Mountain Vista offers a perfect retreat from the hustle and bustle. The home features a spacious living area, large windows capturing breathtaking views, and

## Step 4: Building the User Preference Interface

In [13]:
questions = [   
                "How big do you want your house to be?" 
                "What are 3 most important things for you in choosing this property?", 
                "Which amenities would you like?", 
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?",   
            ]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

In [14]:
# hard-coding buyer preference for convenience
buyer_preferences = answers[0]
buyer_preferences

'A comfortable three-bedroom house with a spacious kitchen and a cozy living room.'

### Step 5: Searching Based on Preferences

Semantic Search Implementation: Use the structured buyer preferences to perform a semantic search on the vector database, retrieving listings that most closely match the user's requirements.
Listing Retrieval Logic: Fine-tune the retrieval algorithm to ensure that the most relevant listings are selected based on the semantic closeness to the buyer’s preferences.

In [16]:
# Standard similarity search was returning overly similar results. 
# Trying MMR to increase diversity:
similar_docs = db.similarity_search(query, k=5)

print(f"Buyers preference: \n{buyer_preferences}")

Buyers preference: 
A comfortable three-bedroom house with a spacious kitchen and a cozy living room.


### Step 6: Personalizing Listing Descriptions
LLM Augmentation: For each retrieved listing, use the LLM to augment the description, tailoring it to resonate with the buyer’s specific preferences. This involves subtly emphasizing aspects of the property that align with what the buyer is looking for.
Maintaining Factual Integrity: Ensure that the augmentation process enhances the appeal of the listing without altering factual information.

In [18]:
query = """
Using the details from the property listing provided in the context, 
generate an enhanced description that speaks directly to the buyer's unique preferences. 

Tailor the language to emphasize features of the properties such as design elements, 
amenities, or neighborhood benefits—that align with what the buyer is looking for. 
While augmenting the description to increase its appeal, 
ensure that all factual information (like price, number of bedrooms, bathrooms, and square footage) remains unchanged.
""" 

for doc in similar_docs:
    print("\n__Listing:\n")
    print(doc.page_content)
    prompt = PromptTemplate(
        template="{query}\nContext: {context}",
        input_variables=["query", "context"],
    )

    chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
    print("\n__LLM Augmentation:\n")
    print(chain.run(input_documents=[doc], query = query))
    print("\n------------------------------------------------")


__Listing:

Neighborhood: Oceanview Estates
Price: $1,500,000
Bedrooms: 4
Bathrooms: 3
House Size: 2,800 sqft
Description: Welcome to Oceanview Estates, where luxury meets coastal living. This exquisite 4-bedroom, 3-bathroom property offers panoramic ocean views, a contemporary open layout, and high-end finishes throughout. Step outside to your private terrace and experience breathtaking sunsets.
Neighborhood Description: Oceanview Estates is a prestigious community offering exclusive beach access, upscale dining, and boutique shopping. Residents enjoy a blend of coastal serenity and modern amenities in this highly sought-after neighborhood.

__LLM Augmentation:

Step into your dream coastal oasis at Oceanview Estates, where luxury and breathtaking ocean views await. This stunning 4-bedroom, 3-bathroom home boasts a contemporary open layout, high-end finishes, and a private terrace perfect for enjoying stunning sunsets. 

Located in the prestigious Oceanview Estates community, you'll 