# Step 1: Setting Up the Python Application

In [34]:
!pip install chromadb
!pip install langchain
!pip install langchain-openai
!pip install numpy
!pip install openai
!pip install pandas
!pip install pydantic



In [35]:
import os
import pandas as pd
import shutil
import random
import re
from collections import defaultdict
from openai import OpenAI
from io import StringIO
from langchain.schema import Document
from langchain.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Step 2: Generating Real Estate Listings

In [36]:
OPENAI_API_KEY = 'YOUR_OPENAI_API_KEY'
MODEL_NAME = 'gpt-3.5-turbo'
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
client = OpenAI(api_key=OPENAI_API_KEY)
openai_embedder = OpenAIEmbeddings()

In [37]:
prompt='''
Generate a detailed CSV file containing information on 20 unique real estate listings. Each listing should be organized into the following columns:

Neighborhood: The specific neighborhood of the property, e.g., "Green Oaks."
Price: The market price listed in USD, the type is integer, formatted as "$800,000."
Bedrooms: The total number of bedrooms, the type is integer, e.g., 3.
Bathrooms: The total number of bathrooms, the type is integer, e.g., 2.
House Size: The square footage of the property, the type is integer, e.g., "2,000 sqft."
For each property, provide a descriptive paragraph that highlights its unique features and amenities. Emphasize sustainable elements such as energy-efficient appliances, solar panels, sustainable materials, and garden spaces.

Example Listing Format:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description
Green Oaks,"$800,000",3,2,"2,000 sqft","Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem."

Description Guidance:
Reflect the property's appeal, including its eco-friendly design and inviting living spaces.
Capture the neighborhood's character, emphasizing community elements such as organic grocery stores, parks, cafés, public transport accessibility, and environmental initiatives.
Ensure the CSV document has clear headers for each column, following the given example. Provide 20 unique listings, each showcasing distinct properties.
'''

messages = [{"role": "system", "content": f"{prompt}"}]
response = client.chat.completions.create(model=MODEL_NAME, messages=messages)
content = response.choices[0].message.content
print(content)

messages.append({"role": "assistant", "content": content})

Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description
Willow Creek,"$750,000",4,3,"2,500 sqft","Discover this modern 4-bedroom, 3-bathroom home in the peaceful Willow Creek neighborhood. The house features energy-efficient appliances, a smart thermostat, and LED lighting throughout. The spacious backyard is perfect for entertaining, complete with a rainwater harvesting system and a variety of fruit trees. Enjoy sustainable living with style in Willow Creek."
Riverfront Estates,"$1,200,000",5,4,"3,800 sqft","Luxury meets sustainability in this 5-bedroom, 4-bathroom home in Riverfront Estates. This property boasts solar panels, geothermal heating, and cooling systems, as well as reclaimed wood flooring. The gourmet kitchen overlooks a serene riverfront, and the home includes a rooftop garden for growing your own herbs and vegetables. Experience eco-luxury living at its finest in Riverfront Estates."
Sunnydale Heights,"$680,000",3,2,"1,800 sqft","Nestled in the vibrant community of

In [38]:
# Use StringIO to simulate a file object from the string data
data_io = StringIO(content.strip())
# Create a DataFrame
df = pd.read_csv(data_io)
# Display the DataFrame
df.head()

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description
0,Willow Creek,"$750,000",4,3,"2,500 sqft","Discover this modern 4-bedroom, 3-bathroom hom..."
1,Riverfront Estates,"$1,200,000",5,4,"3,800 sqft","Luxury meets sustainability in this 5-bedroom,..."
2,Sunnydale Heights,"$680,000",3,2,"1,800 sqft",Nestled in the vibrant community of Sunnydale ...
3,Maple Grove,"$820,000",4,3,"2,300 sqft","Welcome to this 4-bedroom, 3-bathroom home in ..."
4,Pine Ridge,"$900,000",5,3,"2,700 sqft",Discover a sustainable oasis in Pine Ridge wit...


# Step 3: Storing Listings in a Vector Database

In [39]:
# DB configuration
DB_PATH = "db"

if os.path.exists(DB_PATH):
    shutil.rmtree(DB_PATH)
    
documents = []
for index, row in df.iterrows():
    documents.append(Document(page_content=row['Description'], metadata={'id': str(index)}))

# Save to vector database
db = Chroma.from_documents(documents, openai_embedder, persist_directory=DB_PATH)
print(f"{len(documents)} documents saved")

19 documents saved


# Step 4: Building the User Preference Interface

In [40]:
questions = [   
                "How big do you want your house to be?", 
                "What are 3 most important things for you in choosing this property?", 
                "Which amenities would you like?", 
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?",   
            ]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
    ]

def parse_preferences(answers):
    structured_preferences = defaultdict(lambda: defaultdict(list))

    for answer in answers:
        # Bedrooms and house size
        bedrooms_match = re.search(r'\b(\w+-bedroom)\b', answer, re.IGNORECASE)
        if bedrooms_match:
            structured_preferences['house']['bedrooms'].append(bedrooms_match.group(1))
        
        # Kitchen and living room
        if 'kitchen' in answer:
            structured_preferences['house']['features'].append('spacious kitchen')
        if 'living room' in answer:
            structured_preferences['house']['features'].append('cozy living room')

        # Neighborhood and schools
        if 'quiet neighborhood' in answer:
            structured_preferences['location']['neighborhood'].append('quiet')
        if 'local schools' in answer:
            structured_preferences['location']['schools'].append('good')
        if 'shopping options' in answer:
            structured_preferences['location']['shopping'].append('convenient')

        # Outdoor and garage features
        if 'backyard' in answer:
            structured_preferences['house']['features'].append('backyard for gardening')
        if 'garage' in answer:
            structured_preferences['house']['features'].append('two-car garage')
        if 'energy-efficient heating system' in answer:
            structured_preferences['house']['features'].append('modern energy-efficient heating system')

        # Transportation
        if re.search(r'\bbus line\b', answer, re.IGNORECASE):
            structured_preferences['transport']['access'].append('bus line')
        if 'major highway' in answer:
            structured_preferences['transport']['access'].append('major highway')
        if 'bike-friendly roads' in answer:
            structured_preferences['transport']['features'].append('bike-friendly roads')

        # Urban settings
        if 'suburban tranquility' in answer:
            structured_preferences['location']['urban balance'].append('suburban tranquility')
        if 'urban amenities' in answer:
            structured_preferences['location']['urban balance'].append('urban amenities')
        if 'restaurants and theaters' in answer:
            structured_preferences['location']['amenities'].append('restaurants and theaters')

    return structured_preferences

def construct_query_text(structured_preferences):
    query_parts = []

    # House size and features
    if structured_preferences['house']['bedrooms']:
        bedrooms = ', '.join(structured_preferences['house']['bedrooms'])
        query_parts.append(f"a {bedrooms} house")
    
    if structured_preferences['house']['features']:
        features = ', '.join(structured_preferences['house']['features'])
        query_parts.append(f"with {features}")

    # Neighborhood and location preferences
    if structured_preferences['location']['neighborhood']:
        neighborhood = ', '.join(structured_preferences['location']['neighborhood'])
        query_parts.append(f"in a {neighborhood} neighborhood")
    
    if structured_preferences['location']['schools']:
        schools = ', '.join(structured_preferences['location']['schools'])
        query_parts.append(f"with {schools} local schools")
        
    if structured_preferences['location']['shopping']:
        shopping = ', '.join(structured_preferences['location']['shopping'])
        query_parts.append(f"and {shopping} shopping options")
    
    # Transportation preferences
    if structured_preferences['transport']['access']:
        transport_access = ', '.join(structured_preferences['transport']['access'])
        query_parts.append(f"with access to a {transport_access}")
    
    if structured_preferences['transport']['features']:
        transport_features = ', '.join(structured_preferences['transport']['features'])
        query_parts.append(f"and features like {transport_features}")

    # Urban balance and amenities
    if structured_preferences['location']['urban balance']:
        urban_balance = ', '.join(structured_preferences['location']['urban balance'])
        query_parts.append(f"balancing {urban_balance}")

    if structured_preferences['location']['amenities']:
        amenities = ', '.join(structured_preferences['location']['amenities'])
        query_parts.append(f"and nearby {amenities}")

    # Construct the final query text
    query_text = "I'm looking for " + ' '.join(query_parts) + "."
    return query_text

idx = random.randint(0, 4)
question = questions[idx]
answer = answers[idx]
print(f"Question: {question}")
print(f"Answer: {answer}")

ret = parse_preferences([answer])
query = construct_query_text(ret)
print(query)


Question: How urban do you want your neighborhood to be?
Answer: A balance between suburban tranquility and access to urban amenities like restaurants and theaters.
I'm looking for balancing suburban tranquility, urban amenities and nearby restaurants and theaters.


# Step 5&6: Searching Based on Preferences & Personalizing Listing Descriptions

In [41]:
# Get a random question
similarity_threshold = 0.6

template = """
Reply to the question based on the following context from database:
====================================

{context}

====================================

The context is retrieved from the database based on the user's preferences.
It's already sorted. The most relevant context is at the top.
Give a clear and reasonable response to the user who wants to buy a house based on the context provided above.
For each retrieved listing, you should also augment the description,
tailoring it to resonate with the buyer’s specific preferences.
This involves subtly emphasizing aspects of the property that align with what the buyer is looking for.
---

The preference from the user is:
{question}
"""

# Search the DB.
results = db.similarity_search_with_relevance_scores(query, k=3)
if len(results) == 0 or results[0][1] < similarity_threshold:
    print("Failed to find any matching results.")
else:
    context_text = "\n\n====================================\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(template)
    prompt = prompt_template.format(context=context_text, question=query)
    llm = ChatOpenAI()
    response_text = llm.predict(prompt)
    print(f"PROMPT:\n{prompt}")
    print(f"Response: {response_text}")

PROMPT:
Human: 
Reply to the question based on the following context from database:

Discover sustainable living in Sunset Terrace with this 3-bedroom, 2-bathroom home. The house includes energy-efficient windows, a smart irrigation system, and a rooftop solar panel array. Sunset Terrace offers easy access to public transportation, organic markets, and community gardens. Experience eco-friendly urban living in Sunset Terrace.


Experience eco-chic living in Springfield Heights with this 3-bedroom, 2-bathroom home. The property boasts a green roof, sustainable bamboo flooring, and energy-efficient lighting. Enjoy the community garden and nearby hiking trails in the green-friendly neighborhood. Springfield Heights is a haven for those seeking a sustainable lifestyle.


Located in the Rolling Green community of Golden Hills, this 3-bedroom, 2-bathroom home offers eco-friendly living. Enjoy energy-efficient appliances, a xeriscaped front yard, and a backyard oasis with a solar-powered hot 