# Udacity GenAI Project: Personalized Real Estate Agent

Project Setup

In [14]:
!pip install pandas



In [15]:
import os
import json
import requests
import pandas as pd
from typing import List
from pydantic import BaseModel, Field, ValidationError, NonNegativeInt
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

In [16]:
os.environ["OPENAI_API_KEY"] = "ENTER_API_KEY"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

model_name = "gpt-3.5-turbo"
llm = OpenAI(model_name=model_name, temperature=0)

# Generate Real Estate Listings

### The submission must demonstrate using a Large Language Model (LLM) to generate at least 10 diverse and realistic real estate listings containing facts about the real estate.

#### We'll setup a Pydantic model and use the LangChain Prompt Template for the LLM to output real estate listings with some details such as size, price, etc, and also both detailed and summary descriptions about the Listings

#### We'll then use the generated descriptions and neighborhood information as prompt for the image generation

In [4]:
# Defining the various fields on our real estate listing with Pydantic BaseModel

class RealEstateListing(BaseModel):
    summary: str = Field(description="Summary of this listing")
    neighborhood: str = Field(description="Name of the neighborhood")
    price: NonNegativeInt = Field(description="Price of this listing in SEK")
    bedrooms: NonNegativeInt = Field(description="Number of bedrooms in this listing")
    bathrooms: NonNegativeInt = Field(description="Number of bathrooms in this listing")
    house_size: NonNegativeInt = Field(description="Size of this listing in square meters")
    description: str = Field(description="Detailed description of the property")
    neighborhood_description: str = Field(description="Description of the neighborhood")

In [5]:
# Setup the instruction for the LLM and provide it of an example how the output should look like
# We will setup the listing generation here as well

def generate_real_estate_listings():
    instruction = (
        "Generate 15 detailed real estate listings in JSON format."
        "Ensure the listings are styled and formatted like the example below, which is typical of real estate listings in Stockholm, Sweden. "
        "Focus on providing information in Swedish currency (SEK), metric units (square meters), and descriptions suitable for Stockholm neighborhoods."
        "The listing should be in American English"
    )
    example_listing = [
        {
            "summary": "Spacious 3-bedroom villa in the prestigious neighborhood of Nockeby.",
            "neighborhood": "Nockeby",
            "price": 4000000,
            "bedrooms": 3,
            "bathrooms": 2,
            "house_size": 200,
            "description": (
                "Welcome to this eco-friendly villa in the heart of Nockeby. "
                "This charming 3-bedroom, 2-bathroom home boasts energy-efficient features "
                "such as solar panels and a well-insulated structure. Natural light floods "
                "the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes."
            ),
            "neighborhood_description": (
                "Nockeby is a prestigious, environmentally-conscious community with access "
                "to grocery stores, community gardens, and bike paths. Take a stroll through "
                "the nearby Nockeby Strand or grab a cup of coffee at the cozy cafe."
            ),
        }
    ]

    parser = PydanticOutputParser(pydantic_object=RealEstateListing)
    prompt_template = PromptTemplate(
        template="{instruction}\nExample: {example}\n{format_instructions}",
        input_variables=["instruction", "example"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    prompt = prompt_template.format(
        instruction=instruction,
        example=json.dumps(example_listing, indent=2) # tdil, indent makes the JSON much more readable
    )

    response = llm(prompt)
    try:
        raw_data = json.loads(response)
        listings = [RealEstateListing(**item) for item in raw_data]
        return listings
    except (json.JSONDecodeError, ValidationError) as e:
        print(f"Error parsing LLM response: {e}") # in case something is wrong with the key or low in credit, we'll get an error here.
        return []

In [6]:
# Save listings to CSV so that we later retrieve the listings from local storage.

def save_listings_to_csv(listings: List[RealEstateListing], filename: str):
    df = pd.DataFrame([listing.dict() for listing in listings])
    df.to_csv(filename, index=False)
    print(f"Listings saved to {filename}")
    return df

In [7]:
# Generate image prompts based on the summary, neighorhood, and description.

def generate_image_prompts(df: pd.DataFrame) -> pd.DataFrame:
    def create_prompt(row):
        return (
            f"Generate a realistic photo of a {row['summary'].lower()} " # user lower case to keep the prompt case consistent
            f"located in {row['neighborhood']} with features like: {row['description'].lower()}."
        )
    df["photo_prompt"] = df.apply(create_prompt, axis=1)
    return df

# Consider for the furture, floor plan from the bedrooms, bathrooms, and sq/m data

In [8]:
# Image Generation. Pass the prompt just created by the generate_images_prompt function to the LLM
# The images will be saved locally for later retrieval

def generate_images(prompts: List[str], output_dir: str = "generated_images") -> List[str]:
    os.makedirs(output_dir, exist_ok=True)
    image_paths = []

    for i, prompt in enumerate(prompts):
        try:
            response = openai.Image.create(prompt=prompt, n=1, size="1024x1024")
            image_url = response['data'][0]['url']

            # Save image locally
            image_path = os.path.join(output_dir, f"property_{i+1}.png")
            img_data = requests.get(image_url).content
            with open(image_path, 'wb') as img_file:
                img_file.write(img_data)

            image_paths.append(image_path)
            print(f"Image {i+1} saved at {image_path}")
        except Exception as e:
            print(f"Error generating image for prompt {i+1}: {e}")
            image_paths.append(None)

    return image_paths


In [None]:
# Let's run the listing and image geneartion functions.

listings = generate_real_estate_listings()
if not listings:
  print("ERROR: Didn't generate any listings. Happy bug hunting!")

#Note, this is not the final listing yet, as we will add image paths after generating images
listings_csv = "real_estate_listings.csv"
df = save_listings_to_csv(listings, listings_csv)

# Generate photo prompts
df_with_prompts = generate_image_prompts(df)

# Let's save the prompt to a csv for possible future use
df_with_prompts.to_csv("real_estate_with_prompts.csv", index=False)

# Generate images with the photo prompts
image_paths = generate_images(df_with_prompts["photo_prompt"].tolist())
df_with_prompts["local_image_path"] = image_paths

# Create final listing and export to csv
df_with_prompts.to_csv("listings.csv", index=False)
print("Real estate listings with images saved to 'listings.csv'.")

In [None]:
# Quick check to see what the list looks like...
df

Unnamed: 0,summary,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description,photo_prompt,local_image_path
0,Modern 2-bedroom apartment in the vibrant neig...,Södermalm,3500000,2,1,80,Step into this stylish and contemporary apartm...,"Södermalm is known for its trendy cafes, bouti...",Generate a realistic photo of a modern 2-bedro...,generated_images/property_1.png
1,Charming 1-bedroom cottage in the historic nei...,Gamla Stan,2800000,1,1,50,Experience the charm of old-world Stockholm in...,"Gamla Stan is the oldest part of Stockholm, wi...",Generate a realistic photo of a charming 1-bed...,generated_images/property_2.png
2,Luxurious 4-bedroom penthouse in the exclusive...,Östermalm,10000000,4,3,250,Indulge in luxury living in this stunning pent...,"Östermalm is known for its upscale shopping, f...",Generate a realistic photo of a luxurious 4-be...,generated_images/property_3.png
3,Cozy studio apartment in the charming neighbor...,Vasastan,2000000,0,1,40,This compact studio apartment offers a comfort...,Vasastan is a residential area with tree-lined...,Generate a realistic photo of a cozy studio ap...,generated_images/property_4.png
4,Spacious 5-bedroom townhouse in the family-fri...,Bromma,6000000,5,2,180,This well-maintained townhouse offers ample sp...,Bromma is a peaceful suburb with excellent sch...,Generate a realistic photo of a spacious 5-bed...,generated_images/property_5.png
5,Modern 3-bedroom duplex in the up-and-coming n...,Hornstull,4500000,3,2,120,Discover contemporary living in this stylish d...,Hornstull is a vibrant neighborhood with a mix...,Generate a realistic photo of a modern 3-bedro...,generated_images/property_6.png
6,Quaint 2-bedroom bungalow in the picturesque n...,Djurgården,3200000,2,1,70,Escape to this charming bungalow nestled in th...,Djurgården is a peaceful island known for its ...,Generate a realistic photo of a quaint 2-bedro...,generated_images/property_7.png
7,Stylish 1-bedroom loft in the trendy neighborh...,Kungsholmen,2800000,1,1,60,Live in style in this chic loft apartment loca...,Kungsholmen is a hip neighborhood with a mix o...,Generate a realistic photo of a stylish 1-bedr...,generated_images/property_8.png
8,Elegant 4-bedroom villa in the exclusive neigh...,Lidingö,8000000,4,3,220,Experience luxury living in this elegant villa...,Lidingö is a prestigious island community with...,Generate a realistic photo of a elegant 4-bedr...,generated_images/property_9.png
9,Cozy 2-bedroom apartment in the historic neigh...,Gamla Enskede,3000000,2,1,65,This charming apartment offers a cozy retreat ...,Gamla Enskede is a picturesque area with tree-...,Generate a realistic photo of a cozy 2-bedroom...,generated_images/property_10.png


# Semantic Search

#### First, we will create vector embedding from the listing and store in ChromaDB.
#### Then, we will format the search output into a format that looks nice.
#### Finally, we will perform semantic search of listings based on buyer preference.

## Creating a Vector Database and Storing Listings

In [None]:
#Skip this cell if we don't need to gen more listings or Images

import os
import pandas as pd
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from PIL import Image

os.environ["OPENAI_API_KEY"] = "ENTER_API_KEY"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [None]:
# load the real estate data from the csv file we created earlier.

def load_real_estate_data(filepath: str) -> pd.DataFrame:
    """Load the CSV file containing real estate listings and their metadata."""
    return pd.read_csv(filepath)

In [None]:
# Define a func to create embeddings and store function in ChromaDB

def store_embeddings_in_chromadb(data: pd.DataFrame, chroma_dir: str = "chromadb_store"):
    """
    Generate vector embeddings for each listing and store them in ChromaDB.

    """

    embeddings = OpenAIEmbeddings()
    vector_store = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)

    for _, row in data.iterrows():
        embedding_text = f"{row['summary']} {row['description']} {row['neighborhood_description']}"

        metadata = {
            "id": row.name,
            "summary": row.get("summary", "N/A"),
            "neighborhood": row.get("neighborhood", "N/A"),
            "price": row.get("price", "N/A"),
            "bedrooms": row.get("bedrooms", "N/A"),
            "bathrooms": row.get("bathrooms", "N/A"),
            "house_size": row.get("house_size", "N/A"),
            "description": row.get("description", "N/A"),
            "neighborhood_description": row.get("neighborhood_description", "N/A"),
            "image_path": row.get("local_image_path", None),
        }

        doc = Document(page_content=embedding_text, metadata=metadata)
        vector_store.add_documents([doc])

    # Then, we store vector embedding to disk

    vector_store.persist()
    print(f"ChromaDB embeddings stored in directory: {chroma_dir}")

In [None]:
# Embed and store the generated listing into ChromaDB

csv_filepath = "listings.csv"
chroma_directory = "chromadb_store"
real_estate_df = load_real_estate_data(csv_filepath)
store_embeddings_in_chromadb(real_estate_df, chroma_dir=chroma_directory)

ChromaDB embeddings stored in directory: chromadb_store


## Listing UI Design

In [None]:
from IPython.display import display, HTML

# Note: this function has been enhance to take in augmented description from the LLM

def show_listing(index):
    """
    Get listing details from csv and augment the description using LLM.
    User would like to get more reliable source from the CSV and format the listing in HTML.

    """
    try:
        listing = data.iloc[index]
    except IndexError:
        print(f"Index {index} is out of bounds.")
        return

    summary = listing.get('summary', 'N/A')
    neighborhood = listing.get('neighborhood', 'N/A')
    price = listing.get('price', 'N/A')
    bedrooms = listing.get('bedrooms', 'N/A')
    bathrooms = listing.get('bathrooms', 'N/A')
    house_size = listing.get('house_size', 'N/A')
    description = listing.get('description', 'N/A')
    neighborhood_description = listing.get('neighborhood_description', 'N/A')
    image_path = listing.get('local_image_path', None)

    # This bit is added to take in the augmented preference from the LLM
    enhanced_description = augment_listing_description(
        description,
        preferences,
        summary,
        bedrooms,
        bathrooms,
        price,
        house_size,
        neighborhood_description,
    )


    # Check if image path is still valid
    if pd.notna(image_path):
        image_html = f'<img src="{image_path}" alt="Property Image" style="width:100%; max-width:800px; height:auto; border-radius:5px;">'
    else:
        # Placeholder image incase image is not available
        image_html = '<div style="width:100%; max-width:800px; height:225px; background-color:#f0f0f0; display:flex; align-items:center; justify-content:center; border-radius:5px;">404 - Image not found</div>'

    html_content = f"""
    <div style="border: 1px solid #ddd; border-radius: 5px; padding: 10px; width: 800px; font-family: Arial, sans-serif;">
        {image_html}
        <h2 style="margin-top: 10px;">{summary}</h2>
        <p><strong>Agent's Recommendation:</strong> <i>{enhanced_description}</i></p>
        <p><strong>Neighborhood:</strong> {neighborhood}</p>
        <p><strong>Price:</strong> {price} SEK</p>
        <p><strong>Bedrooms:</strong> {bedrooms}</p>
        <p><strong>Bathrooms:</strong> {bathrooms}</p>
        <p><strong>House Size:</strong> {house_size} sq/m</p>
        <p><strong>Description:</strong> {description}</p>
        <p><strong>Neighborhood Description:</strong> {neighborhood_description}</p>
    </div>
    """

    display(HTML(html_content))

In [None]:
# Test to what the UI front end could look like.

listings_file = "listings.csv"
data = pd.read_csv(listings_file)
enhanced_description = "Debug message, don't mind me" # planned to have LLM to gen this, but hard code for now.
show_listing(4)

##  Semantic search of listings based on buyer preference.

In [None]:
# Define a function to make a similarity search on the stored vector

def similarity_search_stored_vector(query: str, chroma_dir: str, top_k: int = 1):
    """
    Similarity search in the vector store for the most relevant listing.

    """

    embeddings = OpenAIEmbeddings()
    vector_store = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)
    results = vector_store.similarity_search(query, k=top_k )

    print(f"Query: {query}")
    print(f"Top {top_k} results:\n")

    for idx, doc in enumerate(results):
        metadata = doc.metadata
        listing_id = metadata.get("id")
        print(f"\nResult {idx + 1} - ChromaDB Metadata: {metadata}") #TODO, decide if really want to show multi listings

        # pass the id to the HTML UI
        if listing_id is not None:
            show_listing(int(listing_id)) # check here again if the query id and csv id match
        else:
            print("Didn't find a listing. Happy bug hunt!")

In [None]:
# Let's see test a buyer preference and check the result with simiarity search

user_query = "I want to live close to great nightlife"
similarity_search_stored_vector(user_query, chroma_dir=chroma_directory, top_k= 1)

Query: I want to live close to great nightlife
Top 1 results:


Result 1 - ChromaDB Metadata: {'id': 12, 'image_path': 'generated_images/property_13.png', 'neighborhood': 'Norrmalm', 'price': 2500000, 'summary': 'Stylish 1-bedroom apartment in the trendy neighborhood of Norrmalm.'}


# LLM Augmentation

In [None]:
# Setting up RAG to pass the real estate factual info to an LLM to enhance its response to the buyer's preference.
# We will set this up within the semantic search function which we will add later.

def augment_listing_description(original_description, preferences, summary, bedrooms, bathrooms, price, house_size, neighborhood_description):
    """
    Use an LLM to enhance the property description by emphasizing aspects relevant to the buyer's preferences.

    """
    if not preferences:
        return original_description

    preference_text = " ".join(preferences)


    prompt = f"""
    You are a virtual real estate agent who is great at enhancing the original real estate descriptions to match the buyer's preferences.
    The original property description and property detials is provided below. Your task is to emphasize aspects of the property that align with the buyer's preferences without changing any factual details.
    Explain why you think this is the best property for your client.

    Buyer Preferences:
    {preference_text}

    Original Description:
    {original_description}

    Property Details:
    - Summary: {summary}
    - Price: {price} SEK
    - Bedrooms: {bedrooms}
    - Bathrooms: {bathrooms}
    - House Size: {house_size} sq/m
    - Neighborhood: {neighborhood_description}

    Enhanced Description:
    """

    print(prompt)

    try:
        response = openai.ChatCompletion.create(
            model= model_name,
            messages=[{"role": "system", "content": "You are a helpful real estate assistant."},
                      {"role": "user", "content": prompt}],
            temperature=0.5,
        )

        print(response)

        enhanced_description = response["choices"][0]["message"]["content"]
        return enhanced_description

    except openai.error.OpenAIError as e:
        print(f" OpenAI error: {e}")
        return original_description



In [None]:
# We setup another semantic search function using Max Margine Relevant. We will also add the RAG function here as well


# Define a function to query the stored vector
def max_marginal_relevance_search(preferences: list, chroma_dir: str, top_k: int = 1):
    """
    MMR search in the vector store for the most relevant listings in hope to overcome the duplicate issue with similarity_search

    """

    embeddings = OpenAIEmbeddings()
    vector_store = Chroma(persist_directory=chroma_dir, embedding_function=embeddings)

    user_query = "".join(preferences)
    query_embedding = embeddings.embed_query(user_query)

    # MMR for diverse results as a way to avoid duplicate results
    results = vector_store.max_marginal_relevance_search_by_vector(
        query_embedding,
        k= top_k,
        fetch_k= top_k,
    )

    print(f"Semantic Search Based on User Preferences:")
    print(f"Top {top_k} relevant listings:\n")

    for idx, doc in enumerate(results):
        metadata = doc.metadata
        listing_id = metadata.get("id")
        similarity_score = 1 - idx / top_k
        print(f"\nResult {idx + 1} - ChromaDB Metadata: {metadata}")

        # pass the id to the HTML UI
        if listing_id is not None:
            show_listing(int(listing_id)) # check here again if the query id and csv id match
        else:
            print("Didn't find a listing. Happy bug hunt!")

In [None]:
# Collect a buyer's preference

questions = [
                "How big do you want your house to be?"
                "What are 3 most important things for you in choosing this property?",
                "Which amenities would you like?",
                "Which transportation options are important to you?",
                "How urban do you want your neighborhood to be?",
]

preferences = [
    "A comfortable three-bedroom apartment with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A modern, energy-efficient heating system.",
    "Easy access to a reliable public transport.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

In [None]:
# Let's see the result

max_marginal_relevance_search(preferences, chroma_dir= "chromadb_store", top_k= 1)

Semantic Search Based on User Preferences:
Top 1 relevant listings:


Result 1 - ChromaDB Metadata: {'id': 4, 'image_path': 'generated_images/property_5.png', 'neighborhood': 'Bromma', 'price': 6000000, 'summary': 'Spacious 5-bedroom townhouse in the family-friendly neighborhood of Bromma.'}

    You are a virtual real estate agent who is great at enhancing the original real estate descriptions to match the buyer's preferences.
    The original property description and property detials is provided below. Your task is to emphasize aspects of the property that align with the buyer's preferences without changing any factual details.
    Explain why you think this is the best property for your client. 
    
    Buyer Preferences:
    A comfortable three-bedroom apartment with a spacious kitchen and a cozy living room. A quiet neighborhood, good local schools, and convenient shopping options. A modern, energy-efficient heating system. Easy access to a reliable public transport. A balance be

# Further Testings

## CEO looking for a place to retire

In [None]:
# Imagine Dan Ek is looking for a place to retire

eks_preferences = [
    "An upscale prestigious location.",
    "Space to for my boats and cars.",
    "Prefer famous address."
]

max_marginal_relevance_search(eks_preferences, chroma_dir= "chromadb_store", top_k= 1)

Semantic Search Based on User Preferences:
Top 1 relevant listings:


Result 1 - ChromaDB Metadata: {'id': 13, 'image_path': 'generated_images/property_14.png', 'neighborhood': 'Djursholm', 'price': 9000000, 'summary': 'Spacious 4-bedroom villa in the exclusive neighborhood of Djursholm.'}

    You are a virtual real estate agent who is great at enhancing the original real estate descriptions to match the buyer's preferences.
    The original property description and property detials is provided below. Your task is to emphasize aspects of the property that align with the buyer's preferences without changing any factual details.
    Explain why you think this is the best property for your client. 
    
    Buyer Preferences:
    A comfortable three-bedroom apartment with a spacious kitchen and a cozy living room. A quiet neighborhood, good local schools, and convenient shopping options. A modern, energy-efficient heating system. Easy access to a reliable public transport. A balance betw

## KTH student looking for housing

In [None]:
# Image parents buying an apartment for a child going into university

students_preferences = [
    "Compact living suitable for student.",
    "Budget friendly where price does not exceed 3 000 000 SEK",
    "Close to bus line or tunnelbanna.",
    "Close to good night life where students hangout."
]

# students_preferences = "".join(students_preferences)
# similarity_search_stored_vector(students_preferences, chroma_dir= "chromadb_store", top_k= 1)
max_marginal_relevance_search(students_preferences, chroma_dir= "chromadb_store", top_k= 1)

Semantic Search Based on User Preferences:
Top 1 relevant listings:


Result 1 - ChromaDB Metadata: {'id': 12, 'image_path': 'generated_images/property_13.png', 'neighborhood': 'Norrmalm', 'price': 2500000, 'summary': 'Stylish 1-bedroom apartment in the trendy neighborhood of Norrmalm.'}

    You are a virtual real estate agent who is great at enhancing the original real estate descriptions to match the buyer's preferences.
    The original property description and property detials is provided below. Your task is to emphasize aspects of the property that align with the buyer's preferences without changing any factual details.
    Explain why you think this is the best property for your client. 
    
    Buyer Preferences:
    A comfortable three-bedroom apartment with a spacious kitchen and a cozy living room. A quiet neighborhood, good local schools, and convenient shopping options. A modern, energy-efficient heating system. Easy access to a reliable public transport. A balance betwee

## End of Project