In [1]:
from dotenv import load_dotenv
load_dotenv()
from langchain.llms import OpenAI
import os


In [2]:
# Load the listings CSV into a DataFrame. Judging by the file name and the
# *_Embeddings columns used later, it already carries pre-computed embeddings.
import pandas as pd
df = pd.read_csv("real_estate_with_embeddings.csv")
# Preview the first rows to sanity-check what was read.
df.head()

Unnamed: 0,Property_ID,Title,Address,Price_CAD,Bedrooms,Bathrooms,Area_sqft,Type,Status,Description,Image_URL,Agent_Name,Agent_Phone,Agent_Email,Preferences_Embeddings,Natural_Description_Embeddings
0,101,Charming Family Home,"123 Maple St, Toronto, ON",850000.0,4.0,3.0,2300.0,Detached House,For Sale,A spacious family home located in a quiet neig...,https://example.com/image1.jpg,Emily Clark,416-555-1234,emily.clark@example.com,[ 0.07970859 -0.10006012 -0.07188142 0.358909...,[ 2.28318423e-01 6.99329823e-02 8.62035304e-...
1,102,Luxury Downtown Condo,"456 Bay St, Vancouver, BC",1150000.0,2.0,2.0,1100.0,Condo,For Sale,A luxurious condo in the heart of downtown Van...,https://example.com/image2.jpg,James Harris,604-555-5678,james.harris@example.com,[ 3.80097449e-01 1.04890235e-01 -1.01807127e-...,[ 5.24142027e-01 1.50227517e-01 1.40960217e-...
2,103,Cozy Suburban Bungalow,"789 Pine Ave, Calgary, AB",450000.0,3.0,1.0,1400.0,Bungalow,Pending,This cozy bungalow is perfect for small famili...,https://example.com/image3.jpg,Sarah Thompson,403-555-9876,sarah.thompson@example.com,[ 0.21411096 -0.2918219 -0.12600707 0.218416...,[ 2.35963523e-01 -9.72562134e-02 -1.77809894e-...
3,104,Modern Townhouse,"321 Birch Rd, Ottawa, ON",620000.0,3.0,2.0,1600.0,Townhouse,For Sale,A modern townhouse with an open-concept design...,https://example.com/image4.jpg,Tom Richards,613-555-8765,tom.richards@example.com,[ 0.26590273 -0.29033634 -0.27822646 0.205300...,[ 3.51263851e-01 -6.64004460e-02 -2.14119211e-...
4,105,Spacious Country Cottage,"654 Cedar Lane, Muskoka, ON",750000.0,4.0,3.0,2500.0,Cottage,For Sale,"Escape to this spacious cottage in Muskoka, of...",https://example.com/image5.jpg,Laura Bennett,705-555-7654,laura.bennett@example.com,[ 1.96441218e-01 1.00613914e-01 -1.32575840e-...,[ 0.27091673 0.18083498 0.11570217 0.199161...


In [3]:
# Normalise the scalar columns: identifiers and free text become strings,
# the price a float, and the room / area counts integers.
df = df.astype({
    **dict.fromkeys(
        [
            'Property_ID', 'Title', 'Address', 'Type', 'Status', 'Description',
            'Image_URL', 'Agent_Name', 'Agent_Phone', 'Agent_Email',
        ],
        'str',
    ),
    'Price_CAD': 'float',
    **dict.fromkeys(['Bedrooms', 'Bathrooms', 'Area_sqft'], 'int'),
})

In [4]:
from lancedb.pydantic import vector, LanceModel
# Define the LanceDB model
# Property ID,Title,Address,Price (CAD),Bedrooms,Bathrooms,Area (sqft),Type,Status,Description,Image URL,Agent Name,Agent Phone,Agent Email,Natural Description
class RealEstateListing(LanceModel):
    """Schema of one row in the LanceDB listings table.

    The scalar fields carry the listing's attributes; the two vector fields
    carry its embeddings and are the columns the recommendation functions in
    this notebook search on.
    """
    # Listing attributes (same names as the DataFrame columns)
    Property_ID: str
    Title: str
    Address: str
    Price_CAD: float
    Bedrooms: int
    Bathrooms: int
    Area_sqft: int
    Type: str
    Status: str
    Description: str
    Image_URL: str
    Agent_Name: str
    Agent_Phone: str
    Agent_Email: str
    
    # Vector fields for embeddings
    # Both are fixed at 384 dimensions; presumably the output size of the model
    # that produced the CSV embeddings — TODO confirm if the model is ever swapped.
    Preferences_Embeddings: vector(dim=384)
    Natural_Description_Embeddings: vector(dim=384)

In [5]:
import lancedb

# Connect to the LanceDB database located at ~/.lancedb.
db = lancedb.connect("~/.lancedb")
table_name = "RealEstateListing"
# Start from a clean slate on every run: drop any existing table of that name
# (ignore_missing=True so a missing table is not treated as an error), then
# recreate it with the RealEstateListing schema.
db.drop_table(table_name, ignore_missing=True)
table = db.create_table(table_name, schema=RealEstateListing)

In [7]:

# change list of floats in a string to a list of floats
import ast

def parse_embedding(x):
    """Convert a stringified embedding back into a list of numbers.

    Handles the textual forms an embedding column can take after a CSV
    round-trip:

    * a Python list literal, e.g. ``"[0.1, 0.2]"``;
    * a NumPy-style array repr without commas, e.g. ``"[ 0.1 -0.2  3e-01]"``;
    * a bare whitespace-separated run of numbers, e.g. ``"0.1 0.2"``.

    Leading/trailing whitespace (including newlines) around the value is ignored.

    Args:
        x: The raw cell value.

    Returns:
        The parsed list when ``x`` is a string; ``x`` itself, untouched, for any
        other type (e.g. a value that is already a list).

    Raises:
        ValueError: If a token of the string cannot be converted to ``float``.
    """
    if not isinstance(x, str):
        return x

    # Strip first so padding around the brackets does not defeat the check below.
    text = x.strip()
    if text.startswith('[') and text.endswith(']'):
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError):
            # Not a valid Python literal (NumPy reprs have no commas): fall back
            # to splitting on whitespace. Only parse failures are caught, so
            # unrelated errors and interrupts still propagate.
            pass
        text = text.strip('[]')

    return [float(token) for token in text.split()]

# The embedding columns are read from the CSV as strings; turn them back into numeric lists.
df['Preferences_Embeddings'] = df['Preferences_Embeddings'].apply(parse_embedding)
df['Natural_Description_Embeddings'] = df['Natural_Description_Embeddings'].apply(parse_embedding)


# Load the rows into the LanceDB table. mode="overwrite" replaces the table's
# contents instead of appending, so re-running this cell on its own does not
# insert every listing a second time (which would duplicate search results).
table.add(df.to_dict(orient='records'), mode="overwrite")

AddResult(version=2)

In [9]:
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import Union

# Sentence-transformers checkpoint used to embed the free-text queries.
MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
model = SentenceTransformer(MODEL_NAME)


def generate_embeddings(input_data: Union[str, list[str]]) -> np.ndarray:
    """Encode a string, or a list of strings, with the module-level ``model``.

    Returns:
        Whatever ``model.encode`` produces for ``input_data``.
    """
    return model.encode(input_data)

# Function to get recommendations based on description similarity
def get_recommendations(description: str, top_k: int = 5) -> list[tuple[str, str, str, float]]:
    """Find the listings whose natural-description embedding is closest to a text.

    Args:
        description: Free-text description to embed and use as the query.
        top_k: Maximum number of listings to return.

    Returns:
        One ``(Property_ID, Title, Address, Price_CAD)`` tuple per match, in the
        order the search returned them.
    """
    # Embed the query text with the same helper used for every other query.
    query_vector = generate_embeddings(description)

    # Nearest-neighbour search restricted to the natural-description vector column.
    matches = (
        table.search(query_vector, vector_column_name="Natural_Description_Embeddings")
        .limit(top_k)
        .to_pydantic(RealEstateListing)
    )

    return [(match.Property_ID, match.Title, match.Address, match.Price_CAD) for match in matches]

In [12]:

# Plain-text interview questions, one per preference the recommender asks about.
# NOTE(review): `questions` is not referenced by any other cell visible in this
# notebook (the questionnaire cell defines its own `fields` list), so it may be
# a leftover — confirm before removing.
questions = [
    "What type of property are you looking for (e.g., Condo, Detached House, Cottage)?",
    "Do you have a preferred city or address?",
    "What is your budget in CAD?",
    "How many bedrooms do you need?",
    "How many bathrooms do you need?",
    "What is the minimum area (in square feet) you're looking for?",
    "Do you prefer a property that is currently for sale or rent?",
    "Do you have any specific features or descriptions you're looking for (e.g., modern kitchen, city view, private dock)?"
]

In [13]:
from langchain.chains import ConversationChain
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI



def select_best_listing_and_describe(preferences: str, listings: list[tuple[str, str, str, float]]) -> str:
    """Ask the LLM to pick the best candidate listing and describe it for the user.

    Args:
        preferences: The user's preferences as text; inserted verbatim into the prompt.
        listings: Candidate listings as ``(property_id, title, address, price)``
            tuples. The property id is not shown to the model.

    Returns:
        The model's reply with surrounding whitespace stripped, or a fixed
        "no matching listings" message when ``listings`` is empty.

    Raises:
        KeyError: If ``OPENAI_API_BASE`` or ``OPENAI_API_KEY`` is missing from
            the environment.
    """
    if not listings:
        # With no candidates the prompt would contain an empty list, inviting the
        # model to invent a property; answer directly instead of calling it.
        return "No matching listings were found for your preferences."

    # NOTE(review): this uses the completion-style `OpenAI` wrapper while the
    # questionnaire cell uses `ChatOpenAI` for the same model name — confirm
    # that this is intentional.
    completion_llm = OpenAI(
        model_name="gpt-4o",
        temperature=0.1,
        openai_api_base=os.environ["OPENAI_API_BASE"],
        openai_api_key=os.environ["OPENAI_API_KEY"]
    )
    listing_descriptions = "\n".join(
        f"{position}. Title: {title}, Address: {address}, Price: {price} CAD"
        for position, (_property_id, title, address, price) in enumerate(listings, start=1)
    )

    prompt = f"""
You are a helpful real estate assistant.

The user's preferences are:
{preferences}

Here are the top recommended listings:
{listing_descriptions}

Based on the user's preferences, select the ONE listing that best matches their needs.
Then write a personalized and factual description of this property, highlighting why it fits the user's needs.
Do NOT change or add any factual information.
Respond in this format:
Here is the best listing for you:
- Title: <title>
- Description: <personalized description>
"""

    response = completion_llm.invoke(prompt).strip()
    return response

# Function to get recommendations based on user preferences
def get_recommendations_from_preferences(preferences: str, top_k: int = 5) -> list[tuple[str, str, str, float]]:
    """Find the listings whose preferences embedding is closest to the user's preferences.

    Args:
        preferences: The user's preferences as text; embedded and used as the query.
        top_k: Maximum number of listings to return.

    Returns:
        One ``(Property_ID, Title, Address, Price_CAD)`` tuple per match, in the
        order the search returned them.
    """
    # Embed the preference text to obtain the query vector.
    query_vector = generate_embeddings(preferences)

    # Nearest-neighbour search restricted to the preferences vector column.
    matches = (
        table.search(query_vector, vector_column_name="Preferences_Embeddings")
        .limit(top_k)
        .to_pydantic(RealEstateListing)
    )

    return [(match.Property_ID, match.Title, match.Address, match.Price_CAD) for match in matches]

In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import ast

# Chat model used to rephrase each questionnaire question. The endpoint and key
# are read from the environment; a missing variable raises KeyError here.
llm = ChatOpenAI(
    model_name="gpt-4o",
    temperature=0.1,
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_key=os.environ["OPENAI_API_KEY"]
)
# One entry per preference to collect:
#   key      - name under which the answer is stored in the preferences dict
#   question - base wording handed to the LLM to rephrase
#   type     - Python type the user's answer is validated against
fields = [
    {"key": "property_type", "question": "What type of property are you looking for?", "type": str},
    {"key": "city", "question": "Which city or area do you prefer?", "type": str},
    {"key": "budget", "question": "What is your budget in CAD?", "type": float},
    {"key": "bedrooms", "question": "How many bedrooms do you need?", "type": int},
    {"key": "bathrooms", "question": "How many bathrooms do you need?", "type": int},
    {"key": "area", "question": "What is the minimum area (in square feet)?", "type": float},
    {"key": "status", "question": "Do you want a property that is for sale or rent?", "type": str},
    {"key": "features", "question": "Do you have any specific features or descriptions you're looking for (e.g., modern kitchen, city view, private dock)?", "type": str},
]

# Prompt template: the system message tells the model to ask the question in its
# own words; the human message carries the base question from `fields`.
chat_template = ChatPromptTemplate.from_messages([
    ("system", "You are a friendly assistant helping collect user preferences for buying a property. \n here is a question you need to ask the user use your own words to ask the user this question"),
    ("human", "{question}")
])

def get_valid_input(prompt, expected_type):
    """Prompt on stdin until the user gives an answer valid for ``expected_type``.

    Args:
        prompt: Text shown to the user; a newline and a ``> `` marker are appended.
        expected_type: ``int``, ``float`` or ``str``.

            * ``int`` / ``float``: the answer must parse as that type, and a
              float must be finite (``nan`` / ``inf`` are rejected).
            * ``str``: the answer must be non-empty and not consist solely of digits.

    Returns:
        The answer, stripped of surrounding whitespace and converted to
        ``expected_type``.

    Raises:
        TypeError: If ``expected_type`` is not one of the supported types.
    """
    import math

    if expected_type not in (int, float, str):
        # No answer could ever satisfy an unsupported type, so re-prompting
        # would loop forever; fail fast before asking anything.
        raise TypeError(f"Unsupported expected_type: {expected_type!r}")

    while True:
        answer = input(prompt + "\n> ").strip()

        if expected_type is str:
            # A purely numeric reply to a text question is almost certainly a slip.
            if answer and not answer.isdigit():
                return answer
            print("⚠️ Please enter a non-empty, non-numeric answer.")
            continue

        try:
            value = expected_type(answer)
            # float() happily parses "nan" and "inf", which are not usable answers.
            if expected_type is float and not math.isfinite(value):
                raise ValueError("non-finite number")
            return value
        except ValueError:
            print(f"⚠️ Please enter a valid {expected_type.__name__}.")

def run_llm_questionnaire():
    """Interview the user, one entry of ``fields`` at a time.

    For each field the LLM rephrases the base question, the rephrased question
    is shown, and the answer is read and validated with ``get_valid_input``
    against the field's declared type.

    Returns:
        dict mapping each field's ``key`` to the user's validated answer.
    """
    preferences = {}

    for field in fields:
        # Ask LLM to pose the question
        messages = chat_template.format_messages(question=field["question"])

        # Accept either a message object (chat models) or a plain string
        # (completion models). The text is extracted once; calling `.content`
        # again on the extracted content would raise AttributeError.
        reply = llm.invoke(messages)
        content = getattr(reply, "content", reply)
        ai_response = content if isinstance(content, str) else str(content)

        # Printed as well as passed to input(), so the question remains in the
        # cell output.
        prompt = f"\n🤖 {ai_response} \n"
        print(prompt)

        # Get user input with type validation
        user_input = get_valid_input(prompt, field["type"])
        print(f"Your answer: {user_input} \n")

        preferences[field["key"]] = user_input

    print("\n✅ Preferences collected:")
    for key, value in preferences.items():
        print(f"{key}: {value}")

    return preferences

In [17]:
# Sample answers for a manual test run (they correspond to the "Modern Townhouse" listing):
#   type:        Townhouse
#   city:        Ottawa, ON
#   budget:      620000.0
#   bedrooms:    3
#   bathrooms:   2
#   area:        1600.0
#   sale/rent:   sale
#   description: A modern townhouse with an open-concept design

# Interview the user, then flatten the answers (values only, one per line).
preferences = run_llm_questionnaire()
user_preferences = "\n".join(str(answer) for answer in preferences.values())

# Vector-search the listings with the flattened preferences.
recommendations = get_recommendations_from_preferences(user_preferences)
print("Recommendations based on user preferences:")
for property_id, title, address, price in recommendations:
    print(f"Property ID: {property_id}, Title: {title}, Address: {address}, Price: {price} CAD")

# Let the LLM choose among the candidates and write the tailored description.
best_listing_description = select_best_listing_and_describe(user_preferences, recommendations)
print("\nBest Matched Personalized Description:\n")
print(best_listing_description)



🤖 Could you tell me what kind of property you're interested in? 

⚠️ Please enter a non-empty string.
⚠️ Please enter a non-empty string.
Your answer: Townhouse 


🤖 Could you let me know which city or area you're interested in for your property search? 

Your answer: Ottawa 


🤖 Could you please let me know what your budget is in Canadian dollars for purchasing a property? 

⚠️ Please enter a valid float.
Your answer: 620000.0 


🤖 Could you let me know how many bedrooms you're looking for in your new home? 

Your answer: 3 


🤖 When considering your ideal home, how many bathrooms would you prefer it to have? 

Your answer: 2 


🤖 Could you please let me know the smallest size, in square feet, that you're considering for your new property? 

Your answer: 1600.0 


🤖 Are you looking to purchase a property or are you interested in renting one? 

Your answer: sale 


🤖 What specific features or characteristics are you hoping to find in your new property? For example, are you interested 




Best Matched Personalized Description:

Here is the best listing for you:
- Title: Modern Townhouse
- Description: This modern townhouse located at 321 Birch Rd, Ottawa, ON, perfectly aligns with your preferences. Priced at 620,000.0 CAD, it offers a contemporary open-concept design, ideal for those seeking a modern living space. With 3 bedrooms and 2 bathrooms, this townhouse provides ample space for comfortable living. Its location in Ottawa ensures you are in the desired area, making it a perfect match for your needs.
