# HomeMatch: Semantic Real Estate Listing Matching Application

In [103]:
import json
import os
from typing import List, Union

import gradio as gr
import lancedb
import numpy as np
import pyarrow as pa
from lancedb.pydantic import vector, LanceModel
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.retrievers import RePhraseQueryRetriever
from sentence_transformers import SentenceTransformer

# Device string handed to SentenceTransformer when the embedding model is loaded.
# NOTE(review): "mps" presumably targets Apple-silicon GPUs; loading will likely
# fail on machines without MPS support — confirm, or fall back to "cpu" there.
device = "mps"

### Generating Real Estate Listings with an LLM
I used ChatGPT 3.5 (via the OpenAI website) to generate over 20 listings. The prompt used to generate the listings is shown in the screenshots below.

<img src="data/Screenshot 2024-03-15 at 5.50.35 PM.png" width="50%">

<img src="data/Screenshot 2024-03-15 at 5.51.09 PM.png" width="50%">

The generated listings were manually copied into the `data/listing.json` file.


### Creating a Vector Database and Storing Listings
I will be doing the following:

- Creating a pydantic model to capture the fields of the listing object
- Loading data from the json file into a LanceDB table

In [104]:
# Sentence-embedding model used by generate_embeddings(); loaded once, on `device`.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device=device)


class HomeMatchModel(LanceModel):
    """LanceDB/pydantic schema for one home listing together with its embedding.

    Used as the Arrow schema when the listings table is created and as the
    result type when search results are converted back to Python objects.
    The field names presumably mirror the keys of each record in
    ``data/listing.json`` — confirm against the data file.
    """
    # Basic facts about the property.
    Neighborhood: str
    Price: str
    Bedrooms: int
    Bathrooms: int
    HouseSize: int
    # Free-text descriptions; these two fields are what gets embedded.
    Description: str
    NeighborhoodDescription: str
    # Fixed-size 384-dimensional embedding column.
    # NOTE(review): 384 presumably matches the output size of the
    # sentence-transformers model loaded above — confirm if the model changes.
    vector: vector(384) # type: ignore


def generate_embeddings(input_data: Union[str, list[str]]) -> np.ndarray:
    """
    Encode text into embeddings with the module-level sentence-transformers model.

    Args:
        input_data (Union[str, list[str]]): A single string or a list of strings
            to embed.

    Returns:
        np.ndarray: The array returned by ``model.encode`` for the input.

    Raises:
        Any error raised by ``model.encode`` is propagated unchanged.
    """
    # NOTE: the signature annotation is evaluated when this ``def`` executes, so
    # ``Union`` has to be imported from ``typing`` at the top of the file;
    # without that import defining the function raises NameError.
    return model.encode(input_data)


def generate_listing(path: str = 'data/listing.json') -> List[dict]:
    """
    Load the home listings from a JSON file and attach an embedding to each one.

    Every listing dictionary gets a ``"vector"`` key holding the embedding of
    its ``Description`` and ``NeighborhoodDescription`` joined by a newline.

    Args:
        path (str): Location of the JSON file holding a list of listing
            dictionaries. Defaults to ``'data/listing.json'``.

    Returns:
        List[dict]: The listing dictionaries, each extended with ``"vector"``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a listing lacks ``Description`` or
            ``NeighborhoodDescription``.
    """
    # JSON text is UTF-8 by specification, so do not depend on the platform's
    # default encoding when reading it.
    with open(path, 'r', encoding='utf-8') as listing_file:
        listings = json.load(listing_file)

    # The file is already closed here; embedding does not need it open.
    for row in listings:
        row["vector"] = generate_embeddings(f'{row["Description"]}\n{row["NeighborhoodDescription"]}')
    return listings


home_listing = generate_listing()

In [105]:

def create_table_from_data(data, db_uri="./data/lancedb", table_name="home_match"):
    """
    Create (or replace) a LanceDB table holding the home listing data.

    Args:
        data (list): Listing dictionaries whose keys match the fields of
            ``HomeMatchModel``, including the ``"vector"`` embedding.
        db_uri (str): Location of the LanceDB database to connect to.
            Defaults to ``"./data/lancedb"``.
        table_name (str): Name of the table to (re)create.
            Defaults to ``"home_match"``.

    Returns:
        table: The created table object.
    """
    db = lancedb.connect(db_uri)
    # Remove any table left over from an earlier run so it is rebuilt from scratch.
    db.drop_table(table_name, ignore_missing=True)
    # Convert the rows to Arrow under the model's schema. A separate name is
    # used so the caller-supplied ``data`` argument is not rebound.
    arrow_table = pa.Table.from_pylist(data, schema=HomeMatchModel.to_arrow_schema())
    return db.create_table(table_name, mode="overwrite", data=arrow_table)


table = create_table_from_data(home_listing)

[2024-03-16T20:25:51Z WARN  lance::dataset] No existing dataset at /Users/philip/Project/HomeMatch/data/lancedb/home_match.lance, it will be created


### Semantic Search of Listings Based on Buyer Preferences

In [106]:


def interpret_user_preferences(*user_answers):
    """
    Combine the buyer's free-text answers into a single preference string.

    Each answer is prefixed with one space and the pieces are concatenated in
    order, so a non-empty result starts with a space. The result is also
    stored in the module-level ``collected_preferences`` so it can be read
    after the UI callback has returned.

    Args:
        *user_answers (str): The buyer's answers, one per question.

    Returns:
        str: The concatenated answers, or ``""`` when no answers are given.

    Raises:
        TypeError: If an answer is not a string.
    """
    global collected_preferences
    parsed_preferences = "".join(" " + answer for answer in user_answers)
    # Assign unconditionally: when this lived inside the loop, a call with no
    # answers left the global undefined or holding a stale earlier value.
    collected_preferences = parsed_preferences
    return parsed_preferences

In [107]:

def user_preference_interface():
    """
    Build and launch the Gradio form that collects buyer preferences.

    Five pre-filled text boxes (one per question) feed
    ``interpret_user_preferences``; its return value is shown in a
    "Raw Parsed Output" box. This function itself returns nothing.
    """
    # (question label, pre-filled example answer) for each input box, in display order.
    prompts = (
        ("How big do you want your house to be?",
         "A comfortable three-bedroom house with a spacious kitchen and a cozy living room."),
        ("What are 3 most important things for you in choosing this property?",
         "A quiet neighborhood, good local schools, and convenient shopping options."),
        ("Which amenities would you like?",
         "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system."),
        ("Which transportation options are important to you?",
         "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads."),
        ("How urban do you want your neighborhood to be?",
         "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."),
    )
    answer_boxes = [gr.Textbox(label=label, value=example) for label, example in prompts]
    raw_output_box = gr.Textbox(label="Raw Parsed Output")

    gr.Interface(
        fn=interpret_user_preferences,
        inputs=answer_boxes,
        outputs=[raw_output_box],
        title="Interface for collecting buyer's preferences for demontration of sumantic search",
        description="Enter your preferences in natual language based on the questions below",
    ).launch(inline=False)
user_preference_interface()

Running on local URL:  http://127.0.0.1:7872

To create a public link, set `share=True` in `launch()`.


Exception in thread Thread-69 (_do_normal_analytics_request):
Traceback (most recent call last):
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpx/_transports/default.py", line 69, in map_httpcore_exceptions
    yield
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpx/_transports/default.py", line 233, in handle_request
    resp = self._pool.handle_request(req)
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpcore/_sync/connection_pool.py", line 216, in handle_request
    raise exc from None
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpcore/_sync/connection_pool.py", line 196, in handle_request
    response = connection.handle_request(
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpcore/_sync/connection.py", line 99, in handle_request
    raise exc
  File "/Users/philip/Project/HomeMatch/.venv/lib/python3.10/site-packages/httpcore/

### Step 5: Searching Based on Preferences
The sentence_transformers model will be used as our retriever.
Cosine similarity will be used as the distance metric because the embeddings are not normalised.

In [108]:
def semantic_listing_search(buyer_preferences, k=3):
    """
    Perform a semantic search on listings based on the buyer's preferences.

    The preference text is embedded with ``generate_embeddings`` and used as
    the query vector for a cosine-metric search of the listings table.

    Args:
        buyer_preferences (str): Free-text description of what the buyer is
            looking for.
        k (int): Maximum number of listings to return. Defaults to 3.

    Returns:
        list: ``HomeMatchModel`` objects for the matching listings.
    """
    # Embed the argument itself; the previous body read an undefined
    # ``parsed_preferences`` name (and an undefined ``k``) instead.
    query_vector = generate_embeddings(buyer_preferences)
    return (
        table.search(query_vector)
        .limit(k)
        .metric("cosine")
        .to_pydantic(HomeMatchModel)
    )


home_matched_listings = semantic_listing_search(collected_preferences)

### Augmented Response Generation

In [109]:

# Chat model used to rewrite the listing descriptions.
model_name = 'gpt-3.5-turbo'

def get_personalised_listing(description):
    """
    Ask the chat model for an augmented version of a listing description.

    Args:
        description (str): The original listing text to be rewritten.

    Returns:
        str: The text produced by the LLM chain for this description.

    NOTE(review): the prompt only receives the listing description; the
    buyer's collected preferences are not passed in — confirm this is intended.
    """
    augmentation_template = """You are an assistant tasked with taking a property listing description from a user
        and augmenting the description. In the process, subtly emphasize aspects of the property that align with what a buyer
        of the property is looking for. In the new property listing description, ensure that the augmentation process 
        enhances the appeal of the listing without altering factual information.
        Here is the user description: {description} """
    prompt = PromptTemplate(
        template=augmentation_template,
        input_variables=["description"],
    )

    # temperature=0 is passed through to the chat model unchanged.
    chat_model = ChatOpenAI(temperature=0, model_name=model_name)
    return LLMChain(llm=chat_model, prompt=prompt).run(description)


In [110]:

# Rewrite every matched listing with the LLM, printing the original and the
# rewritten text and collecting the rewritten versions in order.
augmented_descriptions = []
for i, home_preference in enumerate(home_matched_listings):
    print(f"LISTING {i + 1}")
    original_description = " ".join(
        [home_preference.Description, home_preference.NeighborhoodDescription]
    )
    print(f"Original Description \n {original_description} \n")
    augmented_description = get_personalised_listing(original_description)
    augmented_descriptions.append(augmented_description)
    print(f"Personalised Listing \n {augmented_description} \n")


LISTING 1
Original Description 
 Unique 3-bedroom, 2.5-bathroom urban treehouse. Nestled among towering trees, this eco-friendly home features sustainable materials, a rooftop garden, and a cozy tree-shaded patio. Urban Treehouse is an oasis in the heart of the city. Live in harmony with nature while enjoying the convenience of urban living, surrounded by lush greenery and a peaceful atmosphere. 

Personalised Listing 
 Introducing a one-of-a-kind 3-bedroom, 2.5-bathroom urban treehouse that seamlessly blends modern living with nature's tranquility. This eco-friendly sanctuary is enveloped by majestic trees, offering a serene escape from the hustle and bustle of city life. The sustainable materials used in its construction, along with a rooftop garden and a charming tree-shaded patio, create a harmonious environment that promotes a sustainable lifestyle. Urban Treehouse is not just a home, but a retreat where you can immerse yourself in the beauty of lush greenery and enjoy a peaceful 

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-dIYsSj4CiWwXV1W0XbeyaNTK on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing..


Personalised Listing 
 Nestled in the picturesque Maple Grove Estates, this timeless 3-bedroom, 2.5-bathroom home exudes charm and tranquility. The property is adorned with majestic mature trees, creating a serene backdrop for everyday living. Step inside to discover a warm and inviting atmosphere highlighted by a cozy fireplace, perfect for relaxing evenings. The sunlit breakfast nook offers a delightful space to enjoy your morning coffee while overlooking the beautifully landscaped backyard.

Maple Grove Estates is a sought-after, tree-lined community that epitomizes the essence of a peaceful suburban lifestyle. Immerse yourself in the beauty of nature with nearby parks and walking trails, providing endless opportunities for outdoor recreation. Families will appreciate the proximity to top-rated schools, ensuring a quality education for children of all ages. Embrace the charm of the changing seasons as you create lasting memories in this idyllic setting. Don't miss the chance to make

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-dIYsSj4CiWwXV1W0XbeyaNTK on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-dIYsSj4CiWwXV1W0XbeyaNTK on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/

Personalised Listing 
 Welcome to Skyline Retreat, a stunning modern 4-bedroom, 3.5-bathroom sanctuary boasting panoramic skyline views. This contemporary masterpiece is not just a home, but a lifestyle experience. Step into luxury with a rooftop garden perfect for relaxing or entertaining, a state-of-the-art home gym for your fitness needs, and smart home technology seamlessly integrated throughout the property.

Skyline Retreat offers more than just a high-altitude living experience - it provides a gateway to a world of convenience and tranquility. Immerse yourself in the breathtaking views of the cityscape from the comfort of your own home, where urban living meets the serenity of a private sanctuary. Don't just settle for a house, elevate your living experience at Skyline Retreat. 

