This is a starter notebook for the project. You will need to import the libraries you use; the ones available in this workspace are listed in the `requirements.txt` file.

In [3]:
import os
from getpass import getpass

# Credentials must never be stored in the notebook itself. The key is taken
# from the environment; only when it is missing do we fall back to a hidden
# interactive prompt, so the value never appears in the file or its outputs.
# NOTE(review): any key that was previously committed in this cell should be
# treated as compromised and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Default to the Vocareum proxy used by this workspace, but respect an
# endpoint that was already configured outside the notebook.
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")


from langchain.llms import OpenAI


# Step 1: Set Up the Environment
This step sets up the environment and imports the necessary libraries for the project.

In [1]:
# ✅ Step 1: Setup & Imports
import os
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.schema import SystemMessage, HumanMessage
from pydantic import BaseModel, NonNegativeInt
from typing import List
import pandas as pd
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
import shutil

# Step 2: Generate Listings using ChatOpenAI
Define the schema for listings, create a prompt, and use the LLM to generate sample listings.

In [5]:
# ✅ Step 2: Define Schema for Real Estate Listings
# One generated listing. The numeric fields are NonNegativeInt, so negative
# values are rejected when the LLM output is validated against this model.
# (Documented with comments rather than a docstring on purpose: a pydantic
# class docstring becomes part of the JSON schema sent to the LLM.)
class RealEstateListing(BaseModel):
    neighborhood: str  # neighborhood name, e.g. "Green Oaks" in the sample listing
    price: NonNegativeInt  # asking price; presumably whole US dollars (sample shows "$800,000")
    bedrooms: NonNegativeInt  # number of bedrooms
    bathrooms: NonNegativeInt  # number of bathrooms (whole numbers only)
    house_size: NonNegativeInt  # floor area; presumably square feet (sample shows "2,000 sqft")
    description: str  # prose description of the house itself
    neighborhood_description: str  # prose description of the surrounding area

# Wrapper model the output parser is built on: the LLM is asked for a single
# JSON object whose "listing" key holds the list of generated listings.
class ListingCollection(BaseModel):
    listing: List[RealEstateListing]  # all listings; the singular name is the JSON key the parser expects

# ✅ Parser and Sample Listing
# The parser targets ListingCollection: it supplies the format instructions
# that are inserted into the prompt and later validates the model's reply.
parser = PydanticOutputParser(pydantic_object=ListingCollection)

# Example listing placed in the prompt to show the model the expected content
# and tone. Note that it is plain "Label: value" text, whereas the reply itself
# is requested as JSON.
sample_listing = """Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."""

# ✅ Create Prompt Template
# Number of listings to request from the model. Kept as a named constant so it
# can be tuned in one place instead of being buried in the prompt text.
NUM_LISTINGS = 13

# The template has three slots: the task instruction, an example listing, and
# the parser's format instructions. The last one is pre-filled here so that
# callers only supply `instruction` and `sample`.
prompt = PromptTemplate(
    template="{instruction}\n\n{sample}\n\n{format_instructions}\n",
    input_variables=["instruction", "sample"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# ✅ Format query
# `query` is the final prompt text that is sent to the chat model.
query = prompt.format(
    instruction=(
        f"Generate {NUM_LISTINGS} real estate listings in JSON format. "
        "Follow the structure shown below."
    ),
    sample=sample_listing,
)

# ✅ LLM: gpt-3.5-turbo (chat model), sampling temperature 0
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# ✅ Build the conversation: a system role line, then the formatted request
system_message = SystemMessage(
    content="You are a helpful assistant that generates real estate listings."
)
user_message = HumanMessage(content=query)
messages = [system_message, user_message]

# ✅ Send prompt to ChatOpenAI and keep the reply text for parsing
response = llm(messages)
output = response.content

# ✅ Validate the raw reply and turn it into a ListingCollection
listings = parser.parse(output)

# ✅ One dict per listing -> DataFrame; persist it with the row index as "id"
listing_rows = [entry.dict() for entry in listings.listing]
df = pd.DataFrame(listing_rows)
df.to_csv("listings.csv", index_label="id")

# Preview the first rows as the cell output
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunny Hills,650000,4,3,2400,"Step into this bright and spacious 4-bedroom, ...",Sunny Hills offers a family-friendly environme...
2,Lakeview Estates,950000,5,4,3500,"Luxury awaits in this stunning 5-bedroom, 4-ba...",Lakeview Estates is an exclusive community kno...
3,Maple Grove,550000,3,2,1800,"Charming 3-bedroom, 2-bathroom home located in...",Maple Grove is a quiet residential area with t...
4,Riverfront Terrace,720000,4,3,2200,Welcome to this riverside retreat in the desir...,Riverfront Terrace is a waterfront community w...


# Step 3: Store Listings in ChromaDB
We use vector embeddings to store and retrieve listings based on similarity search.

In [6]:
CHROMA_PATH = "chroma"


def _listing_to_text(row):
    """Render one listing row as the text that is embedded and shown to the LLM.

    Indexing only the house description hides the neighborhood, price and size
    from both retrieval and the answer-generation context, so the whole listing
    is rendered instead, using the same labels as the sample listing.

    Lines are joined with single newlines on purpose: CharacterTextSplitter
    splits on blank lines by default, and a listing should stay in one chunk so
    that every retrieved chunk carries the complete listing.

    Args:
        row: Mapping-like row (e.g. a pandas Series) with the listing fields.

    Returns:
        str: Multi-line "Label: value" text for the listing.
    """
    return "\n".join([
        f"Neighborhood: {row['neighborhood']}",
        f"Price: ${int(row['price']):,}",
        f"Bedrooms: {int(row['bedrooms'])}",
        f"Bathrooms: {int(row['bathrooms'])}",
        f"House Size: {int(row['house_size']):,} sqft",
        f"Description: {row['description']}",
        f"Neighborhood Description: {row['neighborhood_description']}",
    ])


# Prepare documents: one per listing, tagged with the DataFrame index as its id
documents = [
    Document(page_content=_listing_to_text(row), metadata={'id': str(i)})
    for i, row in df.iterrows()
]

splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100)
split_docs = splitter.split_documents(documents)

# Reinitialize ChromaDB: drop any previous index so re-running this cell does
# not accumulate duplicate entries
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

vector_db = Chroma.from_documents(
    split_docs,
    embedding=OpenAIEmbeddings(),
    persist_directory=CHROMA_PATH
)

# Flush the index to disk so it can be reopened from CHROMA_PATH later
vector_db.persist()

# Step 4: Search + Personalization
Query the database to get relevant listings and customize responses for buyers.

In [7]:
# Answer-generation prompt. `{context}` receives the retrieved listing text and
# `{question}` the buyer's query; both are filled in by get_top_listings.
PROMPT_TEMPLATE = """
Context:

{context}

------
Provide a response which will not only answer the buyer's question but also address the buyer’s specific preferences. Emphasize relevant aspects.

Question: {question}
"""

def get_top_listings(query, prompt_template_str, k=5, min_relevance=0.7):
    """Answer a buyer's question using the stored listings most similar to it.

    Args:
        query: Free-text buyer question or preferences.
        prompt_template_str: Prompt template with ``{context}`` and
            ``{question}`` placeholders.
        k: Maximum number of chunks to retrieve from the vector store.
        min_relevance: Relevance score the best match must reach; below it the
            search is treated as having found nothing.

    Returns:
        str: ``"No relevant properties found."`` when the query is blank,
        nothing is retrieved, or the best match scores below ``min_relevance``;
        otherwise the model's answer followed by the ids of the source listings.
    """
    no_match = "No relevant properties found."

    # A blank query carries nothing to search for; answer immediately instead
    # of spending an embeddings call (and risking an API error) on it.
    if not query or not query.strip():
        return no_match

    embedding_function = OpenAIEmbeddings()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
    results = db.similarity_search_with_relevance_scores(query, k=k)

    # Only the first score is checked; this presumes results arrive best-first.
    if not results or results[0][1] < min_relevance:
        return no_match

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _ in results)
    prompt = PromptTemplate.from_template(prompt_template_str).format(context=context_text, question=query)
    model = ChatOpenAI()
    response_text = model.predict(prompt)

    # Several retrieved chunks may belong to the same listing; report each id
    # once, keeping the retrieval order.
    source_ids = list(dict.fromkeys(doc.metadata['id'] for doc, _ in results))
    return f"Response: {response_text}\n\nSources: {source_ids}"


# Step 5: CLI Chat
This provides a command-line interface for interacting with the HomeMatch system.

In [8]:
if __name__ == "__main__":
    # Interactive chat loop: read a question, print the personalized answer,
    # repeat until the user types 'exit' or ends the input.
    print("Welcome to HomeMatch! Ask your real estate question below. Type 'exit' to quit.")
    while True:
        try:
            # Stored under its own name so the listing-generation prompt held
            # in the notebook-level `query` variable is not overwritten.
            user_query = input("\n🏠 Your Query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or a kernel interrupt: leave the loop without a traceback.
            break
        if user_query.lower() == 'exit':
            break
        if not user_query:
            # Nothing was typed; ask again instead of running an empty search.
            continue
        print("\n", get_top_listings(user_query, PROMPT_TEMPLATE))

Welcome to HomeMatch! Ask your real estate question below. Type 'exit' to quit.

🏠 Your Query: I'm looking for a quiet neighborhood with good schools and parks nearby for my kids.

 Response: Based on your preferences, I would highly recommend the charming 3-bedroom, 2-bathroom home located in the peaceful Willow Creek neighborhood. This neighborhood offers a tranquil environment with nearby parks for your kids to enjoy. Additionally, Willow Creek is known for its great schools, making it an ideal choice for families. The cozy living room with a brick fireplace and hardwood floors, along with the updated kitchen with granite countertops and stainless steel appliances, will provide a warm and inviting atmosphere for your family. The private backyard with a deck and mature trees is perfect for relaxation. Experience comfort, tranquility, and convenience in Willow Creek.

Sources: ['11', '9', '3', '7', '10']

🏠 Your Query: I want an eco-friendly home with solar panels and a vegetable gard