# HomeMatch

In [42]:
import os
from pydantic import BaseModel, Field
from typing import List
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.schema import Document as LangchainDocument
from langchain_chroma import Chroma
import json
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast when the API key is absent or empty. Coercing a missing value with
# str() would store the literal text "None" as the key and only surface as an
# opaque authentication failure on the first API call.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file"
    )

# Override the OpenAI base URL with the Vocareum endpoint.
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

In [3]:
# Chat model used for text generation; temperature is pinned to 0.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

# Embedding model later handed to the vector store as its embedding function.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

In [5]:
# Schema for one property record. It is nested inside RealEstateListing, which
# is the type passed to llm.with_structured_output(...).
# NOTE(review): the class docstring and the Field descriptions are presumably
# surfaced to the model as part of the output schema, so treat their wording
# as runtime text rather than as free-form documentation -- confirm before
# editing them.
class RealEstateDescription(BaseModel):
    """Information of a property"""
    # Every field uses Field(...), i.e. it is required and has no default.
    neighborhood: str = Field(..., description="Name of the neighborhood")
    price: int = Field(..., description="Listing price in USD")
    bedrooms: int = Field(..., description="Number of bedrooms")
    # Declared as int, so fractional counts (e.g. 2.5 baths) are not representable.
    bathrooms: int = Field(..., description="Number of bathrooms")
    house_size: int = Field(..., description="Size of the house in square feet")
    description: str = Field(..., description="Detailed description of about 100 words of the property")
    neighborhood_description: str = Field(..., description="Description of the neighborhood")

# Top-level container requested from the model: a single required field holding
# a list of RealEstateDescription records. The field name "listing" is also the
# key used when the dumped JSON is read back (listing_data["listing"]).
class RealEstateListing(BaseModel):
    """A collection of real estate listings"""
    listing: List[RealEstateDescription] = Field(..., description="List of real estate property descriptions")


# Bind the RealEstateListing schema to the chat model so the reply comes back
# as a structured object (it is later serialized with .model_dump()).
structured_llm = llm.with_structured_output(RealEstateListing)
prompt = "Generate a real estate listing of 20 properties"
# NOTE(review): the prompt asks for 20 properties, but nothing here checks the
# count; the recorded add_documents output further down shows 19 ids.
listing = structured_llm.invoke(prompt)



In [44]:
# Persist the generated listings to disk so later cells can reload them
# without calling the model again. The encoding is stated explicitly so the
# file does not depend on the platform's default text encoding.
with open('listing.json', 'w', encoding='utf-8') as f:
    json.dump(listing.model_dump(), f)

In [48]:
# Reload the saved listings as plain Python data (a dict with a "listing" key).
# The encoding is stated explicitly so decoding does not depend on the
# platform's default text encoding.
with open('listing.json', encoding='utf-8') as f:
    listing_data = json.load(f)

In [49]:

# Turn every listing record into a LangChain document for the vector store.
documents = []
for item in listing_data["listing"]:
    # The text that gets embedded is one "key: value" line per field, in the
    # record's own field order.
    formatted_string = "\n".join(f"{key}: {value}" for key, value in item.items())
    # The raw record is also attached as metadata so the structured fields
    # (price, bedrooms, ...) come back alongside each search hit.
    doc = LangchainDocument(page_content=formatted_string, metadata=item)
    documents.append(doc)

In [50]:
# Vector store for the listings: collection "homematch", stored under the
# local "db" directory, embedding text with the `embeddings` model.
db = Chroma(
    collection_name="homematch",
    embedding_function=embeddings,
    persist_directory="db",
)

In [51]:
# NOTE(review): presumably empties the "homematch" collection so that re-running
# the notebook does not accumulate duplicate documents -- confirm against the
# langchain_chroma documentation.
db.reset_collection()

In [52]:
# Insert the listing documents into the store. The call returns the ids of the
# stored documents, which the notebook displays as the cell output.
db.add_documents(documents)

['ea23ae1f-af7a-47e0-9459-0857f7d856c6',
 '4a65d48d-3f43-459b-a8fd-1b2089b01c7b',
 '50b48966-df4a-4d67-b9c3-1567d2031c02',
 '604f16e2-3898-47eb-a493-e868f176b74e',
 '08dea37e-cf38-46c7-9559-46f92d884064',
 'd0a39b6e-b82f-478f-82c7-47b4fd9a840e',
 '7ff76916-7076-4d8c-8875-c2b354ac1f6d',
 '8538aaa9-3a75-4269-8257-14073e4b369e',
 'f332e367-17da-4e11-95bd-92949824d7af',
 '3eb2c0a0-9ef4-4587-a206-9da45662753f',
 '056bdefb-9cf8-438e-ba9f-772a74e7f7c9',
 'd6cb5dba-db2a-41fa-83cf-aab549971578',
 '54c44015-a1d4-4e9d-b02a-654263994469',
 '231e38be-9f53-4301-8434-4ac369bd2238',
 'b4bce77e-91af-4317-ab4e-b3d988e91c76',
 '472ac7d5-74b4-4842-b7ba-ad7e21244041',
 'f675902f-4619-4d0d-88dd-9e4ca26bc4a2',
 '954b21a3-8c76-4b80-97df-1bcf46c797ab',
 '38562ff4-925c-4ba1-a26c-1178563673c0']

In [53]:
# Smoke-test retrieval with a free-text query. No result count is passed, so
# the number of hits is whatever the library defaults to.
db.similarity_search("beachfront")

[Document(id='d0a39b6e-b82f-478f-82c7-47b4fd9a840e', metadata={'price': 800000, 'house_size': 2800, 'bedrooms': 4, 'description': 'Beachfront property with direct access to the sandy shore. Spacious living room with ocean views. Gourmet kitchen with a breakfast bar. Master suite with a walk-in closet. Outdoor deck for entertaining.', 'bathrooms': 3, 'neighborhood_description': 'Prime beachfront location with water sports and beach activities.', 'neighborhood': 'Beachfront'}, page_content='neighborhood: Beachfront\nprice: 800000\nbedrooms: 4\nbathrooms: 3\nhouse_size: 2800\ndescription: Beachfront property with direct access to the sandy shore. Spacious living room with ocean views. Gourmet kitchen with a breakfast bar. Master suite with a walk-in closet. Outdoor deck for entertaining.\nneighborhood_description: Prime beachfront location with water sports and beach activities.'),
 Document(id='50b48966-df4a-4d67-b9c3-1567d2031c02', metadata={'description': 'Luxurious waterfront condo wi