## Import Dependencies and Set Up Environment

In [11]:
import os
import openai
import json
import time
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

In [2]:
# Load variables from a local .env file (if one exists) into the process
# environment; the return value is not needed.
_ = load_dotenv()

# Configure the openai module from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Only override the API endpoint when one is actually configured. Assigning
# the result of os.getenv() unconditionally would set api_base to None when
# OPENAI_API_BASE is missing, replacing the library's built-in default.
api_base_override = os.getenv("OPENAI_API_BASE")
if api_base_override:
    openai.api_base = api_base_override

## Generate Seed Data via LLM

In [7]:
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown ``` fence, if it has one."""
    text = text.strip()
    if not text.startswith("```"):
        return text

    # Drop the opening fence line, which may carry a language tag ("```json").
    _, _, body = text.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def generate_home_listings(prompt, *, model="gpt-3.5-turbo", max_tokens=256, temperature=1):
    """Ask the chat model for one real-estate listing and parse it as JSON.

    Args:
        prompt: User message describing the property to write a listing for.
        model: Chat model name passed to ``openai.ChatCompletion.create``.
        max_tokens: Upper bound on the reply length. A reply that is cut off
            before its closing brace fails the JSON-object check below.
        temperature: Sampling temperature passed to the API.

    Returns:
        A dict parsed from the model's JSON reply on success. On any failure
        (API error, non-JSON reply, malformed JSON) a string of the form
        ``"An error occurred: <details>"`` is returned instead of raising.
    """
    system_prompt = """
    You are a real estate listing assistant specializing in crafting compelling and informative home descriptions. Your task is to generate real estate listings in **valid JSON format** based on user-provided details.
    
    Each listing should strictly follow this JSON format:

    {
        "Neighborhood": "[Neighborhood Name]",
        "Price": "$[Price]",
        "Bedrooms": [Number of Bedrooms],
        "Bathrooms": [Number of Bathrooms],
        "House_Size": "[Size in sqft]",
        "Description": "[Compelling property description]",
        "Neighborhood_Description": "[Engaging neighborhood description]"
    }

    **Rules:**
    - Return output in **pure JSON format** without any extra text.
    - Ensure values are properly formatted.
    - Use double quotes `" "` for JSON keys and values.
    
    Description:
    Provide a captivating yet concise property description, emphasizing key selling points such as architectural style, unique features, energy efficiency, or location benefits. Use persuasive language to make the home appealing to potential buyers. Highlight amenities, natural lighting, interior design elements, and lifestyle advantages.
    
    Neighborhood Description:
    Offer a brief, engaging description of the neighborhood. Highlight aspects such as community atmosphere, parks, restaurants, transportation, and local attractions. Focus on features that would be appealing to potential buyers considering the area.
    
    Maintain a professional and inviting tone, ensuring the listing feels aspirational yet grounded in reality. Avoid exaggeration while emphasizing the home’s best features.
    """

    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )

        raw_response = response.choices[0].message.content.strip()

        # Models sometimes wrap JSON in a Markdown code fence despite the
        # "pure JSON" instruction; unwrap it before validating.
        candidate = _strip_code_fence(raw_response)

        # Only a JSON object is acceptable; this also rejects truncated replies.
        if not (candidate.startswith("{") and candidate.endswith("}")):
            raise ValueError("OpenAI did not return valid JSON")

        return json.loads(candidate)

    except Exception as e:
        # Deliberate catch-all boundary: the batch caller treats a returned
        # error string as "skip this listing", so one failed request must not
        # raise and abort the whole run.
        return f"An error occurred: {e}"

def generate_multiple_listings(prompts, *, output_path="home_listings.json", delay_seconds=1):
    """Generate one listing per prompt and save the successful ones as JSON.

    Args:
        prompts: Sequence of prompt strings; each is passed to
            ``generate_home_listings``. Must support ``len()``.
        output_path: File the collected listings are written to.
        delay_seconds: Pause between consecutive requests (presumably to stay
            under API rate limits). Pass 0 to disable.

    Returns:
        The list of listing dicts that were generated successfully. Prompts
        whose generation failed are reported on stdout and skipped.
    """
    listings = []
    total = len(prompts)

    for index, prompt in enumerate(prompts, start=1):
        print(f"Generating listing {index}/{total}...")
        listing = generate_home_listings(prompt)

        # generate_home_listings signals failure by returning a message string
        # instead of a dict, so test the type rather than searching for text
        # (an `in` test means "has this key" on a dict but "contains this
        # substring" on a str).
        if isinstance(listing, dict):
            listings.append(listing)
        else:
            print(f"Error in listing {index}: {listing}")

        # Pause only between requests; there is nothing to wait for after the
        # final one.
        if index < total:
            time.sleep(delay_seconds)

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(listings, f, indent=4)

    print(f"Saved {len(listings)} listings to {output_path}")

    return listings

In [14]:
# Seed prompts: one natural-language request per synthetic listing. Each one
# states bedrooms, bathrooms, size, city and price plus one or two
# distinguishing features for the model to build a description around.
prompts = [
    "Generate a listing for a 4-bedroom, 2 bathroom house, with 4,000 sqft in Baltimore, priced at $400,000. The home has a parking pad and is near the park.",
    "Generate a listing for a 2-bedroom, 1 bathroom house, with 1,200 sqft in Washington DC, priced at $600,000. The home has a back patio and has street parking.",
    "Generate a listing for a 6-bedroom, 3 bathroom house, with 8,000 sqft in Dallas, priced at $500,000. The home has a large backyard and has three garages.",
    "Generate a listing for a 1-bedroom, 1 bathroom apartment, with 400 sqft in New York, priced at $800,000. The apartment has bathroom kitchen and features a window.",
    "Generate a listing for a 2-bedroom, 1 bathroom house, with 1,100 sqft in Baltimore, priced at $205,000. The home has a view of the harbor and is walking distance to the stadiums.",
    "Generate a listing for a 2-bedroom, 1 bathroom house, with 1,100 sqft in San Francisco, priced at $812,000. The home has is close to public transit and is near a park.",
    "Generate a listing for a 3-bedroom, 1 bathroom apartment, with 1,100 sqft in Chicago, priced at $700,000. The home is an apartment and has a view of the river.",
    "Generate a listing for a 3-bedroom, 2 bathroom house, with 2,100 sqft in New York, priced at $500,000. The home is in upstate New York and is near a wings restaurant.",
    "Generate a listing for a 3-bedroom, 2 bathroom house, with 1,100 sqft in Baltimore, priced at $190,000. The home near Canton square and has many dining options near it.",
    "Generate a listing for a 2-bedroom, 2 bathroom house, with 1,700 sqft in Dallas, priced at $195,000. The home is near the PGA headquarters and offers many opportunities to golf.",
    "Generate a listing for a 4-bedroom, 2 bathroom house, with 1,900 sqft in Eldersburg, priced at $450,000. The home is in a great school district.",
    "Generate a listing for a 4-bedroom, 2 bathroom house, with 1,700 sqft in Phoenix, priced at $400,000. The home is in the desert, has great views and is in a valley.",
    "Generate a listing for a 6-bedroom, 2 bathroom house, with 6,700 sqft in Dallas, priced at $600,000. The home is in the suberbs of Dallas and is only a 25 min drive from the city.",
    "Generate a listing for a 1-bedroom, 1 bathroom apartment, with 900 sqft in New York, priced at $800,000. The home is an apartment and is a 15 min walk from the park.",
    "Generate a listing for a 1-bedroom, 1 bathroom apartment, with 1,200 sqft in Miami, priced at $800,000. The home is an apartment and is a 15 min walk from the beach.",
]

# Generate every listing (this also writes home_listings.json), then
# pretty-print the first two as a quick sanity check of the output shape.
listings = generate_multiple_listings(prompts)
print(json.dumps(listings[:2], indent=4))

Generating listing 1/15...
Generating listing 2/15...
Generating listing 3/15...
Generating listing 4/15...
Generating listing 5/15...
Generating listing 6/15...
Generating listing 7/15...
Generating listing 8/15...
Generating listing 9/15...
Generating listing 10/15...
Generating listing 11/15...
Generating listing 12/15...
Generating listing 13/15...
Generating listing 14/15...
Generating listing 15/15...
Saved 15 listings to home_listings.json
[
    {
        "Neighborhood": "Baltimore",
        "Price": "$400,000",
        "Bedrooms": 4,
        "Bathrooms": 2,
        "House_Size": "4,000 sqft",
        "Description": "Welcome to your spacious 4-bedroom haven in Baltimore! This charming house boasts ample natural light, a modern layout, and a convenient parking pad. With 4,000 sqft of living space, enjoy the comfort and room to grow. Nestled near a tranquil park, this home offers a perfect balance of relaxation and urban living.",
        "Neighborhood_Description": "Experience th

## Storing Listings in Vector Database

In [18]:
# Reload the generated listings from disk so this cell can run on its own.
with open("home_listings.json", "r") as listings_file:
    listings = json.load(listings_file)

# NOTE(review): the recorded cell output shows a complete listing, well over
# 500 characters, stored as a single Document, so the splitter does not seem
# to be cutting this text. Confirm the separator / chunk_size do what is
# intended.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Structured fields copied onto every chunk as metadata.
METADATA_KEYS = ("Neighborhood", "Price", "Bedrooms", "Bathrooms", "House_Size")

documents = []

for listing in listings:
    # Flatten the listing into one block of text to be embedded.
    full_text = f"""
    Neighborhood: {listing["Neighborhood"]}
    Price: {listing["Price"]}
    Bedrooms: {listing["Bedrooms"]}
    Bathrooms: {listing["Bathrooms"]}
    House Size: {listing["House_Size"]}
    
    Description: {listing["Description"]}
    Neighborhood Description: {listing["Neighborhood_Description"]}
    """

    metadata = {key: listing[key] for key in METADATA_KEYS}

    # One Document per chunk; each gets its own copy of the metadata dict.
    documents.extend(
        Document(page_content=chunk, metadata=dict(metadata))
        for chunk in text_splitter.split_text(full_text)
    )

print(f"documents: {documents}")

documents: [Document(page_content="Neighborhood: Baltimore\n    Price: $400,000\n    Bedrooms: 4\n    Bathrooms: 2\n    House Size: 4,000 sqft\n    \n    Description: Welcome to your spacious 4-bedroom haven in Baltimore! This charming house boasts ample natural light, a modern layout, and a convenient parking pad. With 4,000 sqft of living space, enjoy the comfort and room to grow. Nestled near a tranquil park, this home offers a perfect balance of relaxation and urban living.\n    Neighborhood Description: Experience the vibrant community of Baltimore where convenience meets serenity. Discover nearby parks for leisurely strolls and recreational activities. Embrace the local charm and explore a variety of dining options and entertainment venues. Easy access to transportation makes commuting a breeze, ensuring that you're always connected to the pulse of the city.", metadata={'Neighborhood': 'Baltimore', 'Price': '$400,000', 'Bedrooms': 4, 'Bathrooms': 2, 'House_Size': '4,000 sqft'}), 

In [19]:
# Embedding client built with default settings (presumably it picks up the
# OpenAI credentials configured earlier via the environment; confirm).
embeddings = OpenAIEmbeddings()
# Embed the listing Documents and index them in a Chroma vector store.
# NOTE(review): no persist directory is passed here, so this looks like an
# in-memory store; confirm if persistence is required.
db = Chroma.from_documents(documents, embeddings)