This is a starter notebook for the project. You will need to import the libraries you use; the ones available in this workspace are listed in the `requirements.txt` file.

In [1]:
import os
from config import OpenAPI_key
# Export the OpenAI credentials and endpoint through environment variables.
# The key itself comes from the local `config` module, so it is not written
# into the notebook.
os.environ.update(
    {
        "OPENAI_API_KEY": OpenAPI_key,
        "OPENAI_API_BASE": "https://openai.vocareum.com/v1",
    }
)

from langchain_openai import ChatOpenAI

## Data Generation for Real Estate Listings

In [2]:
from langchain_core.prompts import ChatPromptTemplate

# --- Model setup --------------------------------------------------------
model_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(
    model_name=model_name,
    temperature=0,
    max_tokens=4000,
)
# Same model, with response_format={"type": "json_object"} bound to every call.
# NOTE(review): the loading cell further down reads data_listings['listings'],
# but the request below never names that top-level key (nor the per-listing
# key names) -- confirm the schema before regenerating the data.
json_llm = llm.bind(response_format={"type": "json_object"})

# --- Request text -------------------------------------------------------
num_listings = 15
# The single `{}` placeholder receives the number of listings to generate.
listing_request_template = """
Generate a JSON list of {} diverse real estate listings in which each object has keys deduced from the example listing.

**Example Listing:**

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""
user_input = listing_request_template.format(num_listings)

# --- Chat prompt --------------------------------------------------------
system_instructions = "You are a real estate assistant specializing in creating detailed and attractive real estate listings. Each listing should include information about the neighborhood, price, number of bedrooms and bathrooms, house size, and descriptions of both the house and the neighborhood. Follow the format and style of the example listing provided below."
listing_generation_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        ("human", "{user_input}"),
    ]
)
listing_generation_prompt = listing_generation_template.invoke({"user_input": user_input})
print(listing_generation_prompt)

# The model call is left disabled; the next code cell loads previously saved
# listings from disk instead.
# ai_msg = json_llm.invoke(listing_generation_prompt)

messages=[SystemMessage(content='You are a real estate assistant specializing in creating detailed and attractive real estate listings. Each listing should include information about the neighborhood, price, number of bedrooms and bathrooms, house size, and descriptions of both the house and the neighborhood. Follow the format and style of the example listing provided below.'), HumanMessage(content='\nGenerate a JSON list of 15 diverse real estate listings in which each object has keys deduced from the example listing.\n\n**Example Listing:**\n\nNeighborhood: Green Oaks\nPrice: $800,000\nBedrooms: 3\nBathrooms: 2\nHouse Size: 2,000 sqft\n\nDescription: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and di

### Separating Quantitative and Textual Data from Generated Listings

In [3]:
import json

## Storing generated data in JSON and txt format
# (Disabled: these lines persist the model response `ai_msg`, whose call is
# also commented out in the previous cell.)
# with open("Generated_listings.txt", "w") as text_file:
#     text_file.write(ai_msg.content)
# gen_listings_json = json.loads(ai_msg.content)
# print(type(gen_listings_json))
# with open('Generated_listings.json', 'w') as f:  
#     json.dump(gen_listings_json, f)

## Loading saved JSON data
# Use a context manager so the file handle is closed as soon as parsing ends
# (the bare open() used previously left it open for the kernel's lifetime).
with open('Generated_listings.json', encoding='utf-8') as listings_file:
    data_listings = json.load(listings_file)
print(type(data_listings), len(data_listings['listings']))

# Keys whose free-text values are embedded for semantic search; every other
# key of a listing is kept as metadata next to the embedded text.
semantic_keys = ['description', 'neighborhood_description']
listing_texts = []
listing_metadata = []
for listing in data_listings['listings']:
    # "key: value" segments separated by one space, no trailing space.
    listing_texts.append(
        ' '.join(f'{key}: {listing[key]}' for key in semantic_keys)
    )
    listing_metadata.append(
        {key: value for key, value in listing.items() if key not in semantic_keys}
    )



<class 'dict'> 15


### Generating and Storing Embeddings in Vector Database

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

embeddings = OpenAIEmbeddings()

# Give every listing a stable id derived from its position. Without explicit
# ids the store assigns fresh ones on each call, so re-running this cell
# against the same persist_directory would add the listings again and
# searches would return duplicates.
# NOTE(review): a ./chroma_db_listings directory created by an earlier run
# without ids still holds the old entries -- delete it once before re-running.
listing_ids = [f"listing-{idx}" for idx in range(len(listing_texts))]

vector_db = Chroma.from_texts(
    listing_texts,
    embedding=embeddings,
    metadatas=listing_metadata,
    ids=listing_ids,
    persist_directory="./chroma_db_listings",
)

### User Preference Interface

In [30]:
# Questions put to the buyer, in the order they are asked.
preference_questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]

# Prompt for each question in turn; user_answers[i] is the reply to
# preference_questions[i].
user_answers = [input(question) for question in preference_questions]

15 1536


In [None]:
# TODO: Utilize chat history from last 4 questions and ask LLM to come up with
#       a search query that can be used for semantic retrieval.
# TODO: Utilize the first 2 questions and filter/sort the retrieved results
#       based on their metadata.
# NOTE(review): preference_questions has five entries, so "last 4" and
# "first 2" overlap on the second question -- confirm the intended split.

# Rebinds `model_name` and `llm` (first set in the data-generation cell) to a
# different model.
model_name = 'gpt-4o-mini'
llm = ChatOpenAI(
    model_name=model_name,
    temperature=0,
    max_tokens=4000,
)
