This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

In [23]:
import openai
import os
import csv

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.question_answering import load_qa_chain

In [3]:
openai_api_key=" "
os.environ["OPENAI_API_KEY"] = openai_api_key

## Synthetic Data Generation

In [24]:
# Generating Real Estate Listings with an LLM
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 4000)



In [25]:
data_gen_template = """
generate csv formatted {num_listings} listings for different imaginary real estates. come up with different name and values for each one. 
An example of a listing is:
Neighborhood: Green Oaks
Price: $800000
Bedrooms: 3
Bathrooms: 2
House Size: 2000 sqft
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.
Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
each listings will have these fields in the csv: neighborhood,price,bedrooms,bathrooms,house_size, description, neighborhood_description.
be creative in your listings, amaze us, csv format is a must.
"""

# Prompt to generate the listing
data_gen_prompt = PromptTemplate.from_template(data_gen_template)

In [26]:
# Listings in CSV format
listings_csv = llm(data_gen_prompt.format(num_listings=10))

In [27]:
listings_csv

"neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description\nSunset Heights,$950000,4,3,2500 sqft,Welcome to this luxurious 4-bedroom, 3-bathroom home in the prestigious Sunset Heights neighborhood. This stunning property features high-end finishes, a gourmet kitchen, and a spacious backyard with a sparkling pool. Perfect for entertaining or relaxing in style. Sunset Heights is known for its upscale dining options, boutique shops, and beautiful parks. Enjoy the vibrant community and upscale living in this exclusive neighborhood.\nDowntown Loft,$700000,2,2,1800 sqft,Live in the heart of the city in this modern 2-bedroom, 2-bathroom loft in downtown. This chic property boasts high ceilings, exposed brick walls, and floor-to-ceiling windows with stunning city views. The open-concept living space is perfect for urban living and entertaining. Downtown offers a vibrant nightlife, trendy restaurants, and easy access to public transportation. Experience city living a

## Creating a Vector Database and Storing Listings

In [None]:
# Function to save the listings in CSV
def generate_csv_from_text(csv_text, output_filename):
    # Split the text into lines
    lines = csv_text.strip().split('\n')
    
    # Create a CSV reader from the lines
    reader = csv.reader(lines)
    
    # Open the output file in write mode
    with open(output_filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        
        # Write each row to the CSV file
        for row in reader:
            writer.writerow(row)
    
    print(f"CSV file '{output_filename}' generated successfully.")
    
# Generate CSV with listings 
generate_csv_from_text(listings_csv, 'listings.csv')

In [4]:
loader = CSVLoader(file_path='./listings.csv')
data = loader.load()

In [5]:
# Character Splitter
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(data)

# Embeddings
embeddings = OpenAIEmbeddings()

# Vector Database
db = Chroma.from_documents(split_docs, embeddings)

## Semantic Search of Listings Based on Buyer Preferences

In [8]:
def query_user_and_create_prompt(questions):
    answers = []
    
    for question in questions:
        while True:
            if question.startswith("How big do you want your house to be"):
                answer = input(question + " ")
                
                if not answer.strip():
                    print("Answer cannot be empty. Please provide a valid response.")
                    continue
                
                try:
                    size = float(answer)
                    if size <= 0:
                        print("Please enter a valid positive number for house size.")
                        continue
                    answers.append(answer)
                    break
                except ValueError:
                    print("Please enter a valid number for house size.")
                    continue
            
            elif question.startswith("Enter the estimated price"):
                answer = input(question + " ")
                
                if not answer.strip():
                    print("Answer cannot be empty. Please provide a valid response.")
                    continue
                
                try:
                    price = float(answer)
                    if price <= 0:
                        print("Please enter a valid positive number for house price.")
                        continue
                    answers.append(answer)
                    break
                except ValueError:
                    print("Please enter a valid number for house price.")
                    continue
            
            elif question.startswith("Which amenities would you like"):
                amenities_list = ["pool", "gym", "parking", "security", "balcony"]
                #print(f"Available amenities: {', '.join(amenities_list)}")
                answer = input(question + f" (Choose from {', '.join(amenities_list)}): ")
                
                if not answer.strip():
                    print("Answer cannot be empty. Please provide a valid response.")
                    continue
                
                chosen_amenities = [amenity.strip().lower() for amenity in answer.split(',') if amenity.strip().lower() in amenities_list]
                if not chosen_amenities:
                    print("Invalid selection. Please choose from the provided list.")
                    continue
                answers.append(','.join(chosen_amenities))
                break
            
            elif question.startswith("Which transportation options are important"):
                transport_list = ["bus", "train", "subway", "car", "bike"]
                # print(f"Available transportation options: {', '.join(transport_list)}")
                answer = input(question + f" (Choose from {', '.join(transport_list)}): ")
                
                if not answer.strip():
                    print("Answer cannot be empty. Please provide a valid response.")
                    continue
                
                chosen_transports = [transport.strip().lower() for transport in answer.split(',') if transport.strip().lower() in transport_list]
                if not chosen_transports:
                    print("Invalid selection. Please choose from the provided list.")
                    continue
                answers.append(','.join(chosen_transports))
                break
            
            elif question.startswith("How urban do you want your neighborhood to be"):
                urban_choices = ["Sub-Urban", "Urban", "Rural"]
                #print(f"Urban preference options: {', '.join(urban_choices)}")
                answer = input(question + f" (Choose from {', '.join(urban_choices)}): ")
                
                if not answer.strip():
                    print("Answer cannot be empty. Please provide a valid response.")
                    continue
                
                answer = answer.strip().title()  # Capitalize the user's choice
                if answer not in urban_choices:
                    print("Invalid selection. Please choose from the provided list.")
                    continue
                answers.append(answer)
                break
    
    ## Generate the formatted property search criteria paragraph
    prompt = f"We are looking for a property that meets our specific preferences. Our ideal home should be around {answers[0]} square feet, with an estimated budget of ${answers[1]}. We would prefer amenities such as {answers[2]}, and transportation options like {answers[3]} are essential. Additionally, we envision our neighborhood to be {answers[4]}.\n"
    return prompt

In [9]:
# Example usage:
questions = [
    "How big do you want your house to be? Enter in sqft: ", 
    "Enter the estimated price for the house you are looking for: ", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?", 
    "How urban do you want your neighborhood to be?"
]
prompt = query_user_and_create_prompt(questions)

How big do you want your house to be? Enter in sqft:  9
Enter the estimated price for the house you are looking for:  800
Which amenities would you like? (Choose from pool, gym, parking, security, balcony): pool, gym, parking
Which transportation options are important to you? (Choose from bus, train, subway, car, bike): bus, train, subway
How urban do you want your neighborhood to be? (Choose from Sub-Urban, Urban, Rural): urban


## Use of LLM for Generating Personalized Descriptions

In [29]:
## Logic for Searching and Augmenting Listing Descriptions
def augmented_response_generation(query):
    use_chain_helper = False
    
    if use_chain_helper:
        rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
        response = rag.run(query)
    else:
        similar_docs = db.similarity_search(query, k=3)
        for i in range(len(similar_docs)):
            print(similar_docs[i].page_content.capitalize(), "\n")
        
        prompt_template = PromptTemplate(
            template="{query}\nContext: {context}",
            input_variables=["query", "context"],
        )
        print(prompt_template, "\n")
        chain = load_qa_chain(llm, prompt=prompt_template, chain_type="stuff")
        response = chain.run(input_documents=similar_docs, query=query)
    
    print(response)
    
## Generate Response
augmented_response_generation(prompt)

Neighborhood: modern townhouse
price: $700000
bedrooms: 3
bathrooms: 2
house_size: 1800
description: live in style in this modern townhouse with sleek design and urban amenities. this 3-bedroom, 2-bathroom townhouse features a gourmet kitchen, a private patio, and a rooftop deck with city views. the contemporary finishes and open layout create a chic and sophisticated living space perfect for modern living.
neighborhood_description: modern townhouse is a trendy neighborhood with hip cafes, art galleries, and boutique shops. residents can walk to the nearby park, bike along the riverfront, or attend a yoga class at the local studio. 

Neighborhood: urban chic loft
price: $600000
bedrooms: 2
bathrooms: 1
house_size: 1500
description: live in style in this urban chic loft located in the heart of the city. this 2-bedroom, 1-bathroom loft features exposed brick walls, high ceilings, and industrial accents. the open floor plan and modern design create a trendy and vibrant living space perfec