# Project Setup

## Run this code to install required libraries

In [51]:
!pip install -U --quiet lancedb pandas langchain-community

In [52]:
import os
#os.environ["OPENAI_API_KEY"] = ""

In [2]:
!pip install lancedb;



In [1]:
import os
# os.environ["OPENAI_API_KEY"] = "<paste your key here or specify as environment variable>"

In [2]:
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.chains import RetrievalQA, ConversationChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory, CombinedMemory, ChatMessageHistory
from langchain_community.vectorstores.lancedb import LanceDB as vectorstore
from pydantic import BaseModel, Field
from typing import List, Union, Dict, Any
import lancedb
from lancedb.pydantic import vector, LanceModel
from tqdm import tqdm
from itertools import chain
import json
import pandas as pd

In [3]:
model_name = "gpt-3.5-turbo"
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 1500)



# Synthetic Data Generation


## Generate a list of Neighborhood names

In [4]:
class Neighborhood(LanceModel):
    name: str = Field(description="Neighborhood Name")
    description: str = Field(description="Neighborhood Description")
        
class NeighborhoodList(BaseModel):
    neighborhood_list: List[Neighborhood] = Field(description="Neighborhood Description")

In [5]:
neighborhood_parser = PydanticOutputParser(pydantic_object=NeighborhoodList)

In [6]:
initial_prompt = PromptTemplate(
    input_variables=["region", "state", "num_neighborhoods"],
    template="Example Results for New York City, New York:\
Upper East Side\
Greenwich Village\
Lower East Side\
Chelsea\
Bedford-Stuyvesant\
Fort Greene\
Clinton Hill\
Brooklyn\
Queens\
Jackson Heights\
\
Instructions: \
List {num_neighborhoods} popular neighborhoods in {region}, {state} including their description.\
Make sure to include neighborhoods within major cities.\
\
{format_instructions}",
    partial_variables={"format_instructions": neighborhood_parser.get_format_instructions}
)

In [7]:
region = "Oahu"
state = "Hawaii"
num_neighborhoods = 15

In [8]:
response = llm(initial_prompt.format(region=region, state=state, num_neighborhoods=num_neighborhoods))

  warn_deprecated(


In [9]:
result = json.loads(response)

In [10]:
neighborhood_objs = [Neighborhood(**r) for r in result.get("neighborhood_list")]

In [11]:
neighborhoods = [obj.name for obj in neighborhood_objs]

In [12]:
neighborhoods

['Waikiki',
 'Kailua',
 'Kapolei',
 'Pearl City',
 'Kaneohe',
 'Mililani',
 'Ewa Beach',
 'Wahiawa',
 'Aiea',
 'Waipahu',
 'Haleiwa',
 'Kahala',
 'Makiki',
 'Kapahulu',
 'Kalihi']

## Using the list of neighborhoods, generate descriptions of mock real estate listings

In [13]:
class Listing(LanceModel):
#     id: str = Field(description="Simple identifier of property")
    description: str = Field(description="Description of property")
    price: str = Field(description="Price of property in dollars")
    num_bedrooms: str = Field(description="Number of bedrooms")
    num_bathrooms: str = Field(description="Number of bathrooms")
    area: str = Field(description="Area of property in square feet")
    property_type: str = Field(description="Property type (condo, single-family home, etc)")
    neighborhood: str = Field(description="Neighborhood of property")
    neighborhood_description: str = Field(description="Description of neighborhood")
        
class ListingResult(BaseModel):
    listings: List[Listing] = Field(description="List of properties per neighborhood")

In [14]:
listing_parser = PydanticOutputParser(pydantic_object=ListingResult)

In [15]:
with open('./real_estate_prompt.txt') as f:
    template = f.read()

In [16]:
neighborhood_prompt = PromptTemplate(
    input_variables=["neighborhood", "region", "state"],
    template=template,
    partial_variables={"format_instructions": listing_parser.get_format_instructions}
)

In [17]:
listings_chain = LLMChain(llm=llm, prompt=neighborhood_prompt)

  warn_deprecated(


In [18]:
results = [
    listings_chain(
        {
            "neighborhood": n, 
            "region": region, 
            "state": state
        })
    for n in tqdm(neighborhoods, position=0, leave=True)]

  warn_deprecated(
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [02:12<00:00,  8.81s/it]


In [19]:
listings = list(chain(*[json.loads(result.get("text")).get("listings") for result in results]))

In [20]:
with open("./listings.txt", "w") as f:
    f.write(json.dumps(listings))

In [21]:
listing_objs = [Listing(**listing) for listing in listings]

# Semantic Search

### Generate Embeddings

In [22]:
embeddings = OpenAIEmbeddings()

  warn_deprecated(


In [23]:
def generate_text(obj: Union[str, Listing, Neighborhood], id: int):
    return (
        f"{obj.description} This {obj.property_type} is located in {obj.neighborhood}."
        f"{obj.neighborhood} is described as {obj.neighborhood_description}."
        f"It features {obj.num_bedrooms} bedrooms and {obj.num_bathrooms} bathrooms, "
        f"covering an area of {obj.area} square feet. The listed price is {obj.price}."
        f"Property identifier: {id + 1}"
    )

In [24]:
listing_data = [generate_text(obj=obj, id=idx) for idx, obj in enumerate(listing_objs)] 

In [25]:
lance = lancedb.connect("./.lancedb")

In [26]:
db = vectorstore(lance,embedding=embeddings)

In [27]:
db.add_texts(listing_data);

# Augmented Response Generation

In [28]:
# Logic for Searching and Augmenting Listing Descriptions

# The project must demonstrate a logical flow where buyer preferences are used to search and then augment the description of real estate listings. 
# The augmentation should personalize the listing without changing factual information.

In [29]:
# Use of LLM for Generating Personalized Descriptions

# The submission must utilize an LLM to generate personalized descriptions for the real estate listings based on buyer preferences. 
# The descriptions should be unique, appealing, and tailored to the preferences provided.

### User Preference Questions

In [30]:
questions = [   
    "How big do you want your house to be?" 
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",   
]

In [31]:
# persona = "persona_1"
persona = "persona_2"

### Persona 1

In [32]:
persona_01_answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

### Persona 2

In [33]:
persona_02_answers = [
    "I'm looking for a small 1 bedroom condominium for myself",
    "Trendy neighborhood close to shopping and nightlife, near the beach, great city views",
    "Convenient parking, doorman and luxury",
    "Easy access to a reliable bus line and current or planned stops of the light rail system",
    "A big city neighborhood that is super lively"
]

In [34]:
answers = ""

match persona:
    case "persona_1":
        answers = persona_01_answers
    case "persona_2":
        answers = persona_02_answers

### Interactive Answers

In [35]:
#answers = [ ] 
#for question in questions:
#    answer = input(question)
#    answers.append(answer)

### Construct Conversation History

In [36]:
history = ChatMessageHistory()
history.add_user_message(
    (
        f"You are AI that will recommend user various properties."
        "The recommendations are based on answers to personal questions."
        f"Ask user {len(questions)} questions"
    )
)
    
for i in range(len(questions)):
    history.add_ai_message(questions[i])
    history.add_user_message(answers[i])
    
summary_memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="recommendation_summary", 
    input_key="input",
    buffer=(
        f"The human answered {len(questions)} personal questions)."
    ),
    return_messages=True
)


class MementoBufferMemory(ConversationBufferMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        input_str, output_str = self._get_input_output(inputs, outputs)
        self.chat_memory.add_ai_message(output_str)
    
conversational_memory = MementoBufferMemory(
    chat_memory=history,
    memory_key="questions_and_answers", 
    input_key="input"
)

# Combined
memory = CombinedMemory(memories=[conversational_memory, summary_memory])

In [37]:
RECOMMENDER_TEMPLATE = """The following is a friendly conversation between a human and an AI Real Estate Recommender.
The AI follows human instructions and provides real estate recommendations based on
a human's persona derived from their answers to questions. Make sure that the neighborhood of the property matches
the users preferences.

Summary of Recommendations:
{recommendation_summary}
Personal Questions and Answers:
{questions_and_answers}
Human: {input}
AI:"""
PROMPT = PromptTemplate(
    input_variables=["recommendation_summary", "input", "questions_and_answers"],
    template=RECOMMENDER_TEMPLATE
)
recommender = ConversationChain(llm=llm, verbose=True, memory=memory, prompt=PROMPT)

In [38]:
prediction = recommender.predict(input="Summarize this conversation\
and generate a description of the ideal property and neighborhood for user based on their preferences.")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI Real Estate Recommender.
The AI follows human instructions and provides real estate recommendations based on
a human's persona derived from their answers to questions. Make sure that the neighborhood of the property matches
the users preferences.

Summary of Recommendations:
[SystemMessage(content='The human answered 4 personal questions).')]
Personal Questions and Answers:
Human: You are AI that will recommend user various properties.The recommendations are based on answers to personal questions.Ask user 4 questions
AI: How big do you want your house to be?What are 3 most important things for you in choosing this property?
Human: I'm looking for a small 1 bedroom condominium for myself
AI: Which amenities would you like?
Human: Trendy neighborhood close to shopping and nightlife, near the beach, great city views
AI: Which transpor

In [39]:
print(prediction)

Based on your preferences, I recommend a small 1 bedroom condominium in a trendy neighborhood close to shopping and nightlife, near the beach, with great city views. The ideal property should have convenient parking, a doorman, and luxury amenities. The neighborhood should have easy access to a reliable bus line and current or planned stops of the light rail system. This combination of features will provide you with the perfect urban living experience.


In [40]:
similar_docs = db.similarity_search(prediction, k=3)

In [45]:
query = "Casually address the user. Based on the property listings in the context, describe to me why each property would be a good fit.\
Explain how the listing matches the user preferences as they expressed in the chat history.\
Include a catchy nickname for this property followed by the property identifier. Use emojis for expression.\
Finally, include listing details beneath each explanation including price, area, number of bedrooms and bathrooms."

# Provide a concise summary of each property to the user

In [46]:
prompt = PromptTemplate(
    template="{query}\nContext: {context}",
    input_variables=["query", "context"],
)
chain = load_qa_chain(llm, prompt = prompt, chain_type="stuff")
print(chain.run(input_documents=similar_docs, memory=memory, query = query))

Hey there! 🌴 Looking for your dream beachfront property in Pearl City? Check out this stunning 2-bedroom, 2-bathroom condo in Pearl City Beachfront - Paradise by the Ocean (Property 10). With high-end finishes, a spacious balcony, and resort-style amenities, this unit is perfect for water sports enthusiasts like yourself. Priced at $1,200,000, this 1,500 sqft condo offers the ultimate island living experience.

Hey beach lover! 🏖️ Want to wake up to ocean views in Waikiki? This 2-bedroom, 2-bathroom condo in Waikiki - Beachfront Bliss (Property 1) is calling your name. With a gourmet kitchen, resort-style amenities, and a private beach access, this $1,500,000, 1,500 sqft unit is perfect for your beachfront retreat.

Hey luxury seeker! 🏙️ Looking for a spacious condo with panoramic views in Aiea? This 4-bedroom, 3-bathroom condominium in Aiea - Urban Oasis (Property 25) is the perfect fit for you. With high-end finishes, resort-style amenities, and easy access to Honolulu, this $1,200,0

## Spot check data accuracy

Check the original listing objects, making sure the property data is consistent. ID's are zero indexed in code, so subtract 1 from number listed above. (Listing id's will vary since the results are non-deterministic.)

In [48]:
print(listing_objs[24])

description='Experience luxury living in this stunning 4-bedroom, 3-bathroom condominium in the heart of Aiea. With panoramic views of the ocean and mountains, this spacious unit offers high-end finishes and top-of-the-line appliances. Enjoy resort-style amenities including a pool, fitness center, and concierge services. Conveniently located near major highways for easy access to Honolulu and beyond. Explore nearby hiking trails and beaches, or indulge in shopping and dining at the nearby Aiea Shopping Center.' price='$1,200,000' num_bedrooms='4' num_bathrooms='3' area='2,500 sqft' property_type='Condominium' neighborhood='Aiea' neighborhood_description='Aiea offers a mix of urban convenience and natural beauty, with top-rated schools and easy access to outdoor activities.'
