In [2]:
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain

In [3]:
import os

# Environment configuration for the OpenAI-compatible endpoint
# (presumably read by the langchain OpenAI clients created later — confirm).
# A key that is already exported in the environment is kept as-is; the
# placeholder below is only a fallback that must be replaced with a real key.
# This avoids clobbering a working key and avoids committing a secret.
os.environ.setdefault("OPENAI_API_KEY", "your api key")
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"


In [4]:
# Chat model used by the following cells (listing generation, summary memory, retrieval chain).
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.3,
    max_tokens=2000,
)

# Step 1: prompt that asks the model for home listings as CSV.
# Placeholders filled at format time: {topic}, {attributes}, {rows}.
listing_template = (
    "\n"
    "Generate a CSV table about {topic} with these columns: {attributes}.\n"
    "Use real-world examples. Generate {rows} rows. Output only the CSV content.\n"
)
prompt_template = PromptTemplate(
    input_variables=["topic", "attributes", "rows"],
    template=listing_template,
)


In [4]:
# Alternative wording of the listing-generation prompt.
# NOTE: this rebinds `prompt_template`, so whichever of the two template cells
# ran last is the one used by later cells.
prompt = '''
generate a csv file that store information about {topic} 
make sure to include the following attriutes {attributes}. use a real word examples.
generate {rows} rows

print the data only without intro or end
'''
prompt_template = PromptTemplate(template=prompt, input_variables=['topic', 'attributes', 'rows'])

# `llm` is a chat model: send the formatted prompt through `invoke` (the same
# entry point the save-to-CSV cell uses) instead of calling the model object
# directly with a string. `invoke` returns a message object, so print only its
# text via `.content` to get the bare CSV.
res = llm.invoke(
    prompt_template.format(
        topic="Homes",
        attributes="Neighborhood, location, bedrooms, bathrooms, house size (sqft), price (k$)",
        rows="20",
    )
)

print(res.content)

Neighborhood,Location,Bedrooms,Bathrooms,House Size (sqft),Price (k$)
Westwood,Los Angeles,4,3,2500,1200
Upper East Side,New York City,3,2,1800,1500
Georgetown,Washington D.C.,5,4,3500,2000
South Beach,Miami,2,2,1200,800
Pacific Heights,San Francisco,4,3,2800,1800
Old Town,Chicago,3,2,2000,1000
Capitol Hill,Seattle,3,2,1900,900
Buckhead,Atlanta,4,3,2600,1100
South End,Boston,2,1,1500,1200
Downtown,Austin,1,1,800,500
Queen Anne,Seattle,5,4,4000,2500
South Loop,Chicago,2,2,1400,700
Mission District,San Francisco,3,2,1800,1600
Midtown,Atlanta,1,1,900,600
Dupont Circle,Washington D.C.,2,1,1300,1000
South Beach,Miami,3,3,2200,1500
Greenwich Village,New York City,4,3,2700,1900
Venice Beach,Los Angeles,2,1,1600,1100
Back Bay,Boston,3,2,1900,1300


In [7]:
# Ask the chat model for the listings; the reply text (`.content`) is the CSV body.
generated_csv = llm.invoke(
    prompt_template.format(
        topic="Homes",
        attributes="Neighborhood, Location, Bedrooms, Bathrooms, House Size (sqft), Price (k$)",
        rows="20"
    )
)

# Save to CSV.
# Create the target folder first: open(..., "w") does not create missing parent
# directories, so on a fresh checkout without `data/` the write would fail.
os.makedirs("data", exist_ok=True)
with open("data/listing.csv", "w") as f:
    f.write(generated_csv.content)

In [8]:
# Read the saved listings file back in as langchain documents.
loader = CSVLoader(
    file_path="data/listing.csv",
)
docs = loader.load()

In [9]:
# Split the loaded documents (chunk_size 1000, no overlap between chunks).
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
split_docs = text_splitter.split_documents(docs)

# Embed the chunks into a Chroma store persisted under "db" and expose it as a retriever.
# NOTE(review): with a persisted store, re-running this cell may add the same
# listings to the existing collection again — confirm, and clear "db" if so.
embedding = OpenAIEmbeddings()
db = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding,
    persist_directory="db",
)
retriever = db.as_retriever()

In [10]:
# Step 3: Simulate buyer preferences
# Scripted interview: each pair is (assistant question, canned buyer answer).
interview = [
    (
        "How many rooms do you want in your house?",
        "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    ),
    (
        "Please tell me about the surroundings in choosing this property?",
        "A quiet neighborhood, good local schools, and convenient shopping options.",
    ),
    (
        "Which facilities would you like?",
        "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    ),
    (
        "Which transportation options are important to you?",
        "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    ),
    (
        "How urban or rural do you want your neighborhood to be?",
        "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
    ),
]
personal_questions = [asked for asked, _ in interview]
personal_answers = [replied for _, replied in interview]

# Replay the interview into a message history: one opening instruction, then
# alternating AI question / user answer, then a closing AI request.
history = ChatMessageHistory()
opening = f"You are an AI sales assistant. Ask the user {len(personal_questions)} questions to understand preferences."
history.add_user_message(opening)
for asked, replied in interview:
    history.add_ai_message(asked)
    history.add_user_message(replied)

history.add_ai_message("Now summarize the preferred home features.")

In [11]:
# Summary memory seeded from the scripted interview stored in `history`.
# `from_messages` summarises the messages already in `history` with the LLM, so
# the memory starts out holding the buyer's stated preferences.
# The earlier version passed an instruction sentence as `buffer=`. `buffer` is
# the field that stores the running summary itself, so that sentence was what
# the chain received as "chat_history", and the interview answers were never
# summarised before the first query.
memory = ConversationSummaryMemory.from_messages(
    llm=llm,
    chat_memory=history,
    memory_key="chat_history",  # name under which the chain reads the summary
    input_key="question",       # which chain input is recorded as the user turn
    return_messages=True,
)

In [12]:
# Step 4: Define prompt for retrieval
# Template with three placeholders: {context}, {chat_history}, {question}.
qa_template = """
You are a sales assistant helping a home buyer.
Use the retrieved context and customer preferences to suggest a suitable home.
Keep it attractive and concise. Max 5 sentences.

Context: {context}
Preferences: {chat_history}
Question: {question}

Answer:
"""
qa_prompt = PromptTemplate(
    input_variables=["context", "chat_history", "question"],
    template=qa_template,
)

In [13]:
# Step 5: Build Conversational Retrieval Chain
# Wires together the chat model, the Chroma retriever and the summary memory;
# the custom prompt is passed to the combine-documents step ("stuff" chain type).
combine_kwargs = {"prompt": qa_prompt}
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    combine_docs_chain_kwargs=combine_kwargs,
    memory=memory,
)

In [14]:
# Step 6: Run assistant
# Send one fixed request through the chain and print its "answer" under a heading.
query = "As a sales assistant, show the best matching home for this user in an appealing format."
chain_inputs = {"question": query}
result = chain(chain_inputs)

recommendation = result["answer"]
print("\nPersonalized Recommendation:\n")
print(recommendation)


Personalized Recommendation:

Based on your preferences for a 4-bedroom, 3-bathroom home in a desirable location within your budget, I recommend the property in the Mission District, San Francisco. This home offers the perfect combination of space, amenities, and location, all at a competitive price of $1800k. Don't miss out on this opportunity to own a beautiful home in one of San Francisco's most sought-after neighborhoods!


# Creating the Agent

In [18]:
# import libraries
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
import os

# Questions put to the buyer, in the order they are asked.
personal_questions = [
    "How many rooms do you want in your house?",
    "Please tell me about the surroundings in choosing this property?",
    "Which facilities would you like?",
    "Which transportation options are important to you?",
    "How urban or rural do you want your neighborhood to be?",
]

# Chat model for this cell.
# NOTE(review): max_tokens=100 is much lower than the 2000 used by the earlier
# model instance — confirm replies are not being cut short.
model_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(
    model_name=model_name,
    temperature=0.3,
    max_tokens=100,
)

# Load the home listings CSV as langchain documents.
loader = CSVLoader(file_path='data/listing.csv')
docs = loader.load()

# Split the documents (chunk_size 1000, no overlap) ...
splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
split_docs = splitter.split_documents(docs)

# ... then embed them and build the Chroma vector store (no persist directory given here).
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(
    embedding=embeddings,
    documents=split_docs,
)

# Interview the user on stdin and record the exchange as a message history:
# one opening instruction, then each question (AI) followed by the typed answer (user),
# and finally a closing AI request.
history = ChatMessageHistory()
opening_instruction = f"""You are AI sales assisstant that will recommend user a home based on their answers to personal questions. Ask user {len(personal_questions)} questions"""
history.add_user_message(opening_instruction)
for question in personal_questions:
    history.add_ai_message(question)
    answer_prompt = f"{question}\nAnswer: "
    user_answer = input(answer_prompt)
    history.add_user_message(user_answer)

closing_request = """Now tell me a summary of a home you're considering in points"""
history.add_ai_message(closing_request)
# Summary memory seeded from the interview stored in `history`.
# `from_messages` summarises the messages already in `history` with the LLM, so
# the memory starts out holding what the user actually answered.
# The earlier version passed an instruction sentence as `buffer=`. `buffer` is
# the field that stores the running summary itself, so that sentence was what
# the chain received as "chat_history", and the user's answers were never
# summarised before the first query.
memory = ConversationSummaryMemory.from_messages(
    llm=llm,
    chat_memory=history,
    memory_key="chat_history",  # name under which the chain reads the summary
    input_key="question",       # which chain input is recorded as the user turn
    return_messages=True)


# Answer prompt with three placeholders: {context}, {chat_history}, {question}.
# The text is assembled from adjacent literals; the resulting string is unchanged.
answer_template = (
    "You are an sales assistant who buy homes. "
    "Use the following pieces of retrieved context and customer prefrences "
    "to provide the customer with information about available home. "
    "Use five sentences maximum and keep the answer attractive. \n"
    "Context: {context} \n"
    "Customer's prefernced: {chat_history} \n"
    "Question: {question}\n"
    "Answer:"
)
prompt = PromptTemplate(
    input_variables=['context', 'chat_history', 'question'],
    template=answer_template,
)

# Build the conversational retrieval chain: chat model + Chroma retriever +
# summary memory, with the custom prompt passed to the combine-documents step.
chain_type_kwargs = {'prompt': prompt}
listing_retriever = db.as_retriever()

chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=listing_retriever,
    memory=memory,
    chain_type="stuff",
    combine_docs_chain_kwargs=chain_type_kwargs,
)

# Fixed request sent to the chain (the user's own input was collected earlier
# through the interview questions).
query = "as a sales assisstant, represent the answer in professional way"

# Run the chain and print only the generated answer.
chain_inputs = {"question": query}
result = chain(chain_inputs)
answer_text = result['answer']
print(answer_text)

How many rooms do you want in your house?
Answer: at least 3 rooms
Please tell me about the surroundings in choosing this property?
Answer: sunset, beach, plazground
Which facilities would you like?
Answer: market, school
Which transportation options are important to you?
Answer: bus and car
How urban or rural do you want your neighborhood to be?
Answer: urban
Based on your preferences, I have found a stunning 4 bedroom, 3 bathroom home in the desirable Mission District of San Francisco. This spacious 2200 sqft home is listed at $1800k, offering ample space for your family. The vibrant neighborhood and prime location make it a perfect choice for you. Don't miss out on this amazing opportunity to own a beautiful home in the heart of San Francisco!
