This is a starter notebook for the project, you'll have to import the libraries you'll need, you can find a list of the ones available in this workspace in the requirements.txt file in this workspace. 

# Step 1: Setting Up the Python Application

In [None]:
pip install pandas


In [None]:
pip install langchain-core

In [None]:
# We don't have access to install libraries in the workspace
# Please make sure those are installed
!pip install -q -r ./requirements.txt

In [1]:
import os, json
from IPython.display import Markdown, display
import pandas as pd
import numpy as np
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# load up the .env file with secrets
#_ = load_dotenv(find_dotenv())

from pydantic import BaseModel, Field, NonNegativeInt
from typing import List, Optional, Type

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.output_parsers import PydanticOutputParser
from langchain.document_loaders import CSVLoader  #JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain.prompts import (ChatPromptTemplate,
                               PromptTemplate,
                               SystemMessagePromptTemplate,
                               AIMessagePromptTemplate,
                               HumanMessagePromptTemplate,
                               )
from langchain.schema import AIMessage, HumanMessage, SystemMessage, FunctionMessage
from langchain.utils.openai_functions import convert_pydantic_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.tools import BaseTool, format_tool_to_openai_function
from langchain import LLMChain

import openai

In [2]:

from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate

# Environment variables
os.environ["OPENAI_API_KEY"] = "voc-51499078612667733770986696158922c6d0.58646338"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"
model_name = "gpt-3.5-turbo"


# Step 2: Generating Real Estate Listings
Generate real estate listings using a Large Language Model. Generate at least 10 listings This can involve creating prompts for the LLM to produce descriptions of various properties

In [3]:
from langchain.llms import OpenAI
from langchain.llms import OpenAI

llm = OpenAI(model_name=model_name, temperature=0)

SYSTEM_PROMPT = "Generate ten realistic real estate listings from diverse neighborhoods."
PROMPT = \
"""
Here's a sample listing:

Neighborhood: Green Oaks
Price ($): 800,000
Bedrooms: 3
Bathrooms: 2
House Size (sqft): 2,000
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Example Entry Format:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description
Green Oaks,"$800,000",3,2,"2,000 sqft","Nestled in Green Oaks, this eco-friendly haven features a 3-bedroom, 2-bathroom layout with solar panels and efficient insulation. Highlights include abundant natural light, hardwood floors, and an open-concept kitchen that leads to a lush backyard, embodying a sanctuary for eco-conscious living. The neighborhood of Green Oaks is celebrated for its vibrant and environmentally-aware community, boasting organic stores, community gardens, and convenient transit options, rendering it perfect for those prioritizing sustainability and community engagement".


"""





In [4]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, NonNegativeInt
from typing import List

class RealEstateListing(BaseModel):
    neighborhood: str = Field(description="Name of the neighborhood")
    price: NonNegativeInt = Field(description="Price of the property in USD")
    bedrooms: NonNegativeInt = Field(description="Number of bedrooms in the property")
    bathrooms: NonNegativeInt = Field(description="Number of bathrooms in the property")
    house_size: NonNegativeInt = Field(description="Size of the property in square feet")
    description: str = Field(description="Description of the property.")   
    #neighborhood_description: str = Field(description="Description of the neighborhood.")  

class ListingCollection(BaseModel):
    listing: List[RealEstateListing] = Field(description="List of available real estate")
        
parser = PydanticOutputParser(pydantic_object=ListingCollection)

In [5]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template="{instruction}\n{sample}\n{format_instructions}\n",
    input_variables=["instruction", "sample"],
    partial_variables={"format_instructions": parser.get_format_instructions},
)
query = prompt.format(instruction = SYSTEM_PROMPT, sample = PROMPT)
print(query)

Generate ten realistic real estate listings from diverse neighborhoods.

Here's a sample listing:

Neighborhood: Green Oaks
Price ($): 800,000
Bedrooms: 3
Bathrooms: 2
House Size (sqft): 2,000
Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Example Entry Format:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description
Green Oaks,"$800,000",3,2,"2,000 sqft","Nestled in Green Oaks, this eco-friendly haven features a 3-bedroom, 2-bathroom layout with solar panels and efficient insulation. Highlights includ

# Load LLM Model

In [6]:
temperature = 0.0
llm = OpenAI(model_name=model_name, temperature=temperature, max_tokens = 500)



In [7]:
output = llm(query)

  warn_deprecated(


In [8]:
from fastapi.encoders import jsonable_encoder
import pandas as pd

result = parser.parse(output)
df = pd.DataFrame(jsonable_encoder(result.listing))
df.head()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description
0,Downtown,1200000,4,3,2500,"Luxurious 4-bedroom, 3-bathroom penthouse in t..."
1,Suburbia Estates,600000,5,4,3000,"Welcome to this charming 5-bedroom, 4-bathroom..."
2,Waterfront District,1800000,3,3,2200,Stunning waterfront property with 3 bedrooms a...
3,Historic Old Town,900000,2,2,1800,"Quaint 2-bedroom, 2-bathroom cottage in the hi..."
4,Mountain View,750000,3,2,2000,"Escape to this peaceful 3-bedroom, 2-bathroom ..."


### Store the RealEstate Listing to the file.

In [21]:
df.to_csv('RealEstate_listings.csv',index_label = 'id')

# Step 3: Storing Listings in a Vector Database
Vector Database Setup: Initialize and configure ChromaDB or a similar vector database to store real estate listings.
Generating and Storing Embeddings: Convert the LLM-generated listings into suitable embeddings that capture the semantic content of each listing, and store these embeddings in the vector database.

In [25]:
df=pd.read_csv('RealEstate_listings.csv')
df.head()

Unnamed: 0,id,id.1,neighborhood,price,bedrooms,bathrooms,house_size,description
0,0,0,Downtown,1200000,4,3,2500,"Luxurious 4-bedroom, 3-bathroom penthouse in t..."
1,1,1,Suburbia Estates,600000,5,4,3000,"Welcome to this charming 5-bedroom, 4-bathroom..."
2,2,2,Waterfront District,1800000,3,3,2200,Stunning waterfront property with 3 bedrooms a...
3,3,3,Historic Old Town,900000,2,2,1800,"Quaint 2-bedroom, 2-bathroom cottage in the hi..."
4,4,4,Mountain View,750000,3,2,2000,"Escape to this peaceful 3-bedroom, 2-bathroom ..."


In [26]:
from langchain.schema import Document
import shutil
# Configuration
CHROMA_PATH = "data/vectordb/1"
CSV_PATH = "RealEstate_listings.csv" 

df = pd.read_csv(CSV_PATH)
documents = []
for index, row in df.iterrows():
    documents.append(Document(page_content=row['description'], metadata={'id': str(index)}))


# Split Text
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

if chunks:
    document = chunks[10]
    print(document.page_content)
    print(document.metadata)

# Save to Chroma
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

db = Chroma.from_documents(
    chunks, OpenAIEmbeddings(), persist_directory=CHROMA_PATH
)
db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

Split 5 documents into 14 chunks.
Quaint 2-bedroom, 2-bathroom cottage in the historic Old Town neighborhood. This charming home features original hardwood floors, a cozy fireplace,
{'id': '3', 'start_index': 0}
Saved 14 chunks to data/vectordb/1.


# Step 5: Implementing Semantic Search and Augmented Response Generation

In [27]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Given the context provided above, craft a response that not only answers the question {question}, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements
"""

query_text = "Would like to buy home in calm neighbourhood" 

# Prepare the DB.
embedding_function = OpenAIEmbeddings()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=3)
if len(results) == 0 or results[0][1] < 0.7:
    print(f"Unable to find matching results.")
else:
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(f"Generated Prompt:\n{prompt}")

    model = ChatOpenAI()
    response_text = model.predict(prompt)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)

Generated Prompt:
Human: 
Answer the question based only on the following context:

Estates. This well-maintained property boasts a large backyard, perfect for outdoor gatherings, and is located near top-rated schools and parks.

---

original hardwood floors, a cozy fireplace, and a beautifully landscaped garden. Walk to local shops, cafes, and galleries from this prime location.

---

in the family-friendly neighborhood of Suburbia Estates. This well-maintained property boasts a large backyard, perfect for outdoor gatherings, and

---

Given the context provided above, craft a response that not only answers the question Would like to buy home in calm neighbourhood, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements

Response: Based on the information provided, it sounds like the home in Suburbia Estates would b

# Step 6: Personalizing Listing Descriptions ( Two Examples)

In [28]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Given the context provided above, craft a response that not only answers the question {question}, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements
"""

query_text = "What are 3 most important things for you in choosing this property?" 

# Prepare the DB.
embedding_function = OpenAIEmbeddings()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=3)
if len(results) == 0 or results[0][1] < 0.7:
    print(f"Unable to find matching results.")
else:
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(f"Generated Prompt:\n{prompt}")

    model = ChatOpenAI()
    response_text = model.predict(prompt)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)

Generated Prompt:
Human: 
Answer the question based only on the following context:

Estates. This well-maintained property boasts a large backyard, perfect for outdoor gatherings, and is located near top-rated schools and parks.

---

original hardwood floors, a cozy fireplace, and a beautifully landscaped garden. Walk to local shops, cafes, and galleries from this prime location.

---

Enjoy breathtaking views of the bay from every room, a private dock for your boat, and a spacious deck for entertaining guests.

---

Given the context provided above, craft a response that not only answers the question What are 3 most important things for you in choosing this property?, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements

Response: The three most important things for me in choosing this property are the large back

In [30]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Given the context provided above, craft a response that not only answers the question {question}, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements
"""

query_text = "How urban do you want your neighborhood to be?"

# Prepare the DB.
embedding_function = OpenAIEmbeddings()
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=3)
if len(results) == 0 or results[0][1] < 0.7:
    print(f"Unable to find matching results.")
else:
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)
    print(f"Generated Prompt:\n{prompt}")

    model = ChatOpenAI()
    response_text = model.predict(prompt)

    sources = [doc.metadata.get("id", None) for doc, _score in results]
    formatted_response = f"Response: {response_text}\nSources: {sources}"
    print(formatted_response)

Generated Prompt:
Human: 
Answer the question based only on the following context:

rooftop terrace. Enjoy the convenience of urban living with top-rated restaurants, shopping, and entertainment just steps away.

---

Estates. This well-maintained property boasts a large backyard, perfect for outdoor gatherings, and is located near top-rated schools and parks.

---

original hardwood floors, a cozy fireplace, and a beautifully landscaped garden. Walk to local shops, cafes, and galleries from this prime location.

---

Given the context provided above, craft a response that not only answers the question How urban do you want your neighborhood to be?, but also ensures that your explanation is distinct, captivating, and customized to align with the specified preferences. Strive to present your insights in a manner that resonates with the audience's interests and requirements

Response: If you desire a neighborhood that offers the perfect blend of urban convenience and suburban charm, then