In [1]:
import os
# os.environ["OPENAI_API_KEY"] =  "OPENAI_API_KEY"


from tqdm import tqdm
from pprint import pprint


from langchain_openai import OpenAI
from langchain.vectorstores import Chroma

from langchain.prompts import ChatPromptTemplate
from langchain.embeddings import OpenAIEmbeddings


# Construct Skeleton Prompt Template

In [2]:
template_string = """Generate a real estate description
of a house, it has to be maximum 100 words long and have 
the following characteristics:

Neighborhood: {neighborhood}\n
Price: {price}\n sqft
Bedrooms: {bedrooms}\n
Bathrooms: {bathrooms}\n
House Size: {size}\n

It's mandatory to include these exact information in the description, 
Here are some topics that you can discuss to enrich the output, you
don't have to include them all and they don't have to be all positive, 
but if you come up with something negative please do it kindly.

{description_topics}

Take a look at the following example to have an idea of
the expected output, the text is delimited by triple backticks:

```{example}```

Try not to start the description with the words "Welcome to".

"""

prompt_template = ChatPromptTemplate.from_template(template_string)

In [3]:
prompt_template.messages[0].input_variables

['bathrooms',
 'bedrooms',
 'description_topics',
 'example',
 'neighborhood',
 'price',
 'size']

# Suggesting Description Topics to the Bot

In [4]:
ADDITIONAL_DESCRIPTION_TOPICS = """
Neighborhood details:
- The house might be located in a quiet residential area, a bustling city environment, or something in between
- In the case the house is downtown, it could be close to touristic places, you can use the neighborhood to suggest any
- In the case the house is downtown, it could be close to nice restaurants or bars, you can use the neighborhood to suggest any
- Easiness to get public transportation
- Security in the neighborhood
- Closeness schools, work, malls, supermarkets and drugstores 

House specific details:
- The house can be single-family home, condo or townhouse
- It can have implementation of solar energy
- Space for parking could be generous or not having anything at all
- The presence or not of gardens
- Existence of a pool
- If the building needs remodeling
- Architectural style
- Bike-friendly roads
- Easy access to people with reduced mobility
"""

# Setting Sample Expected Output

In [5]:
SAMPLE_DESCRIPTION = """INPUT:

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

OUTPUT:

Welcome to this eco-friendly oasis
nestled in the heart of Green Oaks. This charming
3-bedroom, 2-bathroom home boasts energy-efficient
features such as solar panels and a well-insulated
structure. Natural light floods the living spaces,
highlighting the beautiful hardwood floors and
eco-conscious finishes. The open-concept kitchen and
dining area lead to a spacious backyard with a 
vegetable garden, perfect for the eco-conscious family.
Embrace sustainable living without compromising on style 
in this Green Oaks gem.
"""
print(SAMPLE_DESCRIPTION)

INPUT:

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

OUTPUT:

Welcome to this eco-friendly oasis
nestled in the heart of Green Oaks. This charming
3-bedroom, 2-bathroom home boasts energy-efficient
features such as solar panels and a well-insulated
structure. Natural light floods the living spaces,
highlighting the beautiful hardwood floors and
eco-conscious finishes. The open-concept kitchen and
dining area lead to a spacious backyard with a 
vegetable garden, perfect for the eco-conscious family.
Embrace sustainable living without compromising on style 
in this Green Oaks gem.



# Generate Random Input Data for Template

In [6]:
import random

def random_data_generation():

    boroughs_list = ["Manhattan", "Brooklyn", "Queens", "The Bronx", "Staten Island"]
    price_list = list(range(100_000, 1_000_000, 50_000))
    size_list = list(range(800, 10_000, 100))

    neighborhood = random.choice(boroughs_list)
    price = random.choice(price_list)
    size = random.choice(size_list)
    bathrooms = random.randint(1, 5)
    bedrooms = random.randint(1, 5)

    return neighborhood, price, size, bathrooms, bedrooms

In [7]:
for _ in range(10):
    print(random_data_generation())

('Staten Island', 350000, 7900, 4, 5)
('Brooklyn', 650000, 8700, 3, 3)
('Brooklyn', 700000, 8400, 5, 1)
('Brooklyn', 900000, 2300, 3, 4)
('Queens', 650000, 3700, 1, 5)
('The Bronx', 650000, 8200, 5, 2)
('The Bronx', 400000, 9700, 3, 4)
('The Bronx', 600000, 5000, 3, 5)
('Manhattan', 600000, 6200, 3, 2)
('The Bronx', 450000, 4800, 4, 3)


# Testing Template

In [8]:
INPUT_NEIGHBORHOOD, INPUT_PRICE, INPUT_SIZE, INPUT_BATHROOMS, INPUT_BEDROOMS  = random_data_generation()

prompt_description = prompt_template.format_messages(
    neighborhood = INPUT_NEIGHBORHOOD, 
    price = INPUT_PRICE, 
    bedrooms = INPUT_BEDROOMS, 
    bathrooms= INPUT_BATHROOMS, 
    size = INPUT_SIZE,  
    description_topics = ADDITIONAL_DESCRIPTION_TOPICS,
    example = SAMPLE_DESCRIPTION,
)

print(prompt_description)

 

[HumanMessage(content='Generate a real estate description\nof a house, it has to be maximum 100 words long and have \nthe following characteristics:\n\nNeighborhood: The Bronx\n\nPrice: 950000\n sqft\nBedrooms: 1\n\nBathrooms: 3\n\nHouse Size: 7200\n\n\nIt\'s mandatory to include these exact information in the description, \nHere are some topics that you can discuss to enrich the output, you\ndon\'t have to inlcude them all and they don\'t have to be all positive, \nbut if you come up with something negative please do it kindly.\n\n\n- The house might be in the suburbs\n- In the case the house is not in the suburbs, it could be close to touristic places, you can use the neighborhood to suggest any. \n- Easiness to get public transportation\n- Security in the neighborhood\n- Closeness schools, malls, supermarkets and drugstores. \n- Implementation of solar energy\n- Space for parking\n- The presence or not of gardens\n- Existence of a pool\n- If the building needs remodeling\n- Architec

# Testing ChatBot Descriptions

In [9]:
completion_model_name = "gpt-3.5-turbo-instruct"

bot = OpenAI(
    model_name=completion_model_name, 
    temperature=0.5, 
    # max_tokens = 100,
)

OpenAI(client=<openai.resources.completions.Completions object at 0x7f96b8ab5eb0>, async_client=<openai.resources.completions.AsyncCompletions object at 0x7f96b2f8af70>, temperature=0.5, openai_api_key='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', openai_proxy='')

In [10]:
bot_description = bot.invoke(prompt_description)
print(bot_description)


This charming 1-bedroom, 3-bathroom home is located in the desirable neighborhood of The Bronx. With a spacious 7200 sqft house size, this property offers plenty of room for comfortable living. The house is in close proximity to popular tourist destinations, making it a great location for those who love to explore. Public transportation is easily accessible, making daily commutes a breeze. The neighborhood is known for its security and is in close proximity to schools, malls, supermarkets, and drugstores. The house also features solar energy implementation, reducing your carbon footprint and saving on energy costs. With ample parking space, a beautiful garden, and a pool, this house is perfect for entertaining. The architectural style is modern and bike-friendly roads make it easy to get around. Don't miss the opportunity to own this gem in The Bronx.


In [11]:
print(len(bot_description.split(" ")))

136


# Generate Random Data

In [12]:
NUM_SAMPLES = 10
description_list = []

for _ in tqdm(range(NUM_SAMPLES)):

    INPUT_NEIGHBORHOOD, INPUT_PRICE, INPUT_SIZE, INPUT_BATHROOMS, INPUT_BEDROOMS  = random_data_generation()

    prompt_description = prompt_template.format_messages(
        neighborhood = INPUT_NEIGHBORHOOD, 
        price = INPUT_PRICE, 
        bedrooms = INPUT_BEDROOMS, 
        bathrooms= INPUT_BATHROOMS, 
        size = INPUT_SIZE,  
        description_topics = ADDITIONAL_DESCRIPTION_TOPICS,
        example = SAMPLE_DESCRIPTION,
    )

    bot_description = bot.invoke(prompt_description)
    description_list.append(bot_description)    


100%|██████████| 30/30 [00:53<00:00,  1.78s/it]


In [13]:

for description in description_list:
    pprint(description)
    description_length = len(description.split(" "))
    pprint(f"Description length: {description_length} words.")
    print("_"*150)



('\n'
 'This stunning home is located in the desirable neighborhood of Queens, known '
 'for its vibrant community and convenient location. With 3 bedrooms and 5 '
 'bathrooms spread across 3100 sqft, this house offers plenty of space for '
 'comfortable living. The neighborhood is safe and secure, making it ideal for '
 'families. Public transportation is easily accessible, making it a breeze to '
 'explore the nearby tourist attractions. Schools, malls, supermarkets, and '
 'drugstores are just a short distance away, providing convenience at your '
 'fingertips. The house also features a spacious backyard, perfect for outdoor '
 'gatherings or gardening. With its modern architectural style and '
 'bike-friendly roads, this house is perfect for those looking for a suburban '
 'oasis.')
'Description length: 112 words.'
______________________________________________________________________________________________________________________________________________________
('\n'
 'Located in

In [14]:
print(description_list[3])


Located in the bustling neighborhood of The Bronx, this charming house offers the perfect blend of suburban living and urban convenience. With 2 bedrooms and 1 bathroom spread over 7100 sqft, this home is ideal for a small family or a couple looking for a cozy space. The house features a modern architectural style and is equipped with solar panels for eco-friendly living. The neighborhood is bike-friendly and offers easy access to public transportation, making it a breeze to explore the nearby tourist attractions. With schools, malls, supermarkets, and drugstores in close proximity, this house offers the perfect balance of convenience and comfort. Don't miss out on the opportunity to make this house your dream home!


# Store Data in Vector Database

In [15]:
# !rm -r chroma

In [16]:
embedding = OpenAIEmbeddings()

PERSIST_CHROMA_DIR = "chroma/"

vector_data = Chroma.from_texts(
    texts = description_list, 
    embedding = embedding, 
    persist_directory = PERSIST_CHROMA_DIR
)


  warn_deprecated(


In [17]:
print(vector_data._collection.count())

30


# Send Queries to the Database

In [21]:
question = "Suggest houses in The Bronx that have a pool."
search_result = vector_data.similarity_search(
    question, 
    k=3
)

In [22]:
print(len(search_result))

3


In [23]:
for result in search_result:
    print(result)
    print("_"*200)

page_content='\nNestled in the vibrant neighborhood of The Bronx, this charming 2-bedroom, 3-bathroom house is a perfect blend of comfort and convenience. With a spacious 3200 sqft, this home offers ample space for a growing family. The suburban location provides a peaceful and secure environment, while still being close to popular tourist spots such as the Bronx Zoo and Yankee Stadium. Public transportation is easily accessible, making daily commutes a breeze. Schools, malls, supermarkets, and drugstores are all within a short distance, making errands a quick and easy task. The house also features a garden and a pool, perfect for outdoor relaxation. With its modern architectural style and bike-friendly roads, this house is a must-see!'
________________________________________________________________________________________________________________________________________________________________________________________________________
page_content="\nThis spacious 3-bedroom, 4-bathroom 

In [24]:
!ls

01_exercise.ipynb     [34mchroma[m[m                data_generation.ipynb


In [25]:
import pandas as pd

data = pd.DataFrame()
data["descriptions"] = description_list
data.head()

Unnamed: 0,descriptions
0,\nThis stunning home is located in the desirab...
1,\nLocated in the desirable neighborhood of Que...
2,"\nThis stunning 2-bedroom, 5-bathroom home is ..."
3,\nLocated in the bustling neighborhood of The ...
4,\nThis spacious and charming home is located i...


In [26]:
data.to_csv("listings.csv")