In [1]:
import os

from constants import OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] =  OPENAI_API_KEY


from tqdm import tqdm
from pprint import pprint


from langchain_openai import OpenAI
from langchain.vectorstores import Chroma

from langchain.prompts import ChatPromptTemplate
from langchain.embeddings import OpenAIEmbeddings


# Construct Skeleton Prompt Template

In [20]:
template_string = """Generate a real estate description
of a house, it has to be maximum 100 words long and have 
the following characteristics:

Neighborhood: {neighborhood}\n
Price: {price}\n sqft
Bedrooms: {bedrooms}\n
Bathrooms: {bathrooms}\n
House Size: {size}\n


Here are characteristics about the neighborhood:
{neighborhood_details}

And here some house specific details:
{house_specific_details}

It's mandatory to include these exact information in the description.
Try not to start the description with the words "Welcome to".

"""

extracted_text = """
Take a look at the following example to have an idea of
the expected output, the text is delimited by triple backticks:

```{example}```
"""

prompt_template = ChatPromptTemplate.from_template(template_string)

In [21]:
prompt_template.messages[0].input_variables

['bathrooms',
 'bedrooms',
 'house_specific_details',
 'neighborhood',
 'neighborhood_details',
 'price',
 'size']

# Setting Neighborhood Random Details

In [22]:
import random 

neighborhood_additional_details_list = [
        "Secure neighborhood",
        "Close to touristic places", 
        "Nice restaurants in the area", 
        "Good bars in the area", 
        "Fantastic disco bars in the area", 
        "Easiness to get public transportation",
        "Bike-friendly roads",
        "Closeness to schools", 
        "Closeness to work", 
        "Closeness to malls", 
        "Closeness to supermarkets",
        "Closeness to drug stores",  
]

def random_list_sample(input_list):
    len_list = len(input_list)
    num_samples = random.randint(1, len_list)
    return random.sample(input_list, num_samples)

random_list_sample(neighborhood_additional_details_list)



['Fantastic disco bars in the area',
 'Bike-friendly roads',
 'Closeness to drug stores',
 'Easiness to get public transportation',
 'Secure neighborhood',
 'Closeness to work',
 'Closeness to malls']

In [23]:
neighborhood_residential_location_details_list = [
        "Quiet residential location", 
        "Bustling residential location", 
        "Moderately quiet residential location", 
]

choice = random.choice(neighborhood_residential_location_details_list)
print(choice)
print(type(choice))

Bustling residential location
<class 'str'>


In [24]:
neighborhood_details_list = random_list_sample(neighborhood_additional_details_list)
print(neighborhood_details_list)
print(type(neighborhood_details_list))

neighborhood_random_style = random.choice(neighborhood_residential_location_details_list)
print(neighborhood_random_style)
print(type(neighborhood_random_style))

neighborhood_details_list.append(neighborhood_random_style)
print(neighborhood_details_list)


['Closeness to work', 'Closeness to supermarkets', 'Closeness to drug stores', 'Bike-friendly roads', 'Nice restaurants in the area', 'Fantastic disco bars in the area']
<class 'list'>
Quiet residential location
<class 'str'>
['Closeness to work', 'Closeness to supermarkets', 'Closeness to drug stores', 'Bike-friendly roads', 'Nice restaurants in the area', 'Fantastic disco bars in the area', 'Quiet residential location']


In [25]:
neighborhood_details = "\n".join(neighborhood_details_list)
print(neighborhood_details)

Closeness to work
Closeness to supermarkets
Closeness to drug stores
Bike-friendly roads
Nice restaurants in the area
Fantastic disco bars in the area
Quiet residential location


# Setting House Specific Random Details

In [32]:
house_type_details = [
    "Single-family home",
    "Condo", 
    "Townhouse", 
]
random_house_type = random.choice(house_type_details)

house_style_details = [
    "Classic style", 
    "Contemporary style", 
    "Minimalist style"
]
random_house_style = random.choice(house_style_details)

house_additional_details = [
        "Solar energy", 
        "Generous parking",
        "Gardens", 
        "Pool", 
        "Reduce mobility friendly", 
        "Pet friendly", 
        "Requires remodelling", 
]

house_details_list = random_list_sample(house_additional_details)
house_details_list.append(random_house_type)
house_details_list.append(random_house_style)



house_details = "\n".join(house_details_list)
print(house_details)

Pool
Reduce mobility friendly
Solar energy
Generous parking
Townhouse
Contemporary style


# Setting Sample Expected Output

In [33]:
SAMPLE_DESCRIPTION = """INPUT:

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

OUTPUT:

Welcome to this eco-friendly oasis
nestled in the heart of Green Oaks. This charming
3-bedroom, 2-bathroom home boasts energy-efficient
features such as solar panels and a well-insulated
structure. Natural light floods the living spaces,
highlighting the beautiful hardwood floors and
eco-conscious finishes. The open-concept kitchen and
dining area lead to a spacious backyard with a 
vegetable garden, perfect for the eco-conscious family.
Embrace sustainable living without compromising on style 
in this Green Oaks gem.
"""
print(SAMPLE_DESCRIPTION)

INPUT:

Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

OUTPUT:

Welcome to this eco-friendly oasis
nestled in the heart of Green Oaks. This charming
3-bedroom, 2-bathroom home boasts energy-efficient
features such as solar panels and a well-insulated
structure. Natural light floods the living spaces,
highlighting the beautiful hardwood floors and
eco-conscious finishes. The open-concept kitchen and
dining area lead to a spacious backyard with a 
vegetable garden, perfect for the eco-conscious family.
Embrace sustainable living without compromising on style 
in this Green Oaks gem.



# Generate Random Input Data for Template

In [34]:
import random

def random_data_generation():

    boroughs_list = ["Manhattan", "Brooklyn", "Queens", "The Bronx", "Staten Island"]
    price_list = list(range(100_000, 1_000_000, 50_000))
    size_list = list(range(800, 10_000, 100))

    neighborhood = random.choice(boroughs_list)
    price = random.choice(price_list)
    size = random.choice(size_list)
    bathrooms = random.randint(1, 5)
    bedrooms = random.randint(1, 5)

    return neighborhood, price, size, bathrooms, bedrooms

In [35]:
for _ in range(10):
    print(random_data_generation())

('Brooklyn', 150000, 5200, 4, 4)
('Manhattan', 950000, 6400, 4, 1)
('Manhattan', 900000, 2700, 2, 5)
('The Bronx', 950000, 7400, 1, 3)
('Staten Island', 700000, 4100, 4, 1)
('Staten Island', 800000, 4200, 5, 5)
('Manhattan', 800000, 2000, 5, 4)
('The Bronx', 500000, 1300, 5, 5)
('Staten Island', 200000, 5900, 4, 5)
('Brooklyn', 400000, 7200, 1, 3)


In [40]:
def random_neighborhood_data_generation():
    neighborhood_details_list = random_list_sample(neighborhood_additional_details_list)
    neighborhood_random_style = random.choice(neighborhood_residential_location_details_list)

    neighborhood_details_list.append(neighborhood_random_style)

    neighborhood_details = "\n".join(neighborhood_details_list)

    return neighborhood_details


print(random_neighborhood_data_generation())

Secure neighborhood
Bike-friendly roads
Closeness to supermarkets
Fantastic disco bars in the area
Closeness to schools
Bustling residential location


In [44]:
def random_house_details_generation():
    random_house_type = random.choice(house_type_details)


    random_house_style = random.choice(house_style_details)


    house_details_list = random_list_sample(house_additional_details)
    house_details_list.append(random_house_type)
    house_details_list.append(random_house_style)
    house_details = "\n".join(house_details_list)
    return house_details

print(random_house_details_generation())

Solar energy
Pool
Generous parking
Single-family home
Contemporary style


# Testing Template

In [49]:


NEIGHBORHOOD_DETAILS = random_neighborhood_data_generation()
HOUSE_DETAILS = random_house_details_generation()

INPUT_NEIGHBORHOOD, INPUT_PRICE, INPUT_SIZE, INPUT_BATHROOMS, INPUT_BEDROOMS  = random_data_generation()

prompt_description = prompt_template.format_messages(
    neighborhood = INPUT_NEIGHBORHOOD, 
    price = INPUT_PRICE, 
    bedrooms = INPUT_BEDROOMS, 
    bathrooms= INPUT_BATHROOMS, 
    size = INPUT_SIZE,  
    neighborhood_details = NEIGHBORHOOD_DETAILS,
    house_specific_details = HOUSE_DETAILS,

    example = SAMPLE_DESCRIPTION,
)

print(prompt_description)

 

[HumanMessage(content='Generate a real estate description\nof a house, it has to be maximum 100 words long and have \nthe following characteristics:\n\nNeighborhood: Brooklyn\n\nPrice: 500000\n sqft\nBedrooms: 4\n\nBathrooms: 5\n\nHouse Size: 1900\n\n\n\nHere are characteristics about the neighborhood:\nCloseness to schools\nSecure neighborhood\nCloseness to drug stores\nFantastic disco bars in the area\nCloseness to work\nBike-friendly roads\nClose to touristic places\nNice restaurants in the area\nCloseness to supermarkets\nCloseness to malls\nGood bars in the area\nBustling residential location\n\nAnd here some house specific details:\nPool\nSolar energy\nPet friendly\nGenerous parking\nGardens\nReduce mobility friendly\nTownhouse\nContemporary style\n\nIt\'s mandatory to include these exact information in the description.\nTry not to start the description with the words "Welcome to".\n\n')]


# Testing ChatBot Descriptions

In [46]:
completion_model_name = "gpt-3.5-turbo-instruct"

bot = OpenAI(
    model_name=completion_model_name, 
    temperature=0.5, 
    # max_tokens = 100,
)

In [47]:
bot_description = bot.invoke(prompt_description)
print(bot_description)


Located in the heart of The Bronx, this contemporary style house offers a unique opportunity for those seeking a vibrant and convenient lifestyle. With a spacious 5800 sqft house size, this 1 bedroom, 4 bathroom home is perfect for those looking for a low-maintenance living space. The neighborhood boasts fantastic disco bars, nice restaurants, and close proximity to supermarkets, making it an ideal location for entertainment and daily necessities. The house is also reduce mobility friendly and equipped with solar energy, providing both comfort and sustainability. Enjoy the pool and gardens, perfect for relaxation and hosting guests. This condo also offers generous parking and the potential for remodelling to make it your dream home. Don't miss out on this amazing opportunity in a moderately quiet residential location. 


In [48]:
print(len(bot_description.split(" ")))

128


# Generate Random Data

In [50]:
NUM_SAMPLES = 40
description_list = []

for _ in tqdm(range(NUM_SAMPLES)):

    INPUT_NEIGHBORHOOD, INPUT_PRICE, INPUT_SIZE, INPUT_BATHROOMS, INPUT_BEDROOMS  = random_data_generation()

    prompt_description = prompt_template.format_messages(
        neighborhood = INPUT_NEIGHBORHOOD, 
        price = INPUT_PRICE, 
        bedrooms = INPUT_BEDROOMS, 
        bathrooms= INPUT_BATHROOMS, 
        size = INPUT_SIZE,  
        neighborhood_details = NEIGHBORHOOD_DETAILS,
        house_specific_details = HOUSE_DETAILS,
    )

    bot_description = bot.invoke(prompt_description)
    description_list.append(bot_description)    


  0%|          | 0/40 [00:00<?, ?it/s]

100%|██████████| 40/40 [01:12<00:00,  1.81s/it]


In [51]:

for description in description_list:
    pprint(description)
    description_length = len(description.split(" "))
    pprint(f"Description length: {description_length} words.")
    print("_"*150)



('\n'
 'This stunning contemporary townhouse in the bustling residential location of '
 'The Bronx is the perfect place to call home. Boasting 1 bedroom and 5 '
 'bathrooms, this spacious 5600 sqft house offers ample living space for you '
 'and your family. The neighborhood is highly sought after for its closeness '
 'to schools, work, and a variety of amenities including drug stores, '
 "supermarkets, and malls. Plus, you'll love the fantastic disco bars, nice "
 'restaurants, and good bars just steps away from your front door. Enjoy the '
 'convenience of being in a secure and bike-friendly neighborhood, with '
 'generous parking and gardens. And with solar energy and reduced mobility '
 "features, this pet-friendly home has it all. Don't miss out on the "
 'opportunity to own this gem for just $700,000!')
'Description length: 124 words.'
__________________________________________________________________________________________________________________________________________________

In [52]:
print(description_list[3])


Welcome to this charming contemporary townhouse located in the bustling residential neighborhood of Queens. Perfect for a single person or couple, this 1 bedroom, 2 bathroom house boasts a spacious 3600 sqft of living space. The secure neighborhood offers peace of mind while the closeness to schools, drug stores, and supermarkets provides convenience. Enjoy the fantastic disco bars, nice restaurants, and good bars in the area for entertainment. This pet-friendly house also features a pool, solar energy, and generous parking. The bike-friendly roads and reduce mobility accessibility make it easy to get around. Don't miss the chance to own a piece of this vibrant and desirable neighborhood for just $400,000.


# Store Data in Vector Database

In [15]:
# !rm -r chroma

In [53]:
embedding = OpenAIEmbeddings()

PERSIST_CHROMA_DIR = "chroma/"

vector_data = Chroma.from_texts(
    texts = description_list, 
    embedding = embedding, 
    persist_directory = PERSIST_CHROMA_DIR
)


  warn_deprecated(


In [54]:
print(vector_data._collection.count())

40


In [55]:
!ls

01_exercise.ipynb                     [34mflagged[m[m
[34m__pycache__[m[m                           gradio_exercises.ipynb
chatgpt_descriptions.csv              personalized_listing_generation.ipynb
[34mchroma[m[m                                real_state_chatbot.ipynb
constants.py                          test_gradio.ipynb
data_generation.ipynb


# Send Queries to the Database

In [56]:
question = "Suggest houses in The Bronx that have a pool."
search_result = vector_data.similarity_search(
    question, 
    k=3
)

In [57]:
print(len(search_result))

3


In [23]:
for result in search_result:
    print(result)
    print("_"*200)

page_content='\nNestled in the vibrant neighborhood of The Bronx, this charming 2-bedroom, 3-bathroom house is a perfect blend of comfort and convenience. With a spacious 3200 sqft, this home offers ample space for a growing family. The suburban location provides a peaceful and secure environment, while still being close to popular tourist spots such as the Bronx Zoo and Yankee Stadium. Public transportation is easily accessible, making daily commutes a breeze. Schools, malls, supermarkets, and drugstores are all within a short distance, making errands a quick and easy task. The house also features a garden and a pool, perfect for outdoor relaxation. With its modern architectural style and bike-friendly roads, this house is a must-see!'
________________________________________________________________________________________________________________________________________________________________________________________________________
page_content="\nThis spacious 3-bedroom, 4-bathroom 

In [59]:
import pandas as pd

data = pd.DataFrame()
data["descriptions"] = description_list
data.head()

Unnamed: 0,descriptions
0,\nThis stunning contemporary townhouse in the ...
1,\nExperience the best of Brooklyn living in th...
2,\nNestled in the bustling residential location...
3,\nWelcome to this charming contemporary townho...
4,Introducing this stunning contemporary townhou...


In [60]:
data.to_csv("listings.csv")

# Generating Items in Common with the Search Result

In [161]:
inference_bot = OpenAI(
    model_name=completion_model_name, 
    temperature=0.0, 
)

In [162]:
template_generate_features_string = """
You are a piece of software that is helping a Housing Chatbot that
is supporting users in New York City.

Your job is to take a query from an user and generate a list of what the user 
requires in the house or neighborhood, call that list user's requirements. 

Here is a housing query asked by the customer, it is delimited by triple backticks:
```{query_user}```

Please be accurate, don't assume anything that wasn't mentioned by the user.
"""


features_template = ChatPromptTemplate.from_template(template_generate_features_string)
print(f"Input variables: {features_template.messages[0].input_variables}")

# input_query_user = "I'm looking for a house in Manhattan with a pool."
input_query_user = "A house in The Bronx with a big garden, 3 bedrooms and two bathrooms, located in a safe neighborhood"

print(f"Input Query User: {input_query_user}")

prompt_features = features_template.format_messages(
    query_user = input_query_user, 
)

bot_response_features = inference_bot.invoke(prompt_features)
print(bot_response_features)

Input variables: ['query_user']
Input Query User: A house in The Bronx with a big garden, 3 bedrooms and two bathrooms, located in a safe neighborhood

User's Requirements:
- House located in The Bronx
- Big garden
- 3 bedrooms
- 2 bathrooms
- Safe neighborhood


In [180]:

template_comparison_string = """
You are a piece of software that is helping a Real State Chatbot that
is supporting users in New York City.

Your job is to take a look at two things: 
1. A list of user's requirements
2. A house description found in the database

Using the requirements list you have to create two additional lists, 
1. The first list is for the user's requirements that were found 
in the database search, the similarities list.
2. A second list is for the user's requirements that were not found
in the database search, the not similarities list.

There is no need to invent new items, just splitting the list in 2. 

The criteria for selecting items depends whether they are categorical or numerical variables:

- For categorical variables you have to be very specific. For instance, if the house is 
located one boroughs like Manhattan and the user requires it to be in The Bronx then that item 
should go to the not similarities list, boroughs should equally match in order to go to the similarities list.
If user requires the house to be in a safe neighborhood and there is no mention of that in the description, 
then that item should go to the not similarities list.

- For numerical values we have two cases:
    - If the price that the user can pay is equal or lower than the one found in the description, 
    then that item should go to the similarities list, if it's higher it has to go to the not
    similarities list.

    - For variables like the number of bathrooms, bedrooms, or size of the house, it is ok to put in 
    the similarities list if the value found in the database is equal or higher than the user requires it
    If the value found in the database is smaller, then it should go to the not similarities list.



Here is the list of user's requirements, it is delimited by triple backticks:
```{user_list_requirements}```

And here is the output found in the database, it is delimited by triple backticks:
```{output_database}```


"""


comparison_template = ChatPromptTemplate.from_template(template_comparison_string)
print(f"Input variables: {comparison_template.messages[0].input_variables}")
print("*"*100)

sample_output_database = data["descriptions"].iloc[-1]
print(f"Database Output: {sample_output_database}")
print("_"*100)

print(f"Input User's Requirements: {bot_response_features}")
print("_"*100)

prompt_comparison = comparison_template.format_messages(
    user_list_requirements = bot_response_features, 
    output_database = sample_output_database
)

print("*"*100)
bot_response_comparison = inference_bot.invoke(prompt_comparison)
print(bot_response_comparison)




Input variables: ['output_database', 'user_list_requirements']
****************************************************************************************************
Database Output: 
Located in the bustling residential neighborhood of The Bronx, this contemporary townhouse offers the perfect blend of convenience and luxury living. Boasting a spacious 1900 sqft house size, this one-bedroom home is perfect for those seeking a low-maintenance lifestyle. The neighborhood offers a secure and bike-friendly environment, with easy access to schools, drug stores, and supermarkets. Enjoy the convenience of solar energy and generous parking, along with the added bonus of a pool and gardens for outdoor relaxation. With its close proximity to work, touristic places, and fantastic disco bars, this home is ideal for those seeking a vibrant and active lifestyle. Don't miss out on this pet-friendly gem in the heart of The Bronx.
___________________________________________________________________________

In [None]:
from langchain.chains import SimpleSequentialChain
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI




chain_one = LLMChain(llm=inference_bot, prompt=prompt_features)
chain_two = LLMChain(llm=inference_bot, prompt=comparison_template)
overall_simple_chain = SimpleSequentialChain(
    chains=[
        chain_one, 
        chain_two
    ],
    verbose=True
)