In [2]:
import os
import gradio as gr 

# Copy the API key from the local `constants` module into the process
# environment (presumably so the OpenAI clients created later can read it
# from there — confirm against the client library's configuration rules).
from constants import OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] =  OPENAI_API_KEY


from langchain_openai import OpenAI
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_openai.embeddings import OpenAIEmbeddings


# Model identifier passed as `model_name` when the OpenAI completion client
# (`inference_bot`) is constructed.
completion_model_name = "gpt-3.5-turbo-instruct"


In [15]:
# Directory holding the persisted Chroma data (relative path).
DATABASE_PATH = "chroma/"

# Embedding client handed to Chroma as its embedding function.
embeddings_generator = OpenAIEmbeddings()

# Vector store handle queried by `generate_suggestions`.
vector_database = Chroma(
    embedding_function=embeddings_generator,
    persist_directory=DATABASE_PATH,
)


def generate_suggestions(input_user_query):
    """Return the text of the stored listing closest to the user's query.

    Args:
        input_user_query: Free-text description of what the user is looking
            for (wired to the query textbox in the Gradio UI).

    Returns:
        The `page_content` of the single best match returned by
        `vector_database.similarity_search`, or a short explanatory message
        when the query is blank or the search returns no documents.
    """
    # A blank textbox would otherwise be embedded and searched as-is;
    # answer locally instead of issuing a pointless remote call.
    if not input_user_query or not input_user_query.strip():
        return "Please describe what you are looking for first."

    search_result = vector_database.similarity_search(input_user_query, k=1)

    # An empty result list (e.g. nothing stored yet) must not reach the
    # `[0]` lookup below, which would raise IndexError.
    if not search_result:
        return "No matching listing was found in the database."

    return search_result[0].page_content




In [17]:
# Completion client used for both LLM calls in `generate_items_in_common`;
# it is configured with the notebook's model name and temperature 0.0.
inference_bot = OpenAI(temperature=0.0, model_name=completion_model_name)

In [35]:
# Prompt text for the first LLM step: asks the model to turn the user's
# housing query into a list of requirements. The only placeholder is
# `{query_user}`; the text is wrapped with ChatPromptTemplate.from_template
# further down in this cell.
template_generate_features_string = """
You are a piece of software that is helping a Housing Chatbot that
is supporting users in New York City.

Your job is to take a query from an user and generate a list of what the user 
requires in the house or neighborhood, call that list user's requirements. 

Here is a housing query asked by the customer, it is delimited by triple backticks:
```{query_user}```

Please be accurate, don't assume anything that wasn't mentioned by the user.
"""



# Prompt text for the second LLM step: asks the model to sort every item of
# the user's requirements list into a "similarities" or "not similarities"
# list by comparing it with the listing text found in the database.
# Placeholders: `{user_list_requirements}` and `{output_database}`.
#
# The price rule is phrased from the listing's side: a listing matches when
# its price is equal or lower than what the user can pay. The earlier wording
# compared the values the other way round and would have accepted listings
# that cost more than the user's budget.
template_comparison_string = """
You are a piece of software that is helping a Real State Chatbot that
is supporting users in New York City.

Your job is to take a look at two things: 
1. A list of user's requirements
2. A house description found in the database

You need to traverse the user's requirements list and assign every 
item to one of the following groups:

1. The similarities list, if the item is present in the house description
found in the database.
2. The not similarities list otherwise.

There is no need to invent new items for the user's requirements list, just assign every 
item to one of the output lists.
If all requirement items get assigned to one of the output lists, then add None to the empty list, 
do not use "House" as a default value for an empty list.



The criteria for assigning items to the lists depends on whether the item has a  
categorical or numerical variables:

- For categorical variables you have to be very specific. For instance, if the house is 
located one boroughs like Manhattan and the user requires it to be in The Bronx then that item 
should go to the not similarities list, boroughs should equally match in order to go to the similarities list.
If user requires the house to be in a safe neighborhood and there is no mention of that in the description, 
then that item should go to the not similarities list.

- For numerical values we have two cases:
    - If the price found in the description is equal or lower than the price that the user can pay, 
    then that item should go to the similarities list, if it's higher it has to go to the not
    similarities list.

    - For variables like the number of bathrooms, bedrooms, or size of the house, it is ok to put in 
    the similarities list if the value found in the database is equal or higher than the user requires it
    If the value found in the database is smaller, then it should go to the not similarities list.



Here is the list of user's requirements, it is delimited by triple backticks:
```{user_list_requirements}```

And here is the output found in the database, it is delimited by triple backticks:
```{output_database}```
"""


# Wrap both raw prompt strings as ChatPromptTemplate objects, in the original
# order: requirement extraction first, then listing comparison.
features_template, comparison_template = [
    ChatPromptTemplate.from_template(prompt_text)
    for prompt_text in (template_generate_features_string, template_comparison_string)
]


In [36]:
def generate_items_in_common(input_query, database_suggestion):
    """Compare the user's requirements with a suggested listing.

    Two sequential calls are made through `inference_bot`:
    1. `features_template` is filled with the query and the reply is taken
       as the user's requirements list.
    2. `comparison_template` is filled with that list and the listing text,
       and the reply to this second prompt is returned.

    Args:
        input_query: Free-text housing query typed by the user.
        database_suggestion: Listing text to compare against (wired to the
            "Suggested output" textbox in the Gradio UI).

    Returns:
        The reply to the comparison prompt, or a short explanatory message
        when either input is blank (no LLM call is made in that case).
    """
    # Either textbox can still be empty when the button is clicked; sending
    # blank prompts would spend two LLM calls on content-free input.
    if not input_query or not input_query.strip():
        return "Please describe what you are looking for first."
    if not database_suggestion or not database_suggestion.strip():
        return "Please generate a suggestion before looking for similarities."

    # Step 1: extract the requirements list from the raw query.
    prompt_features = features_template.format_messages(query_user=input_query)
    bot_response_features = inference_bot.invoke(prompt_features)

    # Step 2: sort those requirements against the listing text.
    prompt_comparison = comparison_template.format_messages(
        user_list_requirements=bot_response_features,
        output_database=database_suggestion,
    )
    return inference_bot.invoke(prompt_comparison)


In [37]:
# Three-column UI: query input | suggested listing | requirement comparison.
with gr.Blocks() as demo:

    # --- Layout -----------------------------------------------------------
    with gr.Row():
        with gr.Column(scale=1):
            query_user = gr.Textbox(label="Introduce what you are looking for in a house!")
            suggestion_button = gr.Button("Generate Suggestion!")

        with gr.Column(scale=1):
            output_database = gr.Textbox(label="Suggested output")
            similarities_button = gr.Button("Find Similarities!")

        with gr.Column(scale=1):
            similarities = gr.Textbox(label="Items in common!")

    # --- Event wiring (same registration order as the layout) --------------
    # Query text -> closest listing from the vector store.
    suggestion_button.click(generate_suggestions, inputs=[query_user], outputs=[output_database])
    # Query text + suggested listing -> requirement comparison.
    similarities_button.click(
        generate_items_in_common,
        inputs=[query_user, output_database],
        outputs=[similarities],
    )

# Start the local Gradio server only when this code runs as the main module.
if __name__ == "__main__":
    demo.launch()

Running on local URL:  http://127.0.0.1:7898

To create a public link, set `share=True` in `launch()`.


In [2]:
# NOTE(review): this cell repeats the Chroma setup done earlier in the
# notebook (same 'chroma/' directory) and rebinds `vector_database`;
# confirm whether it is still needed.
embedding_generator = OpenAIEmbeddings()
DATABASE_DIRECTORY = 'chroma/'

vector_database = Chroma(
    embedding_function=embedding_generator,
    persist_directory=DATABASE_DIRECTORY,
)