In [19]:
import json
import os
from dotenv import load_dotenv

# Load settings from a local .env file so that later cells can read them through os.environ.
load_dotenv()

True

In [1]:
import pandas as pd

def dedup_csv_content(file_name: str, unique_key_columns: list[str], output_file: str = "unique_records.csv"):
    """Write a copy of a CSV file with duplicate records removed.

    Two rows are duplicates when they hold the same values in every column
    listed in ``unique_key_columns``; the first occurrence is kept.

    :param file_name: path of the CSV file to read.
    :param unique_key_columns: column names that together identify a record.
    :param output_file: path of the CSV file to write. Defaults to
        ``unique_records.csv`` in the working directory.
    :return: None; the result is written to ``output_file``.
    """
    records = pd.read_csv(file_name)

    # Build a new frame instead of mutating in place so the loaded data stays intact.
    unique_records = records.drop_duplicates(subset=unique_key_columns)

    unique_records.to_csv(output_file, index=False)


def remove_csv_columns(file_name: str, columns_to_remove: list[str], output_file: str = "modified_records.csv"):
    """Write a copy of a CSV file without the given columns.

    :param file_name: path of the CSV file to read.
    :param columns_to_remove: names of the columns to drop; pandas raises
        ``KeyError`` if one of them is not present.
    :param output_file: path of the CSV file to write. Defaults to
        ``modified_records.csv`` in the working directory.
    :return: None; the result is written to ``output_file``.
    """
    records = pd.read_csv(file_name)

    # Build a new frame instead of mutating in place so the loaded data stays intact.
    trimmed_records = records.drop(columns=columns_to_remove)

    trimmed_records.to_csv(output_file, index=False)



# Step 1: drop the Den, Lat and Long columns (writes modified_records.csv).
remove_csv_columns(
    file_name="Toronto_apartment_rentals_2018.csv",
    columns_to_remove=["Den", "Lat", "Long"],
)
# Step 2: de-duplicate that file on bedroom, bathroom and address (writes unique_records.csv).
dedup_csv_content(
    file_name="modified_records.csv",
    unique_key_columns=["Bedroom", "Bathroom", "Address"],
)


In [20]:
import os
import pandas as pd
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Qdrant


# Name of the Qdrant collection that receives the apartment documents.
collection_name = "apartment_collection"
# Qdrant connection settings come from the environment; a missing variable raises KeyError.
QDRANT_URL = os.environ['QDRANT_URL']
QDRANT_API_KEY = os.environ['QDRANT_API_KEY']


# Read from the unique_records.csv file, then call the create_document function to create a document for each row
def read_csv_and_create_documents(file_name: str = "unique_records.csv"):
    """Build one document per row of a rentals CSV file.

    :param file_name: CSV file to read; it must contain the columns
        ``Bedroom``, ``Bathroom``, ``Address`` and ``Price``. Defaults to
        ``unique_records.csv``.
    :return: list with the result of ``create_document`` for every row, in
        file order.
    """
    df = pd.read_csv(file_name)

    # iterrows() yields (index, row) pairs; only the row is needed here.
    return [
        create_document(
            bedroom=row["Bedroom"],
            bathroom=row["Bathroom"],
            address=row["Address"],
            price=row["Price"],
        )
        for _, row in df.iterrows()
    ]


def create_document(bedroom: int, bathroom: float, address: str, price: str):
    """Describe one apartment as a Document with filterable metadata.

    :param bedroom: number of bedrooms, stored unchanged in the metadata.
    :param bathroom: number of bathrooms; stored in the metadata as text
        produced by ``float_to_str``.
    :param address: address text, used only in the page content.
    :param price: monthly rent as text; shown verbatim in the page content and
        stored in the metadata as the float from ``price_float_value``.
    :return: Document whose page content is a one-sentence description.
    """
    # Format the bathroom count once and reuse it for both text and metadata.
    bathroom_label = float_to_str(bathroom)
    page_content = (
        f"This apartment has {bedroom} bedrooms and {bathroom_label} bathrooms. "
        f'The address of the apartment is "{address}" and the monthly rent is {price}.'
    )
    metadata = {
        "bedroom": bedroom,
        "bathroom": bathroom_label,
        "price": price_float_value(price),
    }
    return Document(page_content=page_content, metadata=metadata)


def float_to_str(float_number):
    """Render a number as text, dropping a redundant ``.0`` from whole values.

    ``2.0`` becomes ``"2"`` while ``1.5`` stays ``"1.5"``. NaN and infinite
    values (e.g. a missing CSV cell read by pandas) are returned as their
    ``str()`` form instead of raising, because ``int()`` cannot convert them.

    :param float_number: the number to format.
    :return: the formatted text.
    """
    # NaN is the only value that does not compare equal to itself.
    is_nan = float_number != float_number
    if is_nan or float_number in (float("inf"), float("-inf")):
        return str(float_number)
    if int(float_number) == float_number:
        return str(int(float_number))
    return str(float_number)


def price_float_value(string_value):
    """Convert a price such as ``"$1,300.00"`` into a float.

    :param string_value: price text that may contain ``$`` signs and comma
        thousands separators. Real numbers are accepted as well, which covers
        a price column that pandas has already parsed as numeric.
    :return: the price as a float.
    :raises ValueError: if the cleaned text is not a valid number.
    """
    import numbers

    # Already numeric: there is nothing to strip.
    if isinstance(string_value, numbers.Real):
        return float(string_value)
    cleaned_string = string_value.replace("$", "").replace(",", "")
    return float(cleaned_string)


def insert_data_to_vector_store(docs, collection_name: str, embeddings: Embeddings):
    """
    Hand the documents and the embedding model to ``Qdrant.from_documents``.
    :param docs: documents to insert.
    :param collection_name: name of the target Qdrant collection.
    :param embeddings: embedding model forwarded to ``Qdrant.from_documents``.
    :return: None; a confirmation line is printed once the call returns.
    """
    # Connection settings are taken from the module-level QDRANT_* constants.
    connection_options = {
        "url": QDRANT_URL,
        "prefer_grpc": True,
        "api_key": QDRANT_API_KEY,
        "collection_name": collection_name,
    }
    Qdrant.from_documents(docs, embeddings, **connection_options)
    print("Data inserted successfully")


rent_apartment_docs = read_csv_and_create_documents()
# Read the OpenAI key from the environment rather than embedding it in the notebook:
# a literal key is exposed to everyone who can see the file and should be revoked.
insert_data_to_vector_store(
    rent_apartment_docs,
    collection_name,
    OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]),
)


Data inserted successfully


In [21]:
import os

from qdrant_client import models
import qdrant_client
from qdrant_client import QdrantClient

# Qdrant connection settings come from the environment; a missing variable raises KeyError.
QDRANT_URL = os.environ['QDRANT_URL']
QDRANT_API_KEY = os.environ['QDRANT_API_KEY']

def get_qdrant_client() -> QdrantClient:
    """
    Build a Qdrant client from the module-level QDRANT_URL and QDRANT_API_KEY,
    with gRPC preferred.
    """
    connection_options = dict(
        url=QDRANT_URL,
        prefer_grpc=True,
        api_key=QDRANT_API_KEY,
    )
    return QdrantClient(**connection_options)


# Shared client used by the filter examples in the following cells.
client = get_qdrant_client()


def must_clause_example_1():
    """Scroll the apartment collection for 2-bedroom, 1.5-bathroom records and print the result.

    Both conditions are placed in the filter's ``must`` list.
    """
    bedroom_condition = models.FieldCondition(
        key="metadata.bedroom",
        match=models.MatchValue(value=2),
    )
    # The bathroom count is stored as text in the metadata, hence the string value.
    bathroom_condition = models.FieldCondition(
        key="metadata.bathroom",
        match=models.MatchValue(value="1.5"),
    )

    scroll_result = client.scroll(
        collection_name="apartment_collection",
        scroll_filter=models.Filter(must=[bedroom_condition, bathroom_condition]),
    )

    print(scroll_result)


# Run the two-condition filter example; the printed result follows.
must_clause_example_1()


([Record(id='06250478-95c5-4717-9857-294160b146d0', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 3200.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is "12 York St, Toronto, ON M5J 2Z2, Canada" and the monthly rent is $3,200.00.'}, vector=None, shard_key=None), Record(id='08143049-db64-44c7-882f-c5e8c1c83d5a', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 1300.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is ", NA M3N 1J2 ON, Canada" and the monthly rent is $1,300.00.'}, vector=None, shard_key=None), Record(id='1915206b-4ce7-4626-a034-25774d11c890', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 1250.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is "63 Drew St, Oshawa, ON L1H 5A2, Canada" and the monthly rent is $1,250.00.'}, vector=None, shard_key=None), Record(id='1a3706da-40

In [22]:
def must_clause_example_2():
    """Scroll for 2-bedroom, 1.5-bathroom records priced at most 1300 and print the result.

    All three conditions are placed in the filter's ``must`` list.
    """
    bedroom_condition = models.FieldCondition(
        key="metadata.bedroom",
        match=models.MatchValue(value=2),
    )
    # The bathroom count is stored as text in the metadata, hence the string value.
    bathroom_condition = models.FieldCondition(
        key="metadata.bathroom",
        match=models.MatchValue(value="1.5"),
    )
    # Upper bound only: lte is inclusive, lt is left unset.
    price_condition = models.FieldCondition(
        key="metadata.price",
        range=models.Range(lt=None, lte=1300),
    )

    scroll_result = client.scroll(
        collection_name="apartment_collection",
        scroll_filter=models.Filter(
            must=[bedroom_condition, bathroom_condition, price_condition]
        ),
    )

    print(scroll_result)


# Run the filter example with the added price range; the printed result follows.
must_clause_example_2()


([Record(id='08143049-db64-44c7-882f-c5e8c1c83d5a', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 1300.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is ", NA M3N 1J2 ON, Canada" and the monthly rent is $1,300.00.'}, vector=None, shard_key=None), Record(id='1915206b-4ce7-4626-a034-25774d11c890', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 1250.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is "63 Drew St, Oshawa, ON L1H 5A2, Canada" and the monthly rent is $1,250.00.'}, vector=None, shard_key=None), Record(id='1a3706da-40ab-44bf-aa49-360b2608a8d5', payload={'metadata': {'bedroom': 2, 'bathroom': '1.5', 'price': 1250.0}, 'page_content': 'This apartment has 2 bedrooms and 1.5 bathrooms. The address of the apartment is "63 Drew St, Oshawa, ON L1H 5A2, Canada" and the monthly rent is $1,250.00.'}, vector=None, shard_key=None), Record(id='2d39fd13-f80

In [23]:
from langchain.chains.query_constructor.base import AttributeInfo

# Description of the document contents; passed to get_query_constructor_prompt.
document_content_description = "Detailed description of the rented apartment"

# Metadata attributes the query constructor is told about.
# "bathroom" is declared as a string because the metadata stores it as text (e.g. "1.5").
attribute_info = [
    AttributeInfo(
        name="bedroom",
        description="Number of bedrooms in the rented apartment.",
        type="integer",
    ),
    AttributeInfo(
        name="bathroom",
        description="Number of bathrooms in the rented apartment.",
        type="string",
    ),
    AttributeInfo(
        name="price", description="The monthly rent of the apartment", type="float"
    ),
]


In [24]:
# Confirm the key is loaded without echoing its value into the saved notebook output.
"OPENAI_API_KEY" in os.environ

True

In [25]:
from langchain_openai import ChatOpenAI

# Chat model shared by the query-constructor chains; the API key is read from the environment.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True,openai_api_key=os.environ['OPENAI_API_KEY'])

In [26]:
from langchain.chains.query_constructor.base import (
    get_query_constructor_prompt, StructuredQueryOutputParser
)
from langchain_openai import ChatOpenAI

def get_query_constructor():
    """Return the chain prompt -> llm -> structured-query parser.

    The prompt is built from the module-level ``document_content_description``
    and ``attribute_info``; no few-shot examples are supplied.
    """
    return (
        get_query_constructor_prompt(document_content_description, attribute_info)
        | llm
        | StructuredQueryOutputParser.from_components()
    )


In [27]:
# Turn a natural-language request into a structured query (search text plus metadata filter) and print it.
user_query = "Find me a 2 bedroom and 1 bathroom apartment at Dufferin Street"
query_constructor = get_query_constructor()
structured_query = query_constructor.invoke({"query": user_query})
print(structured_query)


query='Dufferin Street' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bedroom', value=2), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bathroom', value='1')]) limit=None


In [28]:
from langchain.chains.query_constructor.base import (
    get_query_constructor_prompt, StructuredQueryOutputParser
)
from langchain_openai import ChatOpenAI


def get_query_constructor():
    """Return the chain prompt -> llm -> structured-query parser, with one few-shot example.

    The prompt is built from the module-level ``document_content_description``
    and ``attribute_info`` plus a single input/output pair.
    """
    # In the example filter the bathroom value is quoted, i.e. written as a string
    # (presumably to steer the model toward the string-typed attribute).
    few_shot_examples = [
        (
            "Find me a 2 bedroom and 1 bathroom apartment at Yonge St",
            {
                "query": "Yonge St",
                "filter": 'and(eq("bedroom", 2), eq("bathroom", "1"))',
            },
        )
    ]
    return (
        get_query_constructor_prompt(
            document_content_description,
            attribute_info,
            examples=few_shot_examples,
        )
        | llm
        | StructuredQueryOutputParser.from_components()
    )


# Build the structured query for the same request, this time with the few-shot prompt, and print it.
user_query = "Find me a 2 bedroom and 1 bathroom apartment at Dufferin Street"
query_constructor = get_query_constructor()
structured_query = query_constructor.invoke({"query": user_query})
print(structured_query)


query='Dufferin Street' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bedroom', value=2), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bathroom', value='1')]) limit=None


In [29]:
from langchain.chains.query_constructor.base import AttributeInfo

# Re-declares the same description and attribute list that an earlier cell already defines.
document_content_description = "Detailed description of the rented apartment"

# "bathroom" is declared as a string because the metadata stores it as text (e.g. "1.5").
attribute_info = [
    AttributeInfo(
        name="bedroom",
        description="Number of bedrooms in the rented apartment.",
        type="integer",
    ),
    AttributeInfo(
        name="bathroom",
        description="Number of bathrooms in the rented apartment.",
        type="string",
    ),
    AttributeInfo(
        name="price", description="The monthly rent of the apartment", type="float"
    ),
]


In [30]:
from langchain.chains.query_constructor.base import (
    get_query_constructor_prompt, StructuredQueryOutputParser
)
from langchain_openai import ChatOpenAI

def get_query_constructor():
    """Return the chain prompt -> llm -> structured-query parser, without few-shot examples.

    NOTE(review): this redefinition replaces the earlier ``get_query_constructor``
    that passed ``examples`` to the prompt.
    """
    constructor_prompt = get_query_constructor_prompt(
        document_content_description, attribute_info
    )
    parser = StructuredQueryOutputParser.from_components()
    return constructor_prompt | llm | parser


In [31]:
# Build and print the structured query for a new request.
# NOTE(review): the recorded output compares "bathroom" with the integer 1, although the
# attribute is declared as a string and the metadata stores text — confirm this is intended.
user_query = "Find me a 1 bedroom and 1 bathroom apartment at Grand Park Drive"
query_constructor = get_query_constructor()
structured_query = query_constructor.invoke({"query": user_query})
print(structured_query)


query='Grand Park Drive' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bedroom', value=1), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='bathroom', value=1)]) limit=None
