In [10]:
from langchain_openai import OpenAIEmbeddings

# OpenAI embedding client. `dimensions` is the size of the embeddings you want returned.
openai_embeddings = OpenAIEmbeddings(model="text-embedding-3-large", dimensions=1024)

In [11]:
from langchain_huggingface import HuggingFaceEmbeddings

def get_hf_embeddings(model_name):
    """Build a ``HuggingFaceEmbeddings`` object for the given model.

    Args:
        model_name: Model identifier, forwarded unchanged as the
            ``model_name`` keyword of ``HuggingFaceEmbeddings``.

    Returns:
        The newly constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

# MPNet sentence-transformers model, created through get_hf_embeddings.
mpnet_embeddings = get_hf_embeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [13]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

def get_bge_embeddings(model_name, model_kwargs, encode_kwargs):
    """Build a ``HuggingFaceBgeEmbeddings`` object from the given settings.

    Args:
        model_name: Model identifier, forwarded as ``model_name``.
        model_kwargs: Forwarded unchanged as ``model_kwargs``.
        encode_kwargs: Forwarded unchanged as ``encode_kwargs``.

    Returns:
        The newly constructed ``HuggingFaceBgeEmbeddings`` instance.
    """
    return HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )

import torch

model_name = "BAAI/bge-small-en"
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so this
# cell still runs on machines without CUDA instead of raising at model load.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
encode_kwargs = {"normalize_embeddings": True}

bge_embeddings = get_bge_embeddings(model_name, model_kwargs, encode_kwargs)

In [14]:
from langchain.schema import Document
import json

def prepare_qa_documents(file_path):
    """Load question/answer records from a JSON file as ``Document`` objects.

    The file must decode to an iterable of mappings that each provide a
    ``"question"`` and an ``"answer"`` key. The answer becomes the document's
    ``page_content``; the question is stored in ``metadata["question"]``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list with one ``Document`` per record, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a record lacks ``"question"`` or ``"answer"``.
    """
    # Read explicitly as UTF-8: the answers contain non-ASCII characters
    # (e.g. typographic apostrophes), which a non-UTF-8 platform default
    # encoding would mis-decode or reject.
    with open(file_path, "r", encoding="utf-8") as f:
        qa_data = json.load(f)

    return [
        Document(
            page_content=item["answer"],
            metadata={"question": item["question"]},
        )
        for item in qa_data
    ]

# Q&A records loaded from disk as documents, used to build the vector indexes.
test_documents = prepare_qa_documents(file_path="../data/home0001qa.json")

In [15]:
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import FAISS

def basic_retriever(documents, embeddings):
    """Index ``documents`` in a FAISS vector store and return its retriever.

    Args:
        documents: Documents passed to ``FAISS.from_documents``.
        embeddings: Embedding object passed to ``FAISS.from_documents``.

    Returns:
        The result of ``as_retriever()`` on the new store, called with no
        arguments.
    """
    # FAISS is the active backend; Chroma.from_documents(documents, embeddings)
    # was the alternative that had been left commented out here.
    return FAISS.from_documents(documents, embeddings).as_retriever()

In [23]:
# Embedding models under comparison, keyed by a short name. The key is also
# used as the FAISS index name when the indexes are saved and loaded.
test_embeddings = dict(
    openai=openai_embeddings,
    mpnet=mpnet_embeddings,
    bge=bge_embeddings,
)

In [28]:
print(test_embeddings.keys())

# Build one FAISS index per embedding model and persist it in ./FAISS,
# using the model's key as the index name.
for model, embedding in test_embeddings.items():
    vectorstore = FAISS.from_documents(test_documents, embedding)
    vectorstore.save_local(folder_path="./FAISS", index_name=model)


dict_keys(['openai', 'mpnet', 'bge'])


In [29]:
# One MMR retriever per embedding model, in the same order as test_embeddings.
test_retrievers = []

for model, embedding in test_embeddings.items():
    # NOTE(security): allow_dangerous_deserialization opts in to loading
    # serialized index data from disk. Only use it on index files you wrote
    # yourself (here: the ones saved to ./FAISS by this notebook).
    vectorstore = FAISS.load_local(
        folder_path="./FAISS",
        embeddings=embedding,
        index_name=model,
        allow_dangerous_deserialization=True,
    )
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 4})
    test_retrievers.append(retriever)



In [34]:
# Query the first retriever; test_retrievers follows test_embeddings order, so index 0 is "openai".
test_retrievers[0].invoke("what is home0001")

[Document(metadata={'question': 'Who is behind home0001?'}, page_content='Home0001 is initiated by a multi-disciplinary collective working across art, architecture, technology, and design, and currently based in los angeles, new york, paris, berlin, and london. Designed together with world renowned architects, 0001 homes are fully equipped and furnished and are part of an expanding network.'),
 Document(metadata={'question': 'Can i change the design of my home?'}, page_content="Legally you own your home and are free to do what you want with it. However, to maintain access to home0001's network in other locations, your home does need to meet our standards and our team can support you in making changes where desired."),
 Document(metadata={'question': 'How do i book an 0001 home somewhere else?'}, page_content="Whenever you want to spend time in other home0001 locations, just text us your dates and we'll confirm availability right away. You cover one cleaning fee each time you swap homes

In [31]:
# Same query against the second retriever; index 1 corresponds to the "mpnet" entry of test_embeddings.
test_retrievers[1].invoke("what is home0001")

[Document(metadata={'question': 'What are the perks of joining the home0001 network?'}, page_content='Home0001 is a distributed housing collective: in addition to community dinners and events, homeowners get access to 0001 homes in other cities for free. No nightly rate; just a cleaning fee each time. Own one home, live in many places. '),
 Document(metadata={'question': 'Are 0001 homes move-in ready?'}, page_content='Developed in collaboration with world-renowned architects, every single thing in an 0001 home is thoughtfully designed with a focus on simplicity and functionality, so homebuyers can literally move in with nothing but their suitcase.'),
 Document(metadata={'question': 'Can i change the design of my home?'}, page_content="Legally you own your home and are free to do what you want with it. However, to maintain access to home0001's network in other locations, your home does need to meet our standards and our team can support you in making changes where desired."),
 Document(

In [33]:
# Same query against the third retriever; index 2 corresponds to the "bge" entry of test_embeddings.
test_retrievers[2].invoke("what is home0001")

[Document(metadata={'question': 'How does the home0001 network function?'}, page_content='Home0001 is a distributed housing collective: in addition to community dinners and events, homeowners get access to 0001 homes in other cities for free. No nightly rate; just a cleaning fee each time. Own one home; live flexibly between multiple locations.'),
 Document(metadata={'question': 'Who founded home0001?'}, page_content='Home0001 is a new form of housing initiated by a collective of architects, artists, technologists, and designers currently based in los angeles, new york, paris, rotterdam, berlin, and london.'),
 Document(metadata={'question': 'What is home0001?'}, page_content='Home0001 is a global housing network. Each 0001 home is fully-equipped and furnished. Move in with just your suitcase. Swap cities whenever you like.'),
 Document(metadata={'question': 'Can i buy a home as a non-us citizen?'}, page_content="The process for buying an 0001 home is the same wherever you’re from. The