In [None]:
import json
import os
import subprocess
import sys
import warnings


def load_env_from_script(script_path="./setup.sh"):
    """Source a shell script and copy the variables it exports into ``os.environ``.

    A ``! source ./setup.sh`` line runs in a throwaway subshell, so nothing the
    script exports is visible to the kernel afterwards. This helper sources the
    script in bash, dumps the resulting environment as JSON, and applies it to
    the current process so later ``os.environ`` lookups see the values.

    Args:
        script_path: Path of the shell script to source.

    Raises:
        subprocess.CalledProcessError: If bash or the script exits non-zero.
    """
    if not os.path.isfile(script_path):
        # Stay best-effort like the shell line did: report and carry on.
        warnings.warn(f"{script_path} not found; environment variables were not loaded")
        return

    # $1 is the script, $2 the current interpreter; the script's own stdout is
    # discarded so that the JSON dump is the only thing captured.
    dump_env = (
        'source "$1" >/dev/null && '
        'exec "$2" -c "import json, os; print(json.dumps(dict(os.environ)))"'
    )
    result = subprocess.run(
        ["bash", "-c", dump_env, "bash", script_path, sys.executable],
        check=True,
        stdout=subprocess.PIPE,
        text=True,
    )

    # Shell bookkeeping variables describe the subshell, not this process.
    shell_internal = {"_", "SHLVL", "PWD", "OLDPWD"}
    for name, value in json.loads(result.stdout).items():
        if name not in shell_internal:
            os.environ[name] = value


# Returns None on purpose, so no secret values are echoed as cell output.
load_env_from_script()

In [None]:
import os
import huggingface_hub


# Read the Hugging Face access token from the environment
# (evaluates to None when HF_TOKEN is not set).
hf_token = os.getenv("HF_TOKEN")

# Authenticate this session with the Hugging Face Hub using that token.
huggingface_hub.login(token=hf_token)

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-transformers checkpoint used to embed documents and queries.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Options forwarded to the underlying model: run it on the CPU.
model_kwargs = dict(device="cpu")
# Options forwarded to the encode step: leave the vectors un-normalized.
encode_kwargs = dict(normalize_embeddings=False)

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [None]:
from langchain_chroma import Chroma

def initialize_retriever(dataset_name, embeddings, base_dir=None, k=7, fetch_k=15):
    """Load a persisted Chroma collection and expose it as an MMR retriever.

    Args:
        dataset_name: Name of the Chroma collection; also the sub-directory of
            ``<base_dir>/Data/Vanilla_RAG`` that holds the persisted data.
        embeddings: Embedding function handed to Chroma for query encoding.
        base_dir: Root directory of the saved data. When omitted, the
            notebook-level ``PATH_TO_DRIVE`` variable is used, falling back to
            the ``PATH_TO_DRIVE`` environment variable.
        k: Number of documents the retriever returns per query.
        fetch_k: Number of candidates fetched before MMR re-ranking.

    Returns:
        The retriever produced by ``Chroma.as_retriever``.

    Raises:
        NameError: If no base directory was given and ``PATH_TO_DRIVE`` is not
            defined anywhere.
        FileNotFoundError: If the persisted collection directory is missing.
    """
    if base_dir is None:
        base_dir = globals().get("PATH_TO_DRIVE") or os.environ.get("PATH_TO_DRIVE")
        if not base_dir:
            raise NameError(
                "PATH_TO_DRIVE is not defined; set it in the notebook or the "
                "environment, or pass base_dir explicitly"
            )

    # Define the path to the saved collection
    persist_directory = os.path.join(base_dir, "Data/Vanilla_RAG", dataset_name)

    # Chroma silently creates an empty store for an unknown directory, which
    # would make every query return nothing; fail loudly instead.
    if not os.path.isdir(persist_directory):
        raise FileNotFoundError(
            f"No persisted collection for dataset {dataset_name!r} at {persist_directory}"
        )

    # Load the vector store from the saved directory
    vector_store = Chroma(
        collection_name=dataset_name,
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )

    # Set up the retriever (maximal marginal relevance search)
    return vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": k, "fetch_k": fetch_k},
    )

In [None]:
# Index sizes that can be selected; each label is passed to
# initialize_retriever as the dataset name.
dataset_size = ["0-10", "0-100", "0-1000", "0-10000"]

# Build the retriever over the chosen index (here the "0-10000" entry).
selected_dataset = dataset_size[3]
retriever = initialize_retriever(selected_dataset, embeddings)

In [None]:
from langchain_openai import ChatOpenAI

# Chat model that generates the answers for the RAG chain.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,   # zero sampling temperature
    max_retries=2,   # retry count passed to the client
    max_tokens=None,  # no explicit cap set here
    timeout=None,    # no explicit timeout set here
)

In [None]:
# Smoke test: send one prompt straight to the chat model, with no retrieval.
smoke_test_question = "What is the meaning of life?"
llm.invoke(smoke_test_question)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents in input order; empty for no documents.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# No earlier cell defines `prompt`, so on a fresh kernel the chain below would
# fail with NameError. Keep a prompt that is already defined; otherwise fall
# back to a generic RAG prompt whose variables match the keys supplied by the
# chain ("context" and "question").
try:
    prompt
except NameError:
    from langchain_core.prompts import ChatPromptTemplate

    prompt = ChatPromptTemplate.from_template(
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know.\n"
        "Question: {question}\n"
        "Context: {context}\n"
        "Answer:"
    )

# Pipeline: retrieve documents and flatten them into the context, pass the
# raw question through unchanged, fill the prompt, call the chat model, and
# reduce the model's message to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Inspect the raw documents the retriever returns for the question.
category_question = "With all the products you have seen, give different categories of the product and how they are related"
retriever.invoke(category_question)

In [None]:
# Run the full RAG chain (retrieval, prompt, chat model) on the question.
category_question = "With all the products you have seen, give different categories of the product and how they are related"
rag_chain.invoke(category_question)