# Advanced Free RAG

In [6]:
from dotenv import load_dotenv, find_dotenv
import os
# Load environment variables from the .env file located by find_dotenv().
# NOTE(review): presumably this is where the OpenAI API key is defined - confirm.
load_dotenv(find_dotenv())

True

## Define the functions

In [9]:
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (
    DocumentCompressorPipeline,
    EmbeddingsFilter,
    LLMChainFilter,
)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import (
    EmbeddingsRedundantFilter,
    LongContextReorder,
)
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_experimental.text_splitter import SemanticChunker
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain import hub
from typing import List

# Remember: queries must be embedded with the same model the vector database was built with.

llm_model = "gpt-3.5-turbo"
embed_model = (
    "text-embedding-3-large"  # This embedding model returns 3072-dimensional vectors by default
)

def get_llm(model: str = llm_model):
    """Create the chat model used by the RAG pipeline.

    Args:
        model: Name of the OpenAI chat model to call. Defaults to the
            module-level ``llm_model``.

    Returns:
        A ``ChatOpenAI`` instance for ``model`` with ``temperature=0``.
    """
    # Pass the model through ``model``: the ``name`` keyword only labels the
    # runnable and would leave the actual model at the library default.
    return ChatOpenAI(model=model, temperature=0)


def get_embedding(model: str = embed_model):
    """Return an ``OpenAIEmbeddings`` client for ``model``, configured with ``max_retries=3``."""
    return OpenAIEmbeddings(max_retries=3, model=model)


def semantic_chunk_documents(documents: list[Document], threshold: int = 60):
    """Split documents into chunks with ``SemanticChunker``.

    Args:
        documents: Documents to split.
        threshold: Value passed as ``breakpoint_threshold_amount``; it is
            interpreted under the "percentile" breakpoint type.

    Returns:
        The chunks produced by ``SemanticChunker.split_documents``.
    """
    # Other breakpoint types offered by the chunker: "standard_deviation", "interquartile".
    chunker = SemanticChunker(
        embeddings=get_embedding(),
        breakpoint_threshold_amount=threshold,
        breakpoint_threshold_type="percentile",
    )
    return chunker.split_documents(documents)


def delete_collection(collection="Rdi_vectorDb"):
    """Delete ``collection`` from the Chroma database persisted under ``./temp/Databases``.

    Returns:
        Whatever ``Chroma.delete_collection`` returns.
    """
    embedding = get_embedding(model=embed_model)
    store = Chroma(
        persist_directory="./temp/Databases",
        embedding_function=embedding,
        collection_name=collection,
    )
    return store.delete_collection()


def add_to_chroma(
    chunks: List[str], metadatas: List[dict], ids: List[str], collection="Rdi_vectorDb"
) -> List[str]:
    """Insert texts into ``collection`` of the persisted Chroma database.

    Args:
        chunks: Texts to embed and store.
        metadatas: One metadata dict per text.
        ids: One id per text.
        collection: Name of the target Chroma collection.

    Returns:
        The value returned by ``Chroma.add_texts``.
    """
    embedding = get_embedding(model=embed_model)
    store = Chroma(
        persist_directory="./temp/Databases",
        embedding_function=embedding,
        collection_name=collection,
    )
    return store.add_texts(texts=chunks, metadatas=metadatas, ids=ids)


def get_db(collection="Rdi_vectorDb"):
    """Open ``collection`` in the Chroma database persisted under ``./temp/Databases``."""
    embedding = get_embedding(model=embed_model)
    store = Chroma(
        collection_name=collection,
        embedding_function=embedding,
        persist_directory="./temp/Databases",
    )
    return store


def get_retriever(vectorstore, local_llm):
    """Build a multi-query retriever whose results pass through a compression pipeline.

    Args:
        vectorstore: Vector store providing ``as_retriever``.
        local_llm: Chat model used by the multi-query retriever and by the
            final LLM-based filter.

    Returns:
        A ``ContextualCompressionRetriever`` wrapping the multi-query
        retriever and the document-compressor pipeline.
    """
    import logging

    # Enable INFO logs of the multi-query retriever module.
    logging.basicConfig()
    logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

    simple_retriever = vectorstore.as_retriever(
        # The keyword is ``search_type``; it can be "similarity" (default),
        # "mmr", or "similarity_score_threshold".
        search_type="similarity",
        search_kwargs={"k": 5},
    )  # k=5 returns the 5 most relevant chunks in the vector database
    multiquery_retriever = MultiQueryRetriever.from_llm(
        retriever=simple_retriever, llm=local_llm
    )

    # Split the chunks even more
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=300, chunk_overlap=0, separators=[". "]
    )

    # Both embedding-based filters share one client, built with the same
    # model the database uses.
    embeddings = get_embedding(model=embed_model)

    # Remove the redundant documents by using an EmbeddingsRedundantFilter
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

    # The EmbeddingsFilter provides a cheaper and faster option: it embeds the
    # documents and returns only the ones whose embeddings are sufficiently
    # similar to the query.
    relevant_filter = EmbeddingsFilter(
        embeddings=embeddings, similarity_threshold=0.2
    )

    reorder_filter = LongContextReorder()

    # This filter calls the LLM for its relevance check, so it slows the process.
    llm_filter = LLMChainFilter.from_llm(local_llm)

    pipeline_compressor = DocumentCompressorPipeline(
        transformers=[
            splitter,
            redundant_filter,
            relevant_filter,
            reorder_filter,
            llm_filter,
        ]
    )

    return ContextualCompressionRetriever(
        base_compressor=pipeline_compressor, base_retriever=multiquery_retriever
    )


def rag(question: str, collection="Rdi_vectorDb", stream: bool = False):
    """Answer ``question`` from the documents stored in ``collection`` and print the result.

    Args:
        question: The question to answer.
        collection: Name of the Chroma collection to retrieve context from.
        stream: When true, print the answer piece by piece as it is produced;
            otherwise print the complete answer in one call.
    """
    local_llm = get_llm(model=llm_model)
    vectorstore = get_db(collection=collection)
    # Prompt source: https://smith.langchain.com/hub/rlm/rag-prompt
    prompt = hub.pull("rlm/rag-prompt")

    def format_docs(docs):
        # Concatenate the retrieved page contents, separated by blank lines.
        return "\n\n".join(doc.page_content for doc in docs)

    retriever = get_retriever(vectorstore, local_llm)
    inputs = RunnableParallel(
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
    )
    chain = inputs | prompt | local_llm | StrOutputParser()

    if stream:
        for piece in chain.stream(question):
            print(piece, end="", flush=True)
        return

    print(chain.invoke(question))

ModuleNotFoundError: No module named 'pydantic.v1'

In [None]:
# What does hub.pull do? It fetches a prompt from the LangChain Hub by its handle.
# Browse the hub at https://smith.langchain.com/hub
prompt = hub.pull("rlm/rag-prompt")
from rich import print

print(prompt)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from typing import cast
# cast() only informs static type checkers; the object itself is unchanged.
prompt = cast(ChatPromptTemplate, prompt)
# Show the prompt with its "question" variable pre-filled.
print(prompt.partial(question="What is RDI?"))

## Load the Data

In [None]:
# Load your data
# Delete the existing collection first.
delete_collection(collection="Rdi_vectorDb")

# HTML source
from langchain_community.document_loaders import WebBaseLoader
from rich import print

loader_HTML = WebBaseLoader(web_paths=["https://www.rdisoftware.com/"])
docsWeb = loader_HTML.load() # load the page(s) listed in web_paths
print(docsWeb)
# Expected execution time: 1s

  warn_deprecated(


## Chunk the Data into Smaller, Semantically Similar Pieces

In [None]:
# Split the loaded web documents into semantic chunks
chunks = semantic_chunk_documents(docsWeb)
print("total chunks: ", len(chunks))
print(chunks)

## Add the Chunks to the Vector Database

In [None]:
import hashlib

# Ids are the SHA-256 of each chunk's text, so identical chunks share an id.
# Keep only the first chunk per id: duplicate ids in one insert batch are
# rejected by the vector store.
unique_chunks = {}
for doc in chunks:
    chunk_id = hashlib.sha256(doc.page_content.encode()).hexdigest()
    unique_chunks.setdefault(chunk_id, doc)

ids = list(unique_chunks)
texts = [doc.page_content for doc in unique_chunks.values()]
metadatas = [doc.metadata for doc in unique_chunks.values()]

# Rebuild the collection from scratch before inserting.
delete_collection(collection="Rdi_vectorDb")
added_ids = add_to_chroma(texts, metadatas, ids, collection="Rdi_vectorDb")
print("total added ids: ", len(added_ids))
print(added_ids)

In [None]:
# Counter-example: do NOT join multiple questions into a single query!
# How would semantic similarity be defined for several questions at once?
rag(question="""
             1. What is RDI Software specialized in?
             2. When was RDI bought by Capgemini?
             3. Are there new openings ?
             4. What are the benefits of working in RDI?
             5. Who is Sarolta Mezei and what she said about RDI ?
             """)

# Expected execution time: 8min on local Ollama
# Expected execution time with OpenAI: 21.7s

  warn_deprecated(


In [None]:
questions = [
    "What is RDI Software specialized in ?",
    "When was RDI bought by Capgemini?",
    "Are there new openings ?",
    "What are the benefits of working in RDI?",
    "Who is Sarolta Mezei and what she said about RDI ?",
]

# Ask one question per call. rag() prints each answer and returns nothing,
# so there is no result to collect here.
for question in questions:
    rag(question)

# Expected execution time: 1min 20s











In [None]:
# stream=True prints the answer piece by piece as it is generated.
rag("Who works as a Software Quality Assurance Analyst at RDI?", stream=True)
# Expected execution time: 15s

