In [None]:
import os
from langchain.embeddings import HuggingFaceEmbeddings  # Correct class from HuggingFace
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core import Settings, VectorStoreIndex, PromptTemplate, SimpleDirectoryReader
from llama_index.llms.ollama import Ollama  # Assuming you corrected the import here
from llama_index.core.retrievers  import VectorIndexRetriever

from llama_index.core.postprocessor import LLMRerank
from IPython.display import Markdown, display
from IPython.display import HTML
import pandas as pd



# Load documents from the project directory, recursing into subdirectories.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
source_dir = "/mnt/c/Users/edeep/RAG/RAG_Codebase/project3_se-final_with_openai/project3_se-final_with_openai/"
docs = SimpleDirectoryReader(source_dir, recursive=True).load_data()

# Load the embedding model
def load_embedding_model(model_name="sentence-transformers/all-mpnet-base-v2", device="cuda"):
    """Construct the HuggingFace embedding model used for indexing and retrieval.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
        device: Device string forwarded inside ``model_kwargs``
            (defaults to ``"cuda"``).

    Returns:
        A ``HuggingFaceEmbeddings`` instance whose encode kwargs request
        normalized embeddings.
    """
    return HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=dict(device=device),
        encode_kwargs=dict(normalize_embeddings=True),
    )

# Initializing components
# Build the HuggingFace embedding model with its defaults, wrap it in the
# LlamaIndex LangChain adapter, and register it as the global embed model.
lc_embedding_model = load_embedding_model()
embed_model = LangchainEmbedding(lc_embedding_model)
Settings.embed_model = embed_model

# Assuming local_directory and extensions are part of some other operations not shown here
# NOTE(review): neither name is referenced by any other visible cell.
local_directory = '/mnt/c/Users/edeep/RAG/RAG_Codebase'
extensions = [".py", ".ipynb", ".js", ".ts", ".md"]

# Indexing documents
# Presumably embeds `docs` with the Settings.embed_model registered above, so
# this must run after that assignment - TODO confirm.
index = VectorStoreIndex.from_documents(docs)

# Setting up LLM and querying capabilities
# Ollama "mistral" model with a 60.0 request timeout, registered as the global LLM.
llm = Ollama(model="mistral", request_timeout=60.0)
Settings.llm = llm
# Streaming query engine that retrieves the 4 most similar nodes per query.
# NOTE: `query_engine` is rebound in the ReRanking cell below, so this engine
# (and the prompt override applied to it here) is not the one queried there.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=4)

# Template for queries
# `{context_str}` and `{query_str}` are the template placeholders; the prompt
# asks for a step-by-step but crisp answer, or "I don't know!" otherwise.
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above, I want you to think step by step to answer the query in a crisp manner, in case you don't know the answer say 'I don't know!'.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
# Replace the response synthesizer's text-QA prompt with the template above.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})




ReRanking

In [None]:

# Retrieve 10 candidates by similarity, keep the 2 that the LLMRerank
# postprocessor ranks highest, and answer with the "tree_summarize" mode.
llm_reranker = LLMRerank(choice_batch_size=5, top_n=2)
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[llm_reranker],
    response_mode="tree_summarize",
)

response = query_engine.query("What does vendor service do?")
print(response)

In [None]:
# Retrieval / reranking sizes shared with the reranking cell further down.
vector_top_k = 10
reranker_top_n = 2

query_str = "What does product service do?"

# Configure the retriever and fetch the raw (not yet reranked) candidates.
retriever = VectorIndexRetriever(index=index, similarity_top_k=vector_top_k)
retrieved_nodes = retriever.retrieve(query_str)
# Reranking of `retrieved_nodes` happens in a later cell, after the raw
# results have been inspected.

In [None]:
# Number of nodes currently bound to `retrieved_nodes`.
print(len(retrieved_nodes))

In [None]:

# Global pandas display option (applies to every later DataFrame rendering).
# NOTE(review): a width of 10 likely truncates the "Text" column in the table
# rendered below - confirm this is intended.
pd.set_option("display.max_colwidth", 10)
def pretty_print(df):
    """Render ``df`` as an HTML table in the notebook output.

    Every two-character backslash-n sequence in the generated HTML is
    replaced with ``<br>`` before display (presumably how pandas escapes
    newlines inside cells - verify).

    Returns:
        Whatever ``display`` returns.
    """
    table_markup = df.to_html()
    table_markup = table_markup.replace("\\n", "<br>")
    return display(HTML(table_markup))


def visualize_retrieved_nodes(nodes) -> None:
    """Display the score and text of each node as an HTML table.

    Args:
        nodes: Iterable of objects exposing ``.score`` and
            ``.node.get_text()``.
    """
    rows = [
        {"Score": hit.score, "Text": hit.node.get_text()}
        for hit in nodes
    ]
    pretty_print(pd.DataFrame(rows))


# Show score and text for the nodes currently bound to `retrieved_nodes`.
visualize_retrieved_nodes(retrieved_nodes)

In [None]:
from llama_index.core.schema import QueryBundle
query_str = "What does product service do?"
# Pass the query by keyword; the other QueryBundle fields keep their defaults
# (the original passed an explicit positional None for the second field).
query_bundle = QueryBundle(query_str=query_str)

# `service_context` is intentionally not passed: it is a deprecated argument
# that recent llama-index releases no longer accept, and None was its default.
# With no explicit `llm`, the reranker presumably uses Settings.llm - TODO confirm.
reranker = LLMRerank(choice_batch_size=5, top_n=reranker_top_n)
# NOTE: this rebinds `retrieved_nodes` to the reranked list, so re-running this
# cell reranks already-reranked nodes; re-run the retrieval cell first.
retrieved_nodes = reranker.postprocess_nodes(retrieved_nodes, query_bundle)

In [None]:
# Show the nodes again; the previous cell rebound `retrieved_nodes` to the
# output of `reranker.postprocess_nodes`.
visualize_retrieved_nodes(retrieved_nodes)