In [2]:
# Install required packages for the RAG system
%pip install -q langchain-ollama langchain langchain-community faiss-cpu langchain_huggingface rank_bm25 gradio nest_asyncio markdown2 ipywidgets langchain_openai

Note: you may need to restart the kernel to use updated packages.


## Chunks and Embedding loader 

In [3]:
import pickle
import os
from typing import List
from langchain.schema import Document


def load_chunks_from_disk(chunks_path: str) -> List[Document]:
    """
    Load document chunks from disk.
    
    Args:
        chunks_path: Path to the saved chunks file
        
    Returns:
        List of document chunks
    """
    print(f"\nLoading chunks from {chunks_path}...")
    
    # Check if file exists
    if not os.path.exists(chunks_path):
        raise FileNotFoundError(f"Chunks file not found at {chunks_path}")
    
    # Load the chunks from disk
    with open(chunks_path, "rb") as f:
        chunks = pickle.load(f)
    
    print(f"Loaded {len(chunks)} chunks from disk")
    return chunks

In [35]:
# Initialize embedding model and load vector store
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load pre-existing vector store
loaded_faiss_store = FAISS.load_local(
    "/workspaces/RAG_BOT/LocalEmbeddings/Chatgpt_Enriched_Policy_Only_Embedding",
    embedding_model,
    allow_dangerous_deserialization=True
)
print("FAISS vector store loaded successfully.")

# Load the chunks also
chunks = load_chunks_from_disk("/workspaces/RAG_BOT/LocalChunks/document_chunks_20250715_020245.pkl")


FAISS vector store loaded successfully.

Loading chunks from /workspaces/RAG_BOT/LocalChunks/document_chunks_20250715_020245.pkl...
Loaded 58 chunks from disk


## LLM Configuration

In [5]:
# Initialize the LLM with rate limiting
from langchain_ollama import ChatOllama
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_openai import ChatOpenAI
from getpass import getpass

# Configure rate limiting to prevent overloading the model
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)

openai_api_key = getpass("Enter your OpenAI API key: ")

# Initialize the LLM
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    openai_api_key = openai_api_key,
    temperature=0.1,
    rate_limiter=rate_limiter
)


# Retrival Method-1  ensemble

In [36]:
# Set up retrievers
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Extract documents from the docstore
try:
    all_docs = [loaded_faiss_store.docstore._dict[doc_id] for doc_id in loaded_faiss_store.index_to_docstore_id.values()]
except AttributeError:
    # Fallback for different docstore structure
    all_docs = [loaded_faiss_store.docstore.get(doc_id) for doc_id in loaded_faiss_store.index_to_docstore_id.values()]

# Configure different retrieval methods
bm25_retriever = BM25Retriever.from_documents(all_docs)
bm25_retriever.k = 2

# Vector similarity retrieval with MMR
basic_retriever = loaded_faiss_store.as_retriever(search_type="mmr", search_kwargs={"k": 2})

# Vector similarity with score threshold
sst_retriever = loaded_faiss_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.3, "k": 2}
)

# Create ensemble retriever combining multiple retrieval methods
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, basic_retriever, sst_retriever],
    weights=[0.4, 0.3, 0.3]
)

In [37]:
ensemble_retriever.invoke("How to delete a policy?")

[Document(id='4c2f4d67-0345-46a2-a528-ae783c6aa4cc', metadata={'source': '/workspaces/RAG_BOT/DataEnriching/enriched_endpoints/PolicyMangement_endpoint_004.txt', 'doc_name': 'PolicyMangement_endpoint_004', 'doc_index': 4, 'chunk_index': 1, 'total_chunks_in_doc': 5, 'is_first_chunk': False, 'is_last_chunk': False, 'prev_doc_index': 3, 'next_doc_index': 5}, page_content='# Example User Questions  \n- How can I delete a policy block using the API?  \n- What is the endpoint to remove a policy?  \n- What authentication do I need for deleting a policy block?  \n- Can I get a response if the policy deletion fails?  \n- What format should the request body be in for deleting a policy?  \n- What does the response look like after deleting a policy block?  \n- Is the policy deletion action secure?  \n- What parameters do I need to provide to delete a policy?  \n- How do I check if the policy block was successfully deleted?  \n- What should I do if I receive an error when trying to delete a policy?

In [None]:
# Set up prompt templates
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

# Define system instructions for the model
SYSTEM_PROMPT = """
You are a highly knowledgeable and helpful CyberArk API documentation assistant. Your primary role is to answer developer questions accurately and clearly, *using only the provided API documentation context*.

**GENERAL RULES FOR ALL RESPONSES:**
1.  **Context Reliance:** Answer *ONLY* based on the provided "Documentation Context". Do not use external knowledge or invent information.
2.  **Handling Missing Information:** If the answer to the user's question is not explicitly found within the provided documentation context, politely state: "I don't have that specific information in the documentation I can access." Do NOT guess or invent details.
3.  **Markdown Formatting:** Always use Markdown for structuring your answers (headers, code blocks, bullet points, etc.) to enhance readability.


**SPECIFIC RESPONSE BEHAVIORS:**

* **IF the user input is a general greeting (e.g., "hello", "hi", "hey"):**
    * Respond politely as a friendly assistant.
    * Example: "Hello! I'm your CyberArk API assistant. How can I help you with the CyberArk API today?"

* **ELSE IF the user is asking about a specific API endpoint:**
    * Provide detailed endpoint information. This should include:
        * Path and HTTP method (GET, POST, PUT, DELETE)
        * Required parameters (query, path, body)
        * Security requirements
        * Request body schema (in JSON if available)
        * Response body schema (in JSON if available)
        * Any available sample requests and responses.
    * Format your response with markdown, using headers for sections and code blocks for JSON examples.

* **ELSE IF the user is asking a general question about CyberArk API functionality (not tied to a single endpoint):**
    * Answer based ONLY on the provided context.
    * Include relevant code examples from the context if available.

* **ELSE IF the Documentation Context is not available or question is outside the scope of CyberArk API documentation:**
    * Politely state: "I'm specialized in CyberArk API documentation. I don't have information about that topic in my knowledge base."
"""


system_message = SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT)

# Define how user questions and context are formatted
human_message = HumanMessagePromptTemplate.from_template(
    """
You are answering questions about CyberArk's API. Use the documentation context

Documentation Context:
----------------------
{context}

New User Question:
----------------------
{question}
"""
)


In [46]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from typing import List, Tuple, Optional
import time

def get_answer(query: str):
    
    # Skip retrieval for simple greetings
    greeting_terms = ["hello", "hi", "hey", "greetings"]
    is_greeting = any(term in query.lower() for term in greeting_terms) and len(query.split()) < 4
    
    if is_greeting:
        # Direct response for greetings without using retrieval
        response = "Hello! I'm your CyberArk API assistant. How can I help you with the CyberArk API today?"
    else:
        # Create a retrieval chain that combines context and chat history
        retrieval_chain = RunnableParallel(
            {"context": ensemble_retriever, "question": RunnablePassthrough()}
        )
        
        # Execute retrieval
        retrieval_output = retrieval_chain.invoke(query)
        
        # Format the retrieved documents into a single context string
        context_docs = retrieval_output["context"]
        context_text = "\n\n".join([doc.page_content for doc in context_docs])
        
        # Create the complete prompt
        chat_prompt = ChatPromptTemplate.from_messages([
            system_message,
            HumanMessagePromptTemplate.from_template(
                """
                You are answering questions about CyberArk's API. Use the documentation context

                Documentation Context:
                ----------------------
                {context}

                New User Question:
                ----------------------
                {question}
                """
            )
        ])
        
        # Create and execute the generation chain
        generation_chain = chat_prompt | llm
        
        # Add rate limiting to prevent API overload
        time.sleep(0.5)  # Simple rate limiting
        
        # Generate response
        response = generation_chain.invoke({
            "context": context_text,
            "question": query
        })
        
        # Extract the content from the response
        if hasattr(response, 'content'):
            response = response.content
    
    return response

get_answer("How to delete a policy?")

'To delete a policy block using the CyberArk API, you can follow the details below:\n\n### Endpoint Information\n- **Endpoint:** Delete Policy\n- **Path:** `/Policy/DeletePolicyBlock`\n- **Method:** POST\n\n### Authentication\n- You must provide a valid **bearer token** for authentication to access this endpoint.\n\n### Request Body\nThe request body must be in JSON format. However, the specific schema for the request body when deleting a policy block is not provided in the documentation context.\n\n### Response\nThe response format after deleting a policy block is not explicitly detailed in the documentation context.\n\n### Error Handling\nIf you encounter an error while trying to delete a policy, the documentation does not specify the exact steps to take.\n\n### Summary\nTo delete a policy, send a POST request to `/Policy/DeletePolicyBlock` with the required bearer token for authentication. Unfortunately, the documentation does not provide further details on the request body or respo

In [40]:
get_answer("How to delete a policy?")

'To delete a policy block using the CyberArk API, you can follow the details below:\n\n### Endpoint Information\n- **Endpoint:** Delete Policy\n- **Path:** `/Policy/DeletePolicyBlock`\n- **Method:** POST\n\n### Authentication\n- You must provide a valid **bearer token** for authentication to access this endpoint.\n\n### Request Body\nThe request body must be in JSON format. However, the specific schema for the request body when deleting a policy block is not provided in the documentation context.\n\n### Response\nThe response format after deleting a policy block is also not explicitly detailed in the documentation context.\n\n### Error Handling\nIf you encounter an error while trying to delete a policy, the documentation does not specify the exact steps to take.\n\n### Summary\nTo delete a policy:\n1. Send a POST request to `/Policy/DeletePolicyBlock`.\n2. Include a valid bearer token for authentication.\n3. Format the request body in JSON (specific schema not provided).\n\nIf you have

# Retrieval Method-2 Neighbourhood Expansion

In [41]:
def find_common_documents(docs1, docs2):
    """
    Find common documents between two document arrays based on doc_index and chunk_index in metadata.
    If nothing is common, return the top document from each array.
    
    Args:
        docs1: First array of documents
        docs2: Second array of documents
        
    Returns:
        List of common documents or top documents if nothing in common
    """
    # Handle empty array scenarios
    if not docs1 and not docs2:
        return []
    if not docs1:
        return [docs2[0]] if docs2 else []
    if not docs2:
        return [docs1[0]] if docs1 else []
    
    # Create a set of (doc_index, chunk_index) tuples from the first array for faster lookups
    doc_identifiers = {(doc.metadata.get('doc_index'), doc.metadata.get('chunk_index')) 
                      for doc in docs1 
                      if 'doc_index' in doc.metadata and 'chunk_index' in doc.metadata}
    
    # Find common documents based on doc_index and chunk_index
    common_docs = [doc for doc in docs2 
                  if 'doc_index' in doc.metadata and 'chunk_index' in doc.metadata and
                  (doc.metadata.get('doc_index'), doc.metadata.get('chunk_index')) in doc_identifiers]
    
    # If no common documents, take top document from each array
    if not common_docs:
        return [docs1[0], docs2[0]]
    
    return common_docs


In [42]:
from typing import List, Dict, Any

def get_adjacent_docs(chunks: List[Document], chunk_index: int, n: int = 1) -> Dict[str, Any]:
    """
    Get the current chunk and its adjacent chunks within the same document.
    
    Args:
        chunks: List of all document chunks
        chunk_index: Index of the current chunk
        n: Number of adjacent chunks to retrieve in each direction (default: 1)
    
    Returns:
        Dictionary containing the current chunk and lists of previous and next chunks
    """
    if chunk_index < 0 or chunk_index >= len(chunks):
        raise ValueError(f"Chunk index {chunk_index} is out of bounds")
    
    current_chunk = chunks[chunk_index]
    current_doc_index = current_chunk.metadata["doc_index"]
    current_chunk_idx_in_doc = current_chunk.metadata["chunk_index"]
    
    print(f"\nRetrieving adjacent chunks for chunk {chunk_index}:")
    print(f"  Document: {current_doc_index} ({current_chunk.metadata['doc_name']})")
    print(f"  Chunk position within document: {current_chunk_idx_in_doc + 1} of {current_chunk.metadata['total_chunks_in_doc']}")
    print(f"  Retrieving {n} chunks before and after")
    
    # Find all chunks from the current document
    current_doc_chunks = [
        chunk for chunk in chunks 
        if chunk.metadata["doc_index"] == current_doc_index
    ]
    
    # Sort chunks by their position in the document
    current_doc_chunks.sort(key=lambda x: x.metadata["chunk_index"])
    
    # Find the position of the current chunk within the document
    current_position = -1
    for i, chunk in enumerate(current_doc_chunks):
        if chunk.metadata["chunk_index"] == current_chunk.metadata["chunk_index"]:
            current_position = i
            break
    
    if current_position == -1:
        raise ValueError("Current chunk not found in its document chunks")
    
    # Get previous n chunks (if available)
    prev_chunks = []
    start_idx = max(0, current_position - n)
    if start_idx < current_position:
        prev_chunks = current_doc_chunks[start_idx:current_position]
    
    # Get next n chunks (if available)
    next_chunks = []
    end_idx = min(len(current_doc_chunks), current_position + n + 1)
    if current_position + 1 < end_idx:
        next_chunks = current_doc_chunks[current_position + 1:end_idx]
    
    print(f"  Found {len(prev_chunks)} previous chunks and {len(next_chunks)} next chunks")
    
    return {
        "current_chunk": current_chunk,
        "prev_chunks": prev_chunks,
        "next_chunks": next_chunks,
        "all_doc_chunks": current_doc_chunks
    }

In [None]:

# Implement the neighborhood expansion retrieval method
def retrieve_docs_with_neighborhood_expansion(query):
    """
    Retrieve documents using both similarity and BM25 methods, then find common documents.
    This implements a form of 'neighborhood expansion' by combining multiple retrieval methods.
    
    Args:
        query: User query string
        
    Returns:
        List of relevant documents
    """

    # RETRIEVERS USED WITH THE FLOW
    similarity_docs = sst_retriever.invoke(query)
    bm25_docs = bm25_retriever.invoke(query)
    
    # Find common or top documents
    relevant_docs = find_common_documents(similarity_docs, bm25_docs)

    final_expanded_docs = []
    # Get the neighbourhood expansion
    if len(relevant_docs)>0:
        for doc in relevant_docs:
            chunk_index = doc.metadata.get('chunk_index')
            response = get_adjacent_docs(chunks, chunk_index, n=2)
            expanded_docs = response["prev_chunks"] + [response["current_chunk"]] + response["next_chunks"]
            final_expanded_docs.extend(expanded_docs) 
    return final_expanded_docs

retrieve_docs_with_neighborhood_expansion("How to delete a policy")



Retrieving adjacent chunks for chunk 1:
  Document: 1 (PolicyMangement_endpoint_001)
  Chunk position within document: 2 of 6
  Retrieving 2 chunks before and after
  Found 1 previous chunks and 2 next chunks


[Document(metadata={'source': '/workspaces/RAG_BOT/DataEnriching/enriched_endpoints/PolicyMangement_endpoint_001.txt', 'doc_name': 'PolicyMangement_endpoint_001', 'doc_index': 1, 'chunk_index': 0, 'total_chunks_in_doc': 6, 'is_first_chunk': True, 'is_last_chunk': False, 'prev_doc_index': 0, 'next_doc_index': 2}, page_content='# Overview  \nThe **Delete Authentication Profile** endpoint allows users to remove an existing authentication profile from the system. This is done by sending a POST request to the path **/AuthProfile/DeleteProfile**. To access this endpoint, users must provide a valid bearer token for authentication.\n\n# Key Search Terms  \n- Delete authentication profile  \n- Remove auth profile  \n- Authentication UUID deletion  \n- API endpoint for deleting profiles  \n- POST request to delete profile  \n- Bearer token authentication  \n- AuthProfile API  \n- JSON request for profile deletion  \n- Error handling in profile deletion  \n- Authentication profile management'),
 

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from typing import List, Tuple, Optional
import time

def get_answer_using_neighbourhood(query: str, debug=True):
    """
    Retrieve documents and generate a response with neighborhood expansion.
    
    Args:
        query: User question
        debug: Whether to print the final prompt for debugging (default: True)
        
    Returns:
        Response from the LLM
    """
    # Skip retrieval for simple greetings
    greeting_terms = ["hello", "hi", "hey", "greetings"]
    is_greeting = any(term in query.lower() for term in greeting_terms) and len(query.split()) < 4
    
    if is_greeting:
        # Direct response for greetings without using retrieval
        response = "Hello! I'm your CyberArk API assistant. How can I help you with the CyberArk API today?"
    else:
        # Format the retrieved documents into a single context string
        context_docs = retrieve_docs_with_neighborhood_expansion(query)
        context_text = "\n\n".join([doc.page_content for doc in context_docs])

        print(f"\nRetrieved {len(context_docs)} relevant documents for the query: '{query}'")
        
        # Create the complete prompt
        chat_prompt = ChatPromptTemplate.from_messages([
            system_message,
            HumanMessagePromptTemplate.from_template(
            """
            You are answering questions about CyberArk's API. Use the documentation context

            Documentation Context:
            ----------------------
            {context}

            New User Question:
            ----------------------
            {question}
            """
            )
        ])
        
        # Format the prompt with actual values to print it
        if debug:
            # Get the messages with variables filled in
            formatted_messages = chat_prompt.format_messages(
                context=context_text,
                question=query
            )
            
            # Print the complete formatted prompt
            print("\n===== FINAL PROMPT SENT TO LLM =====")
            for i, message in enumerate(formatted_messages):
                role = message.type
                content = message.content
                print(f"\n--- Message {i+1} ({role}) ---")
                print(content)
            print("\n====================================")
        
        # Create and execute the generation chain
        generation_chain = chat_prompt | llm
        
        # Add rate limiting to prevent API overload
        time.sleep(0.5)  # Simple rate limiting
        
        # Generate response
        response = generation_chain.invoke({
            "context": context_text,
            "question": query
        })
        
        # Extract the content from the response
        if hasattr(response, 'content'):
            response = response.content
    
    return response

# Example usage with debug printing enabled
get_answer_using_neighbourhood("How to delete a policy?", debug=True)


Retrieving adjacent chunks for chunk 1:
  Document: 1 (PolicyMangement_endpoint_001)
  Chunk position within document: 2 of 6
  Retrieving 2 chunks before and after
  Found 1 previous chunks and 2 next chunks

Retrieved 4 relevant documents for the query: 'How to delete a policy?'

===== FINAL PROMPT SENT TO LLM =====

--- Message 1 (system) ---

You are a highly knowledgeable and helpful CyberArk API documentation assistant. Your primary role is to answer developer questions accurately and clearly, *using only the provided API documentation context*.

**GENERAL RULES FOR ALL RESPONSES:**
1.  **Context Reliance:** Answer *ONLY* based on the provided "Documentation Context". Do not use external knowledge or invent information.
2.  **Handling Missing Information:** If the answer to the user's question is not explicitly found within the provided documentation context, politely state: "I don't have that specific information in the documentation I can access." Do NOT guess or invent detail

"I don't have that specific information in the documentation I can access."