In [57]:
# Install required packages for the RAG system
%pip install -q langchain-ollama langchain langchain-community faiss-cpu langchain_huggingface rank_bm25 gradio nest_asyncio markdown2 ipywidgets

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


# CyberArk API Documentation Assistant
This notebook implements a Retrieval-Augmented Generation (RAG) system using LangChain components to answer questions about CyberArk API documentation.

In [58]:
# Initialize embedding model and load vector store
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load embedding model
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load pre-existing vector store
loaded_faiss_store = FAISS.load_local(
    "/workspaces/RAG_BOT/LocalEmbeddings/Hugging_split_enriched_faiss_index",
    embedding_model,
    allow_dangerous_deserialization=True
)
print("FAISS vector store loaded successfully.")

FAISS vector store loaded successfully.


In [59]:
# Set up retrievers
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Extract documents from the docstore
try:
    all_docs = [loaded_faiss_store.docstore._dict[doc_id] for doc_id in loaded_faiss_store.index_to_docstore_id.values()]
except AttributeError:
    # Fallback for different docstore structure
    all_docs = [loaded_faiss_store.docstore.get(doc_id) for doc_id in loaded_faiss_store.index_to_docstore_id.values()]

# Configure different retrieval methods
bm25_retriever = BM25Retriever.from_documents(all_docs)
bm25_retriever.k = 2

# Vector similarity retrieval with MMR
basic_retriever = loaded_faiss_store.as_retriever(search_type="mmr", search_kwargs={"k": 2})

# Vector similarity with score threshold
sst_retriever = loaded_faiss_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.3, "k": 2}
)

# Create ensemble retriever combining multiple retrieval methods
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, basic_retriever, sst_retriever],
    weights=[0.4, 0.3, 0.3]
)

In [60]:
# Initialize the LLM with rate limiting
from langchain_ollama import ChatOllama
from langchain_core.rate_limiters import InMemoryRateLimiter

# Configure rate limiting to prevent overloading the model
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)

# Initialize the LLM
llm = ChatOllama(
    model='llama3.2:latest',
    temperature=0.1,
    rate_limiter=rate_limiter
)

In [61]:
# Set up prompt templates
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

# Define system instructions for the model
SYSTEM_PROMPT = """
You are a highly knowledgeable CyberArk API documentation assistant. Your job is to answer developer questions accurately and clearly using only the provided API documentation context.

For general greetings or small talk (like "hello", "hi"), respond politely as a friendly assistant.

For Cyberark API documentation related questions, Your answers must follow these rules:

1. Use only the given context. If the answer is not in the context, say "I don't know based on the provided documentation."
2. If the user asks about an endpoint, provide its details from the context including:
   - Path and method
   - Required parameters (query, path, body)
   - Security requirements
   - Request body schema (in JSON if available)
   - Response body schema (in JSON if available)
   - Sample request and response if present
3. Be clear and structured:
   - Use bullet points for properties
   - Include code blocks for JSON
4. Never invent or guess missing details.
5. If the context includes multiple endpoints, select only the most relevant.
6. For CyberArk API questions, use only the given context. If the answer is not in the context, say "I don't know based on the provided documentation."

Answer as if you are the official CyberArk API documentation.
"""

system_message = SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT)

# Define how user questions and context are formatted
human_message = HumanMessagePromptTemplate.from_template(
    """
You are answering questions about CyberArk's API. Use the documentation context

Documentation Context:
----------------------
{context}

New User Question:
----------------------
{question}
"""
)

In [62]:
# Core answer generation function
def get_answer(query, description="", chat_history=[], retriever=ensemble_retriever):
    """
    Generate an answer to a user query using RAG with the provided retriever.
    
    Args:
        query (str): The user's question
        description (str): Optional label for debugging
        chat_history (list): Previous conversation turns
        retriever: The retrieval component to use
        
    Returns:
        dict: Contains the generated answer and source documents
    """
    from langchain_core.runnables import RunnablePassthrough
    
    # Retrieve relevant context
    sample_context = retriever.invoke(query)
    print(f"\n=== {description} ===")
    print(f"Question: {query}")
    print("Retrieved context:")
    print(sample_context)
    
    # Create the prompt with system and human messages
    CONVERSATIONAL_PROMPT = ChatPromptTemplate.from_messages([
        system_message,
        human_message
    ])
    
    # Create chain to process the prompt and generate response
    chain = CONVERSATIONAL_PROMPT | llm
    
    # Combine retrieved documents into context text
    context_text = "\n\n".join([doc.page_content for doc in sample_context])
    
    # Generate the answer
    result = chain.invoke({
        "context": context_text,
        "question": query
    })
    
    print("Answer:")
    print(result.content)
    
    # Return result in standard format
    return {
        "result": result.content,
        "source_documents": sample_context
    }

In [64]:
# Test the retrieval and answer generation
get_answer("How to delete a policy?", "Test Query")


=== Test Query ===
Question: How to delete a policy?
Retrieved context:
Answer:
To delete a policy using the `/Policy/DeletePolicyBlock` endpoint:

*   Send a POST request to `https://your-cyberark-instance.com/Policy/DeletePolicyBlock`.
*   In the request body, provide a JSON payload with the `path` field containing the path of the policy block you want to delete.
*   The `path` field is required and must be a string.

Here's an example of a valid request:

```json
{
  "path": "/Policy/Block1"
}
```

**Security Considerations:**

This endpoint uses bearer authentication for security purposes. You need to authenticate with a valid bearer token before sending the request.

**Response:**

The response will be in JSON format, containing a `Result` field indicating whether the policy block deletion was successful and an optional `Error` field containing error message text if the operation fails.

Here's an example of a successful response:

```json
{
  "Result": true,
  "Error": {}
}
```


{'result': 'To delete a policy using the `/Policy/DeletePolicyBlock` endpoint:\n\n*   Send a POST request to `https://your-cyberark-instance.com/Policy/DeletePolicyBlock`.\n*   In the request body, provide a JSON payload with the `path` field containing the path of the policy block you want to delete.\n*   The `path` field is required and must be a string.\n\nHere\'s an example of a valid request:\n\n```json\n{\n  "path": "/Policy/Block1"\n}\n```\n\n**Security Considerations:**\n\nThis endpoint uses bearer authentication for security purposes. You need to authenticate with a valid bearer token before sending the request.\n\n**Response:**\n\nThe response will be in JSON format, containing a `Result` field indicating whether the policy block deletion was successful and an optional `Error` field containing error message text if the operation fails.\n\nHere\'s an example of a successful response:\n\n```json\n{\n  "Result": true,\n  "Error": {}\n}\n```\n\n**Handling Errors:**\n\nIf the poli

In [65]:
# Streaming answer function for gradual text display
def stream_answer(query, chat_history=[]):
    """
    Generate an answer and stream it in chunks for gradual display.
    
    Args:
        query (str): The user's question
        chat_history (list): Previous conversation turns
        
    Yields:
        str: Chunks of the answer text
    """
    # Incorporate chat history for context if available
    full_query = query
    if len(chat_history) > 0:
        history_context = ""
        for turn in chat_history[-5:]:  # Use last 5 turns
            history_context += f"User: {turn[0]}\nAssistant: {turn[1]}\n"
        full_query = history_context + f"User: {query}"
    
    # Generate the complete answer
    result = get_answer(full_query, "Streaming Answer", chat_history)
    text = result["result"]
    
    # Stream the answer in chunks
    chunk_size = 50
    for i in range(0, len(text), chunk_size):
        yield text[i:i+chunk_size]

In [66]:
# Gradio web interface
import gradio as gr

def gradio_stream_fn(message, history):
    """
    Streaming function for Gradio chat interface.
    
    Args:
        message (str): Current user message
        history (list): Chat history as [(user1, bot1), (user2, bot2), ...]
        
    Yields:
        str: Incremental answer text for display
    """
    answer_chunks = stream_answer(message, history)
    partial_answer = ""
    for chunk in answer_chunks:
        partial_answer += chunk
        yield partial_answer

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 CyberArk API RAG ChatBot")
    chatbot = gr.ChatInterface(
        fn=gradio_stream_fn,
        chatbot=gr.Chatbot(),
        examples=[
            "How to delete a policy?",
            "What are radiusclientlist?",
        ],
        title="CyberArk API Assistant",
        description="Ask questions about CyberArk API. The assistant will answer using the official documentation.",
        theme="default"
    )

# Launch the web interface
demo.launch()

  chatbot=gr.Chatbot(),


* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.





=== Streaming Answer ===
Question: How to delete a policy?
Retrieved context:
Answer:
To delete a policy using the `/Policy/DeletePolicyBlock` endpoint:

*   Send a POST request to `https://your-cyberark-instance.com/Policy/DeletePolicyBlock`.
*   Include the policy block path in the JSON payload.
*   The `path` field is required and should be a string value.

Here's an example of a valid request:
```json
{
  "path": "/Policy/Block1"
}
```
Make sure to replace `/Policy/Block1` with the actual path of the policy block you want to delete.

**Security Considerations:**

*   This endpoint uses bearer authentication (`bearerAuth`) for security purposes.
*   You must authenticate with a valid bearer token before sending the request.

**Error Handling:**

*   If the policy block does not exist in your system, the response will contain an error message.
*   The `Result` field in the response will be set to `false`, indicating that the deletion failed.
*   You can check the `Error` field for m

# DEBUG UI

In [None]:
import ipywidgets as widgets
from IPython.display import display, HTML, Markdown, clear_output
import markdown2

def start_ui():
    """
    Simple Jupyter notebook UI for the CyberArk API documentation assistant.
    Uses the existing get_answer logic with enhanced display.
    """
    # Create widgets
    header = widgets.HTML("<h1 style='color:#0066cc'>CyberArk API Documentation Assistant</h1>")
    question = widgets.Text(
        description='Question:',
        placeholder='Enter your question here...',
        layout=widgets.Layout(width='80%')
    )

    submit = widgets.Button(
        description='Submit',
        button_style='primary',
        layout=widgets.Layout(width='100px')
    )

    clear_btn = widgets.Button(
        description='Clear',
        button_style='warning',
        layout=widgets.Layout(width='100px')
    )

    context_area = widgets.Output()
    answer_area = widgets.Output()

    # Function to handle question submission
    def on_submit_clicked(b):
        # Clear previous outputs
        with context_area:
            clear_output()
        with answer_area:
            clear_output()

        query = question.value
        if not query.strip():
            with answer_area:
                print("Please enter a valid question.")
            return

        # Get context
        sample_context = ensemble_retriever.invoke(query)

        # Display context
        with context_area:
            print("=== Retrieved Context ===")
            for i, doc in enumerate(sample_context):
                print(f"\nDocument {i+1}:")
                print("-" * 40)
                print(doc.page_content)
                print("-" * 40)

        # Create prompt and get answer
        CONVERSATIONAL_PROMPT = ChatPromptTemplate.from_messages([
            system_message,
            human_message
        ])

        formatted_prompt = CONVERSATIONAL_PROMPT.format(
            context="\n\n".join([doc.page_content for doc in sample_context]),
            question=query
        )

        from langchain.chains import RetrievalQA
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=ensemble_retriever,
            return_source_documents=True
        )

        result = qa_chain.invoke({"query": formatted_prompt})

        # Display answer with markdown formatting
        with answer_area:
            display(HTML("<h3>Answer:</h3>"))
            # Convert markdown to HTML for better display
            html_content = markdown2.markdown(result["result"], extras=["fenced-code-blocks", "tables"])
            display(HTML(html_content))
            print(f"\nNumber of source documents: {len(result['source_documents'])}")

    # Function to clear the interface
    def on_clear_clicked(b):
        question.value = ""
        with context_area:
            clear_output()
        with answer_area:
            clear_output()

    # Connect buttons to handlers
    submit.on_click(on_submit_clicked)
    clear_btn.on_click(on_clear_clicked)

    # Layout the UI components
    input_row = widgets.HBox([question, submit, clear_btn])

    context_box = widgets.VBox([
        widgets.HTML("<h3>Retrieved Context:</h3>"),
        context_area
    ])

    answer_box = widgets.VBox([
        widgets.HTML("<h3>Answer:</h3>"),
        answer_area
    ])

    # Assemble the final UI
    ui = widgets.VBox([
        header,
        widgets.HTML("<p>Ask any question about the CyberArk API:</p>"),
        input_row,
        widgets.HBox([answer_box, context_box])
    ])

    display(ui)

# Install markdown2 for better HTML rendering
%pip install -q markdown2

# Launch the UI
start_ui()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


VBox(children=(HTML(value="<h1 style='color:#0066cc'>CyberArk API Documentation Assistant</h1>"), HTML(value='…