In [None]:
!pip install colab-xterm
%load_ext colabxterm
%xterm

In [2]:
# Install the retrieval / LLM / UI packages imported by the cells below.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve to releases
# whose import paths differ from the ones used in this notebook — consider pinning.
%pip install -q langchain-ollama langchain langchain-community faiss-cpu langchain_huggingface rank_bm25 gradio

In [3]:
from re import search
# Custom MultiQuery with Output Parser
from typing import List
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_ollama import OllamaLLM


# Initialize the LLM used to generate alternative phrasings of the user's question.
from langchain_ollama import OllamaLLM
llm = OllamaLLM(model="llama3.2:latest", temperature=0.5)

# Initialize the embedding model.
# NOTE(review): presumably this is the same model the saved index was built with — confirm.
from langchain_huggingface import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load the vector store.
# The FAISS integration lives in langchain-community; `langchain.vectorstores` is only a
# deprecated re-export of it.
from langchain_community.vectorstores import FAISS
loaded_faiss_store = FAISS.load_local(
    "/content/RAG_BOT/LocalEmbeddings/huggingface_faiss_index",
    embedding_model,
    # SECURITY: this opts in to pickle deserialization of the saved index, which can
    # execute arbitrary code. Only load index files that you created yourself.
    allow_dangerous_deserialization=True
)
print("FAISS vector store loaded successfully.")

# Raise the multi-query retriever's logger to INFO so the generated queries are visible.
import logging

logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)


# Domain-specific prompt for CyberArk API documentation.
# The template text is kept separate from the PromptTemplate object so it can be
# inspected or edited on its own.
# NOTE(review): the text contains typos ("shoulb", "orignial", "questins") and also asks
# the model to answer greetings, although this prompt is used for query rewriting —
# consider cleaning it up.
QUERY_PROMPT_TEMPLATE = """
You are helping retrieve answers from CyberArk API documentation.

For general greetings or small talk (like "hello", "hi"), respond politely as a friendly assistant.

For Cyberark API Documentation related question, Your task is to rewrite the user's question in 2 different ways, one should talk about the exact user intent and other shoulb be like a rephrased question of the orignial question without losing its intent and remember that the questins are about API's.

Return each variant on its own line with no numbering or bullets.

User question:
{question}

Variations:
"""

QUERY_PROMPT = PromptTemplate(
    template=QUERY_PROMPT_TEMPLATE,
    input_variables=["question"],
)



# Create chain and retriever
class LineListOutputParser(BaseOutputParser[List[str]]):
    """Turn the LLM's raw text into a list with one search query per non-empty line."""

    def parse(self, text: str) -> List[str]:
        """Split `text` on newlines, trimming whitespace and dropping blank lines."""
        return [line.strip() for line in text.strip().split("\n") if line.strip()]


# The parser is required: without it the chain returns one plain string, and the
# multi-query retriever would loop over that string character by character, running
# one vector search per character instead of one per generated query.
llm_chain = QUERY_PROMPT | llm | LineListOutputParser()

custom_multi_query_retriever = MultiQueryRetriever(
    retriever=loaded_faiss_store.as_retriever(
        search_type="mmr",
        # `score_threshold` only applies to search_type="similarity_score_threshold";
        # MMR search ignores it, so it is left out to avoid a misleading setting.
        search_kwargs={"k": 5}
    ),
    llm_chain=llm_chain,
    parser_key="lines"
)

FAISS vector store loaded successfully.


In [4]:
# Ensemble Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever

# Extract documents from the docstore through its public `search` method instead of
# reaching into the private `_dict` attribute.
all_docs = []
for doc_id in loaded_faiss_store.index_to_docstore_id.values():
    stored_doc = loaded_faiss_store.docstore.search(doc_id)
    # `search` signals a missing id by returning a message string rather than a Document.
    if isinstance(stored_doc, str):
        continue
    all_docs.append(stored_doc)

# Fail early with a clear message; BM25 cannot be built from an empty corpus.
if not all_docs:
    raise ValueError("The FAISS docstore contains no documents; cannot build the BM25 retriever.")

# Create BM25 retriever
bm25_retriever = BM25Retriever.from_documents(all_docs)
bm25_retriever.k = 2

In [5]:
# Create ensemble retriever
# Combines the BM25 retriever with the multi-query FAISS retriever.
ensemble_retriever = EnsembleRetriever(
    # 0.4 for BM25, 0.6 for multi-query (order assumed to follow `retrievers` — confirm).
    weights=[0.4, 0.6],
    retrievers=[
        bm25_retriever,
        custom_multi_query_retriever,
    ],
)


In [6]:
from langchain_ollama import ChatOllama
from langchain_core.rate_limiters import InMemoryRateLimiter

# Throttle calls to the chat model: 0.1 requests per second is one request every
# ten seconds on average.
# NOTE(review): confirm that this very low rate is intended for a locally served model.
rate_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)

# Chat model that writes the final answer (temperature 0).
# NOTE: this rebinds the name `llm`, which an earlier cell bound to an OllamaLLM instance.
llm = ChatOllama(
    model="llama3.2:latest",
    temperature=0,
    rate_limiter=rate_limiter,
)


In [7]:
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate


# System instructions for the answering model.
# NOTE(review): rules 1 and 6 say the same thing — consider merging them.
SYSTEM_PROMPT = """
You are a highly knowledgeable CyberArk API documentation assistant. Your job is to answer developer questions accurately and clearly using only the provided API documentation context.

For general greetings or small talk (like "hello", "hi"), respond politely as a friendly assistant.

For Cyberark API documentation related questions, Your answers must follow these rules:

1. Use only the given context. If the answer is not in the context, say "I don't know based on the provided documentation."
2. If the user asks about an endpoint, provide its details from the context including:
   - Path and method
   - Required parameters (query, path, body)
   - Security requirements
   - Request body schema (in JSON if available)
   - Response body schema (in JSON if available)
   - Sample request and response if present
3. Be clear and structured:
   - Use bullet points for properties
   - Include code blocks for JSON
4. Never invent or guess missing details.
5. If the context includes multiple endpoints, select only the most relevant.
6. For CyberArk API questions, use only the given context. If the answer is not in the context, say "I don't know based on the provided documentation."

Answer as if you are the official CyberArk API documentation.
"""

# Per-question template; filled with `context`, `chat_history` and `question`.
HUMAN_PROMPT = """
You are answering questions about CyberArk's API. Use the documentation context and the chat history.

Documentation Context:
----------------------
{context}

Chat History:
----------------------
{chat_history}

New User Question:
----------------------
{question}
"""

system_message = SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT)
human_message = HumanMessagePromptTemplate.from_template(HUMAN_PROMPT)

# Full chat prompt: the system instructions followed by the templated human turn.
CONVERSATIONAL_PROMPT = ChatPromptTemplate.from_messages(
    [system_message, human_message]
)


In [8]:
def format_chat_history(history):
    """Render an iterable of (role, text) pairs as newline-separated "role: text" lines."""
    rendered_turns = []
    for role, text in history:
        rendered_turns.append(f"{role}: {text}")
    return "\n".join(rendered_turns)

# Add this helper early in your notebook
GREETINGS = {"hi", "hello", "hey", "greetings", "good morning", "good evening", "good afternoon"}

# Characters ignored around a greeting so that "Hi!" or "hello." still count as greetings.
_GREETING_PADDING = " \t\r\n!.,?;:"


def is_greeting(text: str) -> bool:
    """Return True when `text` is nothing more than one of the known GREETINGS.

    Matching ignores case, surrounding whitespace and punctuation, and repeated
    internal whitespace ("Good   morning!" matches). Text that contains anything
    beyond the greeting itself ("hi there") does not match. Empty or None input
    returns False.
    """
    if not text:
        return False
    # Collapse runs of whitespace first so two-word greetings compare reliably.
    normalized = " ".join(text.lower().split()).strip(_GREETING_PADDING)
    return normalized in GREETINGS

async def get_answer(user_question, history, retriever, chat_model):
    """Answer `user_question` from retrieved documentation and the chat history.

    Args:
        user_question: The raw text typed by the user.
        history: Iterable of (role, text) pairs, as accepted by `format_chat_history`.
        retriever: Retriever object exposing an async `ainvoke(query)` method.
        chat_model: Chat model exposing an async `ainvoke(messages)` method.

    Returns:
        The answer text. Greetings, one-word inputs, empty retrieval results and
        retrieval/generation errors each produce a fixed or error message string,
        so every path returns a string rather than a model message object.
    """
    # Intercept greetings or very short generic inputs
    if is_greeting(user_question):
        return "Hello! 👋 How can I help you with CyberArk API documentation today?"

    if len(user_question.strip().split()) < 2:
        return "Could you please provide more details about your question related to the CyberArk API?"

    # Retrieval stage
    try:
        # Awaiting `ainvoke` replaces the deprecated, blocking `get_relevant_documents`
        # so this coroutine no longer stalls the event loop during retrieval.
        docs = await retriever.ainvoke(user_question)
        if not docs:
            return "I couldn't find anything relevant in the documentation for that question. Can you rephrase or be more specific?"

        context = "\n\n".join(doc.page_content for doc in docs)
    except Exception as e:
        return f"❌ Error while retrieving documentation: {e}"

    # Build the prompt as a list of messages so the system instructions keep their
    # own role; `.format()` would flatten everything into one plain string.
    messages = CONVERSATIONAL_PROMPT.format_messages(
        context=context,
        chat_history=format_chat_history(history),
        question=user_question
    )

    # Call SLM
    try:
        answer = await chat_model.ainvoke(messages)
    except Exception as e:
        return f"❌ Error while generating answer: {e}"

    # Chat models return a message object; hand back only its text.
    if hasattr(answer, "content"):
        return answer.content
    return str(answer)

In [9]:
!pip install nest_asyncio



In [None]:
import nest_asyncio
nest_asyncio.apply()

import asyncio
import traceback
import gradio as gr
import time

def _flatten_history(history):
    """Convert Gradio chat history into a flat list of ("User" | "Assistant", text) pairs.

    Handles pair-style history ([user_message, assistant_message] per turn) as well as
    dict-style history ({"role": ..., "content": ...} per message). Entries whose text
    is None are skipped.
    """
    flattened = []
    for turn in history or []:
        if isinstance(turn, dict):
            content = turn.get("content")
            if content is None:
                continue
            role = "User" if turn.get("role") == "user" else "Assistant"
            flattened.append((role, str(content)))
        else:
            user_text, assistant_text = turn[0], turn[1]
            if user_text is not None:
                flattened.append(("User", str(user_text)))
            if assistant_text is not None:
                flattened.append(("Assistant", str(assistant_text)))
    return flattened


async def _get_answer_with_logging(user_question, history, retriever, llm_model):
    """Retrieve context and query the model, printing progress and timings along the way."""
    # Handle greetings and short inputs
    if is_greeting(user_question):
        print("👋 Greeting detected, skipping retrieval")
        return "Hello! 👋 How can I help you with CyberArk API documentation today?"

    if len(user_question.strip().split()) < 2:
        print("📝 Query too short, skipping retrieval")
        return "Could you please provide more details about your question related to the CyberArk API?"

    # Retrieval stage
    retrieval_start = time.time()
    print("\n🔎 RETRIEVING DOCUMENTS...")
    try:
        # `invoke` replaces the deprecated `get_relevant_documents`.
        docs = retriever.invoke(user_question)
        retrieval_time = time.time() - retrieval_start
        print(f"⏱️ Document retrieval time: {retrieval_time:.2f} seconds")

        if not docs:
            print("❌ No relevant documents found")
            return "I couldn't find anything relevant in the documentation for that question. Can you rephrase or be more specific?"

        print(f"\n📄 RETRIEVED {len(docs)} DOCUMENTS:")
        for i, doc in enumerate(docs[:3]):  # Show first 3 docs
            print(f"Document {i+1} (excerpt): {doc.page_content[:200]}...")
        if len(docs) > 3:
            print(f"...and {len(docs)-3} more documents")

        context = "\n\n".join([doc.page_content for doc in docs])
        context_length = len(context)
        print(f"\n📊 CONTEXT SIZE: {context_length} characters")
    except Exception as error:
        print(f"❌ RETRIEVAL ERROR: {error}")
        return f"❌ Error while retrieving documentation: {error}"

    # Format chat history
    formatted_history_text = format_chat_history(history)

    # Build prompt
    print("\n💬 SENDING TO LLM...")
    llm_start = time.time()

    # Call LLM
    try:
        # `format_messages` keeps the system and human messages separate;
        # `.format()` would collapse the whole prompt into a single string.
        answer = await llm_model.ainvoke(CONVERSATIONAL_PROMPT.format_messages(
            context=context,
            chat_history=formatted_history_text,
            question=user_question
        ))
        llm_time = time.time() - llm_start
        print(f"⏱️ LLM response time: {llm_time:.2f} seconds")
        print(f"\n✅ ANSWER (first 100 chars): {str(answer)[:100]}...")

        # Extract content from AIMessage object if needed
        if hasattr(answer, 'content'):
            return answer.content
        return str(answer)  # Convert to string to ensure compatibility
    except Exception as error:
        print(f"❌ LLM ERROR: {error}")
        return f"❌ Error while generating answer: {error}"


def sync_get_answer(user_message, history):
    """Synchronous Gradio callback: answer `user_message` given the chat `history`.

    Args:
        user_message: The text the user just submitted.
        history: Chat history as supplied by Gradio (pair-style or dict-style).

    Returns:
        The answer text, or an error message containing the traceback when an
        unexpected exception occurs.
    """
    print("\n" + "="*50)
    print(f"🔍 USER QUERY: {user_message}")

    # Log chat history. Both sides of every turn are kept; the previous version
    # dropped the user's message whenever the assistant had already replied.
    formatted_history = _flatten_history(history)
    if formatted_history:
        print("\n📜 CHAT HISTORY:")
        for role, text in formatted_history:
            print(f"  {role}: {text[:100]}{'...' if len(text) > 100 else ''}")

    start_time = time.time()

    try:
        # Create a new event loop instead of getting the current one
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        try:
            answer = loop.run_until_complete(
                _get_answer_with_logging(user_message, formatted_history, ensemble_retriever, llm)
            )
        finally:
            # Always close the loop, even if an exception occurs
            loop.close()

        total_time = time.time() - start_time
        print(f"\n⏱️ TOTAL PROCESSING TIME: {total_time:.2f} seconds")
        print("="*50 + "\n")

        return answer
    except Exception as error:
        tb = traceback.format_exc()
        print(f"❌ Error while generating answer: {error}\n{tb}")
        # NOTE(review): the full traceback is returned to the chat user; consider a
        # generic message if this interface is exposed to people outside the team.
        return (
            "⚠️ Sorry, there was an error processing your request:\n"
            f"```\n{tb}\n```"
        )

# Static UI text, kept apart from the layout code below.
APP_TITLE = "🔐 CyberArk API ChatBot"
APP_DESCRIPTION = (
    "Ask any question about CyberArk API documentation.\n"
    "Supports conversation context. For greetings, responds politely. "
    "For API questions, retrieves details from documentation."
)
EXAMPLE_QUESTIONS = [
    "Hi",
    "How do I delete an authentication profile?",
    "What is the request body for /Policy/SavePolicyBlock3?",
    "List all parameters for GetPolicyBlock"
]

# Build the chat UI; every submitted message is routed through sync_get_answer.
with gr.Blocks(title=APP_TITLE) as demo:
    chatbot = gr.ChatInterface(
        fn=sync_get_answer,
        title=APP_TITLE,
        description=APP_DESCRIPTION,
        examples=EXAMPLE_QUESTIONS,
    )

# Launch the Gradio interface
print("🚀 Starting CyberArk API ChatBot...")
print("💻 Check console for detailed logs when questions are asked")
# share=True requests a public URL; debug=True keeps the cell running so logs stay visible.
demo.launch(debug=True, share=True)

  self.chatbot = Chatbot(


🚀 Starting CyberArk API ChatBot...
💻 Check console for detailed logs when questions are asked
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://eab1c77a0a7aa60df9.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)



🔍 USER QUERY: What is the request body for /Policy/SavePolicyBlock3?

🔎 RETRIEVING DOCUMENTS...
❌ RETRIEVAL ERROR: [Errno 111] Connection refused

⏱️ TOTAL PROCESSING TIME: 0.00 seconds



In [None]:
import time

async def debug_get_answer(user_question, history, retriever, llm):
    """Run get_answer() once and print the inputs, the answer and the elapsed time.

    Returns the answer on success. If get_answer() (or the printing of its result)
    raises, the traceback is printed and None is returned.
    """
    import traceback

    print("⚡️ Debugging get_answer() call")
    print(f"User Question: {user_question}")
    print(f"Chat History: {history}")
    print("=" * 50)

    started_at = time.time()
    try:
        answer = await get_answer(user_question, history, retriever, llm)
        elapsed = time.time() - started_at

        print("\n✅ Answer:")
        print(answer)
        print(f"\n⏱️ Response time: {elapsed:.2f} seconds")
        return answer
    except Exception:
        print("\n❌ ERROR in get_answer():")
        traceback.print_exc()
        return None


In [None]:
# Exercise get_answer() directly, without going through the Gradio UI.
# Top-level `await` works here because the notebook cell runs inside an event loop.
await debug_get_answer(
    "How do I delete an authentication profile?",
    history=[],
    retriever=ensemble_retriever,
    llm=llm
)


⚡️ Debugging get_answer() call
User Question: How do I delete an authentication profile?
Chat History: []
