##Install necessary libraries in your Python environment where you run Jupyter:
Bash
pip install jupyterlab langchain sqlalchemy psycopg2-binary pgvector sentence-transformers pypdf python-dotenv google-generativeai # Add other loaders like docx if needed

#jupyter notebook --no-browser --ip=127.0.0.1 --port=8888



In [None]:
#Python
# Cell 1: Install Libraries (if not already installed in your env)
%pip install torch langchain_google_genai langchain_community langchain sqlalchemy psycopg2-binary pgvector sentence-transformers pypdf python-dotenv google-generativeai
%pip install --upgrade sentence-transformers huggingface_hub

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Cell 2: Load Environment Variables
import os
from dotenv import load_dotenv

load_dotenv()

# Replace with your actual K3s database details (get from Step 5 and Step 6)
DB_USER = os.getenv("DB_USER", "postgres")
DB_PASSWORD = os.getenv("DB_PASSWORD", "BTxg8LN5lH") # CHANGE THIS!
DB_HOST = os.getenv("DB_HOST", "localhost") # Use localhost because of port-forwarding
DB_PORT = os.getenv("DB_PORT", "5432")
DB_NAME = os.getenv("DB_NAME", "rag_db")

# AI Studio API Key (if you want to experiment with Google Embeddings/LLM)
# Get your API key from https://aistudio.google.com/app/apikey
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Your chosen embedding model for Sentence Transformers
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")

# Define the connection string
CONNECTION_STRING = f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

print("Environment variables loaded.")


Environment variables loaded.


In [None]:
#Python
# Cell 3: Initialize Database Connection and Vector Store
import sentence_transformers
print(sentence_transformers.__version__)
from sqlalchemy import create_engine, text
from pgvector.sqlalchemy import Vector
from sqlalchemy.orm import sessionmaker, declarative_base
from sqlalchemy import Column, Text, Integer

# Langchain components
from langchain_community.vectorstores.pgvector import PGVector
from langchain_community.embeddings import SentenceTransformerEmbeddings
# from langchain_google_genai import GoogleGenerativeAIEmbeddings # Uncomment to use Google Embeddings

# Define table name for your vectors
COLLECTION_NAME = "internal_knowledge"

# Define base for SQLAlchemy models
Base = declarative_base()

# Define a simple model for your chunks
class DocumentChunk(Base):
    __tablename__ = "document_chunks"
    id = Column(Integer, primary_key=True)
    content = Column(Text)
    embedding = Column(Vector(384)) # Adjust dimension based on your embedding model
    source = Column(Text) # Add metadata like source file

# Create engine and session maker
engine = create_engine(CONNECTION_STRING)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Create table if it doesn't exist
print(f"Ensuring table '{DocumentChunk.__tablename__}' exists...")
Base.metadata.create_all(engine)
print("Table check complete.")

# Initialize Embedding Model (Sentence Transformers)
try:
    print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
    embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)
    print("Sentence Transformers embedding model loaded.")
except Exception as e:
    print(f"Error loading Sentence Transformers model: {e}")
    # Fallback or alternative embedding method can be added here

# # Optional: Initialize Google Generative AI Embeddings
# if GOOGLE_API_KEY:
#     try:
#         print("Initializing Google Generative AI Embeddings...")
#         google_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
#         print("Google Generative AI Embeddings initialized.")
#     except Exception as e:
#         print(f"Error initializing Google Embeddings: {e}")
#         google_embeddings = None
# else:
#     google_embeddings = None
#     print("GOOGLE_API_KEY not set. Skipping Google Embeddings.")


# Initialize PGVector store (Langchain wrapper)
# We'll initialize this later once chunks and embeddings are ready to be added or queried
vector_store = None

print("Database setup and embedding models initialized.")


5.0.0
Ensuring table 'document_chunks' exists...


OperationalError: (psycopg2.OperationalError) connection to server at "localhost" (127.0.0.1), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [None]:
#Python
# Cell 4: Document Loading and Chunking Experimentation

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import glob # To find documents

# --- Configuration for Experimentation ---
DOCUMENT_PATH = "../pv_chatbot_general/design/" # Make sure this directory exists and contains PDFs. Can be relative.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
# ---------------------------------------
# --- Get and Print the Absolute Path ---
# Get the absolute path based on the kernel's current working directory
# This 'document_path_absolute_dir' should be used for consistency
document_path_absolute_dir = os.path.abspath(DOCUMENT_PATH) # This is the directory
print(f"Attempting to load documents from the absolute directory: {document_path_absolute_dir}")
print(f"Current working directory of the kernel: {os.getcwd()}")
# -----------------------------------------


# Load documents
documents = []
# Use the absolute directory path for glob
for rel_file_path_in_glob in glob.glob(os.path.join(document_path_absolute_dir, "*.pdf")):
    # rel_file_path_in_glob is already absolute because document_path_absolute_dir is absolute
    abs_file_path = os.path.abspath(rel_file_path_in_glob) # Ensure it's truly absolute and normalized
    print(f"Loading {abs_file_path}...")
    loader = PyPDFLoader(abs_file_path) # Load using absolute path
    loaded_docs_for_file = loader.load()
    # Ensure metadata['source'] is the absolute path for all loaded documents
    for doc in loaded_docs_for_file:
        doc.metadata['source'] = abs_file_path # Explicitly set to ensure consistency
    documents.extend(loaded_docs_for_file)

print(f"Loaded {len(documents)} raw documents.")
if documents:
    print(f"Example source from first loaded document: {documents[0].metadata.get('source')}")

# Text Splitter experimentation
# Try different chunk sizes and overlaps
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    is_separator_regex=False,
)

print(f"Splitting documents into chunks (size={CHUNK_SIZE}, overlap={CHUNK_OVERLAP})...")
# The 'chunks' variable will be used in Cell 5
# Each chunk will inherit metadata['source'] from its parent document
chunks = text_splitter.split_documents(documents)
print(f"Created {len(chunks)} chunks.")

if chunks:
    print(f"Example source from first chunk: {chunks[0].metadata.get('source')}")
    # Inspect some chunks
    print("\nSample Chunk 1:")
    print(chunks[0].page_content[:200] + "...")
    if len(chunks) > 1:
        print("\nSample Chunk 2:")
        print(chunks[1].page_content[:200] + "...")
else:
    print("No documents were loaded or no chunks were created.")



Attempting to load documents from the absolute directory: /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design
Current working directory of the kernel: /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/int_hr_chatbot
Loading /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design/Chatbot HL_ Appointment Management & Information Dissemination.pdf...
Loading /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design/Chatbot Requirements_ Appointment Management & Information Dissemination.pdf...
Loaded 39 raw documents.
Example source from first loaded document: /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design/Chatbot HL_ Appointment Management & Information Dissemination.pdf
Splitting documents into chunks (size=500, overlap=50)...
Created 269 chunks.
Example source from first chunk: /mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design/Chatbot HL_ Appointment Management & Information Dissemination.pdf

Sample Chunk 1:
High-Level  Design  Document:  C

In [15]:
#Python
# Cell 5: Embedding Generation and Indexing Experimentation
from sqlalchemy import text # For executing raw SQL safely

print("--- Starting Document Synchronization with Vector DB (PGVector) ---")

# Chunks are loaded and processed in Cell 4. Assume 'chunks' variable is available.
# Assume 'DOCUMENT_PATH', 'CONNECTION_STRING', 'COLLECTION_NAME', 'embeddings' are available from previous cells.

# Initialize PGVector store first to interact with its collections and embeddings
# This also ensures its tables (langchain_pg_collection, langchain_pg_embedding) are created if not exist
current_embeddings = embeddings # Defaulting to Sentence Transformers as per initial plan
vector_store = PGVector(
    connection_string=CONNECTION_STRING,
    embedding_function=current_embeddings,
    collection_name=COLLECTION_NAME,
    # pre_delete_collection=False # We want granular control
)
print(f"PGVector store initialized for collection: '{COLLECTION_NAME}'")
print(f"Using embedding model: {current_embeddings.__class__.__name__}")

# Helper function to get Langchain UUIDs for documents with a specific source from PGVector's tables
def get_langchain_ids_for_source_from_pgvector(source_path: str, collection_name: str, pg_vector_store_instance: PGVector) -> list[str]:
    ids_to_delete = []
    with pg_vector_store_instance._bind.connect() as connection: # Changed .engine to ._bind
        collection_query = text("SELECT uuid FROM langchain_pg_collection WHERE name = :coll_name")
        result = connection.execute(collection_query, {"coll_name": collection_name}).fetchone()
        if not result:
            return [] # Collection doesn't exist, so no IDs to find
        collection_id = str(result[0])

        embedding_query = text(
            "SELECT uuid FROM langchain_pg_embedding "
            "WHERE collection_id = :coll_id AND cmetadata->>'source' = :src_path"
        )
        embeddings_result = connection.execute(
            embedding_query, {"coll_id": collection_id, "src_path": source_path}
        ).fetchall()
        ids_to_delete = [str(row[0]) for row in embeddings_result]
    return ids_to_delete

# 1. Get current file paths from the document directory (ensure absolute paths)
current_doc_dir_path = os.path.abspath(DOCUMENT_PATH) # DOCUMENT_PATH from Cell 2, made absolute in Cell 4
current_file_paths_on_disk = set()
for file_path_glob in glob.glob(os.path.join(current_doc_dir_path, "*.pdf")):
    current_file_paths_on_disk.add(os.path.abspath(file_path_glob))
print(f"Found {len(current_file_paths_on_disk)} files in '{current_doc_dir_path}'.")

# 2. Get all unique sources currently in the PGVector collection
existing_db_sources = set()

print("--- DIAGNOSTICS --- PRE-ENGINE ACCESS ---")
if 'vector_store' in locals() and vector_store is not None:
    print(f"Type of vector_store: {type(vector_store)}")
    print(f"Is vector_store an instance of PGVector? {isinstance(vector_store, PGVector)}")
    print("Attributes of vector_store (dir(vector_store)):")
    print(dir(vector_store))
    if hasattr(vector_store, 'engine'):
        print("vector_store HAS 'engine' attribute/property.")
        try:
            print(f"Type of vector_store.engine: {type(vector_store.engine)}")
        except Exception as e_engine_access:
            print(f"Error accessing vector_store.engine for type check: {e_engine_access}")
    else:
        print("vector_store DOES NOT HAVE 'engine' attribute/property.")
    # Check for internal attributes that the 'engine' property might use
    for attr_name in ['_engine', '_connection_string', 'S', 'engine_args']:
        if hasattr(vector_store, attr_name):
            print(f"vector_store has internal attribute: {attr_name}")
        else:
            print(f"vector_store MISSING internal attribute: {attr_name}")
else:
    print("vector_store is not defined or is None.")
print("--- END DIAGNOSTICS ---")

with vector_store._bind.connect() as conn: # Changed .engine to ._bind
    collection_uuid_result = conn.execute(text("SELECT uuid FROM langchain_pg_collection WHERE name = :name"), {"name": COLLECTION_NAME}).fetchone()
    if collection_uuid_result:
        collection_uuid = str(collection_uuid_result[0])
        source_query = text(
            "SELECT DISTINCT cmetadata->>'source' as src FROM langchain_pg_embedding WHERE collection_id = :coll_id AND cmetadata->>'source' IS NOT NULL"
        )
        results = conn.execute(source_query, {"coll_id": collection_uuid}).fetchall()
        for res in results:
            if res[0]: # Ensure source is not None
                 # Paths stored in cmetadata should already be absolute if Cell 4 is correct
                existing_db_sources.add(os.path.abspath(res[0]))
print(f"Found {len(existing_db_sources)} unique document sources in PGVector collection '{COLLECTION_NAME}'.")

# 3. Identify and delete embeddings for documents no longer on disk
files_to_remove_sources = existing_db_sources - current_file_paths_on_disk
ids_for_final_deletion = []
if files_to_remove_sources:
    print(f"Found {len(files_to_remove_sources)} sources to remove from PGVector: {files_to_remove_sources}")
    for source_to_remove in files_to_remove_sources:
        ids = get_langchain_ids_for_source_from_pgvector(source_to_remove, COLLECTION_NAME, vector_store)
        if ids:
            ids_for_final_deletion.extend(ids)
    if ids_for_final_deletion:
        print(f"Attempting to delete {len(ids_for_final_deletion)} embedding entries from PGVector for removed files.")
        vector_store.delete(ids=ids_for_final_deletion)
        print(f"Deleted embeddings for {len(files_to_remove_sources)} sources.")
    else:
        print("No specific embedding entries found for deletion for removed files.")
else:
    print("No documents (sources) to remove from PGVector.")

# 4. Delete old embeddings for files that will be updated/re-added
# These are all files currently on disk. We delete their old versions before adding new ones.
sources_to_re_add_or_update = current_file_paths_on_disk
ids_for_pre_update_deletion = []
if sources_to_re_add_or_update:
    print(f"Preparing to add/update {len(sources_to_re_add_or_update)} documents. Deleting their old embeddings if they exist...")
    for source_path in sources_to_re_add_or_update:
        # Only try to delete if this source was previously known to be in the DB (optimization)
        if source_path in existing_db_sources:
            ids = get_langchain_ids_for_source_from_pgvector(source_path, COLLECTION_NAME, vector_store)
            if ids:
                ids_for_pre_update_deletion.extend(ids)
    
    if ids_for_pre_update_deletion:
        print(f"Attempting to delete {len(ids_for_pre_update_deletion)} old embedding entries for files being updated/re-added.")
        vector_store.delete(ids=ids_for_pre_update_deletion)
        print(f"Deleted old embeddings for files being updated/re-added.")
    else:
        print("No old embeddings to delete for files being updated/re-added (either new files or no old entries found).")
else:
    print("No current files on disk to process for add/update.")

# 5. Add new/updated chunks to the vector store
# 'chunks' variable comes from Cell 4. Each chunk has metadata['source'] as an absolute path.
if 'chunks' in locals() and chunks:
    valid_chunks = [chunk for chunk in chunks if chunk.metadata.get("source")]
    if not valid_chunks and chunks:
        print("Warning: Some chunks from Cell 4 are missing source metadata. They will be skipped.")
    
    if valid_chunks:
        print(f"Adding {len(valid_chunks)} chunks (from current documents on disk) to PGVector...")
        vector_store.add_documents(valid_chunks)
        print("Indexing of current documents complete.")
    else:
        print("No valid chunks with source metadata to add to PGVector.")
elif 'chunks' in locals() and not chunks:
     print("No chunks were created from documents in Cell 4.")
else:
    print("No chunks to add to PGVector ('chunks' variable not defined or not loaded from Cell 4).")

print("--- Document Synchronization with Vector DB (PGVector) Complete ---")

# Note: The SQLAlchemy model 'DocumentChunk' and its table 'document_chunks' (defined in Cell 3)
# are not directly used by this PGVector setup for RAG. PGVector manages its own schema
# (tables like 'langchain_pg_embedding', 'langchain_pg_collection') and the 'COLLECTION_NAME'
# refers to a collection within that schema. This solution works with Langchain's default PGVector behavior.


--- Starting Document Synchronization with Vector DB (PGVector) ---


  vector_store = PGVector(
  vector_store = PGVector(


PGVector store initialized for collection: 'internal_knowledge'
Using embedding model: HuggingFaceEmbeddings
Found 2 files in '/mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design'.
--- DIAGNOSTICS --- PRE-ENGINE ACCESS ---
Type of vector_store: <class 'langchain_community.vectorstores.pgvector.PGVector'>
Is vector_store an instance of PGVector? True
Attributes of vector_store (dir(vector_store)):
['CollectionStore', 'EmbeddingStore', '__abstractmethods__', '__annotations__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_init__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_asimilarity_search_with_relevance_scores', '_bind', '_cosine_relevance_score_fn', '_create_e

In [16]:
# Inside Cell 6: Retrieval and RAG Chain Experimentation
# ...

# Import your chosen LLM
from langchain_community.llms import Ollama
from langchain_google_genai import ChatGoogleGenerativeAI # <-- Ensure this is imported
GOOGLE_API_KEY = 'AIzaSyBsErNvPHekr4z2meNLLek9z1sMUC-TeU8'
# ...
# Initialize LLM (adjust based on your choice)
try:
    print(f"Loading LLM model: {LLM_MODEL_NAME}")
    # For Ollama (local)
    if not GOOGLE_API_KEY: # Only use Ollama if no Google Key is available
         llm = Ollama(model=LLM_MODEL_NAME)
    # For Google Generative AI (uncomment and configure GOOGLE_API_KEY)
    if GOOGLE_API_KEY and LLM_MODEL_NAME.startswith("gemini"):
       print(f"Attempting to load Google LLM model: {LLM_MODEL_NAME}")
       # Ensure the API key is passed correctly to the client
       llm = ChatGoogleGenerativeAI(model=LLM_MODEL_NAME, google_api_key=GOOGLE_API_KEY) # <-- Pass the key here
       print(f"Google LLM model '{LLM_MODEL_NAME}' loaded.")
    elif not GOOGLE_API_KEY:
         # Fallback or error if LLM_MODEL_NAME was a Google model but no key was found
         print(f"GOOGLE_API_KEY not set. Cannot load Google model '{LLM_MODEL_NAME}'. Falling back or failing.")
         # Optional: Add fallback to Ollama or raise an error
         llm = Ollama(model=LLM_MODEL_NAME) # Fallback example
    else:
        print(f"LLM_MODEL_NAME '{LLM_MODEL_NAME}' is not a Google model or GOOGLE_API_KEY not set. Using Ollama or default.")
        llm = Ollama(model=LLM_MODEL_NAME) # Defaulting to Ollama if conditions not met

    # If both attempts failed
    if 'llm' not in locals() or llm is None:
         raise ValueError(f"Could not initialize LLM model '{LLM_MODEL_NAME}' with available credentials.")

    print("LLM loaded successfully.")
except Exception as e:
    print(f"Error loading LLM model '{LLM_MODEL_NAME}': {e}")
    llm = None # Ensure llm is None if loading failed

# ... rest of the cell

NameError: name 'LLM_MODEL_NAME' is not defined

In [18]:
#Python
# Cell 6: Retrieval and RAG Chain Experimentation

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Import your chosen LLM
#from langchain_community.llms import Ollama
from langchain_google_genai import ChatGoogleGenerativeAI # Uncomment if using Google LLM

# --- Configuration for Experimentation ---
# LLM Model (e.g., "llama2" for Ollama, "gemini-pro" for Google)
#LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "llama2") # Adjust based on your setup
LLM_MODEL_NAME = os.getenv("LLM_MODEL_NAME", "gemini-2.5-flash") # Adjust based on your setup

RETRIEVAL_K = 4 # Number of chunks to retrieve
SEARCH_TYPE = "similarity" # or "mmr" (Maximal Marginal Relevance)
# ---------------------------------------

# Initialize LLM (adjust based on your choice)
try:
    print(f"Loading LLM model: {LLM_MODEL_NAME}")
    # For Ollama
    #llm = Ollama(model=LLM_MODEL_NAME)
    # For Google Generative AI (uncomment and configure GOOGLE_API_KEY)
    # if GOOGLE_API_KEY and LLM_MODEL_NAME.startswith("gemini"):
    llm = ChatGoogleGenerativeAI(model=LLM_MODEL_NAME, google_api_key=GOOGLE_API_KEY)
    # else:
    #    llm = Ollama(model=LLM_MODEL_NAME) # Fallback or different local LLM
    print("LLM loaded.")
except Exception as e:
    print(f"Error loading LLM model '{LLM_MODEL_NAME}': {e}")
    llm = None


if vector_store and llm:
    # Configure the retriever
    retriever = vector_store.as_retriever(search_kwargs={"k": RETRIEVAL_K}, search_type=SEARCH_TYPE)

    # Define the RAG Prompt Template (incorporating ideas from the Prompt Engineering PDF)
    # Use a System Prompt for context and persona
    # Use Contextual Prompting by including retrieved documents
    # Consider Few-shot examples if helpful (add to the prompt)
    template = """
    You are an AI assistant specialized in answering questions based on the provided context.
    Answer the user's question truthfully and concisely, using ONLY the information from the following documents.
    If the documents do not contain the answer, state that you cannot find the answer in the provided information.
    Do NOT make up information.
    Cite the source document(s) (if metadata is available) for your answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Build the RAG Chain
    # 1. Retrieve documents based on the question
    # 2. Format the retrieved documents into a single string for the prompt context
    # 3. Pass the context and question to the prompt template
    # 4. Send the filled prompt to the LLM
    # 5. Parse the LLM's output

    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # --- Run a test query ---
    query = "what is the app about ?" # Replace with a query relevant to your documents

    print(f"\nRunning query: '{query}'")
    response = rag_chain.invoke(query)
    print("\n--- Response ---")
    print(response)

    # --- Experimentation Ideas ---
    # Run different queries
    # Change RETRIEVAL_K or SEARCH_TYPE
    # Change the LLM model or its parameters (temperature, etc.)
    # Refine the prompt template
    # Evaluate the response against your gold standard evaluation set (ideally automated)
    # Track metrics like retrieval time, LLM response time
else:
    print("Vector store or LLM not initialized. Cannot run RAG chain.")


Loading LLM model: gemini-2.5-flash
LLM loaded.

Running query: 'what is the app about ?'

--- Response ---
The app is related to a custom hospital app UI for Appointment Management & Information Dissemination.
Source: Document(metadata={'title': 'Chatbot HL: Appointment Management & Information Dissemination', 'page': 5})


In [37]:
#Python
# Cell 7: (Optional) Automated Evaluation Snippet

# This is a simplified example. A full evaluation framework might be more complex.
# You would load your gold standard questions and ground truth answers here.

gold_standard = [
    {"question": "What is X?", "answer": "According to document Y, X is Z."},
    # Add more questions and ground truth answers
]

# Iterate through your gold standard questions
# For each question:
# 1. Run the rag_chain.invoke(question)
# 2. Compare the rag_chain's response to the expected answer in your gold standard.
#    This comparison is tricky for LLM outputs (semantic similarity, faithfulness).
#    You might need evaluation metrics (e.g., RAGAS library, or manual scoring).
# 3. Record the results (question, expected answer, actual answer, perhaps a score).

# Example (conceptual):
# evaluation_results = []
# for item in gold_standard:
#     question = item["question"]
#     expected_answer = item["answer"]
#     actual_response = rag_chain.invoke(question)
#     # Implement comparison/scoring logic here
#     score = score_response(actual_response, expected_answer) # Your custom scoring function
#     evaluation_results.append({"question": question, "expected": expected_answer, "actual": actual_response, "score": score})

# Print/Save evaluation results
# print("\n--- Evaluation Results ---")
# for res in evaluation_results:
#     print(f"Q: {res['question']}\nA: {res['actual']}\nScore: {res['score']}\n---")

# Challenging Question: How do you define and automatically measure the 'faithfulness' and 'relevance' of the AI's response against your documents and the user's query? Comparing LLM text output programmatically to a 'ground truth' is hard.


In [4]:
#Test with
%pip install requests 
# Or poetry add requests if managing notebook dependencies with poetry
# Cell 1: Import Libraries

import requests
import json # Useful for pretty printing JSON

# --- Configuration ---
# The URL where your local rag-service is running
# Ensure your uvicorn command includes --host 0.0.0.0
RAG_SERVICE_URL = "http://localhost:8000"

# The specific endpoint for asking questions
ASK_ENDPOINT = f"{RAG_SERVICE_URL}/v1/ask"

# The specific endpoint for health check
HEALTH_ENDPOINT = f"{RAG_SERVICE_URL}/v1/health"
# -------------------

print(f"RAG Service Ask Endpoint: {ASK_ENDPOINT}")
print(f"RAG Service Health Endpoint: {HEALTH_ENDPOINT}")

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
RAG Service Ask Endpoint: http://localhost:8000/v1/ask
RAG Service Health Endpoint: http://localhost:8000/v1/health


In [6]:
# Cell 2: Test the Health Endpoint

print(f"Testing Health Endpoint: {HEALTH_ENDPOINT}")

try:
    # Send GET request to the health endpoint
    response = requests.get(HEALTH_ENDPOINT)

    # Check the response status code
    print(f"Status Code: {response.status_code}")

    # Check if the request was successful (status code 200-299)
    if response.status_code == 200:
        print("Health Check Successful!")
        # Parse and print the JSON response body
        print("Response Body:")
        print(json.dumps(response.json(), indent=2))
    else:
        print(f"Health Check Failed. Response:")
        print(response.text) # Print response text if not successful

except requests.exceptions.ConnectionError as e:
    print(f"Error: Could not connect to the RAG service at {HEALTH_ENDPOINT}.")
    print("Please ensure the service is running locally.")
    print(f"Details: {e}")
except Exception as e:
    print(f"An unexpected error occurred during health check: {e}")

Testing Health Endpoint: http://localhost:8000/v1/health
Status Code: 200
Health Check Successful!
Response Body:
{
  "status": "ok",
  "message": "RAG service is healthy."
}


In [7]:
# Cell 3: Test the Ask Endpoint

print(f"\nTesting Ask Endpoint: {ASK_ENDPOINT}")

# Define the request payload (matching your Question Pydantic model)
question_payload = {
    "text": "what the beenfit of the app about?" # Replace with a question from your gold standard evaluation set or documents
}

print(f"Sending question: '{question_payload['text']}'")

try:
    # Send POST request to the ask endpoint
    response = requests.post(
        ASK_ENDPOINT,
        json=question_payload, # 'json' parameter automatically sets Content-Type to application/json and serializes the dict
        # headers={'Content-Type': 'application/json'} # 'json' param handles this
    )

    # Check the response status code
    print(f"Status Code: {response.status_code}")

    # Check if the request was successful
    if response.status_code == 200:
        print("Request Successful!")
        # Parse and print the JSON response body (matching your Answer Pydantic model)
        answer_data = response.json()
        print("\n--- Answer ---")
        print(f"Answer Text: {answer_data.get('text', 'N/A')}") # Use .get to avoid KeyError if fields are missing
        print(f"Sources: {answer_data.get('sources', [])}")
        print("--------------")

    else:
        print(f"Request Failed. Status Code: {response.status_code}")
        print("Response Body:")
        try:
            print(json.dumps(response.json(), indent=2)) # Try to print as JSON if possible
        except json.JSONDecodeError:
            print(response.text) # Otherwise, print as plain text

except requests.exceptions.ConnectionError as e:
    print(f"Error: Could not connect to the RAG service at {ASK_ENDPOINT}.")
    print("Please ensure the service is running locally.")
    print(f"Details: {e}")
except Exception as e:
    print(f"An unexpected error occurred during API call: {e}")


Testing Ask Endpoint: http://localhost:8000/v1/ask
Sending question: 'what the beenfit of the app about?'
Status Code: 200
Request Successful!

--- Answer ---
Answer Text: The benefits of the app include:
*   It can be downloaded and executed with a single command without requiring installation, even for complex applications with many dependencies (27Creating, running, and sharing a container image).
*   Developers involved in running the application in production gain a better understanding of user needs and issues, as well as problems faced by the operations team. This also encourages developers to release the app earlier and use user feedback to guide further development (7Introducing container technologies).
Sources: ['/mnt/c/Users/Tan Prawibowo/Private_PJT/LLM_RAG_WA/LLM_RAG_WA/documents/Kubernetes in Action.pdf', 'c:\\Users\\Tan Prawibowo\\Private_PJT\\LLM_RAG_WA\\LLM_RAG_WA\\documents\\Kubernetes in Action.pdf', '/mnt/c/Users/Tan Prawibowo/BTH_AWS_TF/pv_chatbot_general/design/C