In [None]:
# File: notebooks/rag_evaluation.ipynb
# RAG Evaluation Notebook
# This notebook evaluates the RAG (Retrieval-Augmented Generation) pipeline using the Consumer Compl
# Import necessary libraries
import pandas as pd
import os
import sys

# --- Path Configuration for Notebook ---
# Jupyter may be started from the project root or from inside 'notebooks/'.
# If the working directory path ends in '<sep>notebooks' (compared
# case-insensitively), the project root is its parent directory; otherwise
# the working directory itself is used as the project root.
current_working_dir = os.getcwd()
launched_from_notebooks = current_working_dir.lower().endswith(os.path.sep + 'notebooks')
project_root = (
    os.path.abspath(os.path.join(current_working_dir, os.pardir))
    if launched_from_notebooks
    else current_working_dir
)

print(f"Detected Project Root: {project_root}")

# Put '<project_root>/src' at the front of the module search path so that
# rag_pipeline can be imported.
sys.path.insert(0, os.path.join(project_root, 'src'))

# Now import the rag_pipeline module
import rag_pipeline

# --- 1. Load RAG Components ---
# rag_pipeline.load_components() returns three objects, bound here to
# db, embeddings and llm_pipeline.
print("\n--- Loading RAG Components for Evaluation ---")
db, embeddings, llm_pipeline = rag_pipeline.load_components()

# A falsy value for any of the three is treated as a failed load.
components_ready = all((db, embeddings, llm_pipeline))
if not components_ready:
    troubleshooting_lines = (
        "\nError: Failed to load all RAG components. Please ensure:",
        "1. All required libraries are installed (`pip install transformers accelerate`).",
        "2. `src/embed_and_index.py` ran successfully and created the vector store.",
        "3. You are running Jupyter from your project root or notebooks sub-directory.",
    )
    for line in troubleshooting_lines:
        print(line)
    # Stop here: the remaining steps need all three components.
    raise SystemExit("RAG components not loaded. Exiting notebook.")

print("\n--- RAG Components Loaded Successfully for Evaluation ---")

# --- 2. Define Evaluation Questions ---
# The ten questions make two passes over the same five product areas
# (credit cards, savings accounts, loans, money transfers, BNPL).
# Concatenating the passes keeps the questions in their original order.
_first_pass_questions = [
    "What common issues are reported with credit card billing?",
    "Can you find complaints about unauthorized transactions in savings accounts?",
    "What problems are customers facing with personal loan disbursements?",
    "Are there complaints about unexpected fees related to money transfers?",
    "Summarize recent issues with Buy Now, Pay Later services.",
]
_second_pass_questions = [
    "What issues did customers have with credit cards and fraud?",
    "Tell me about problems with loan repayment schedules.",
    "What issues are reported regarding online banking features for savings accounts?",
    "Explain any disputes related to international money transfers.",
    "What complaints are there about delayed payments in BNPL schemes?",
]
evaluation_questions = _first_pass_questions + _second_pass_questions

# --- 3. Run RAG Pipeline for Each Question and Collect Results ---
# One dict per question is appended to evaluation_results; the score and
# comment fields start empty so they can be filled in by hand afterwards.
evaluation_results = []

print("\n--- Running Qualitative Evaluation Queries ---")
for question_number, question in enumerate(evaluation_questions, start=1):
    print(f"\n--- Evaluating Question {question_number}: '{question}' ---")
    # Ask the imported pipeline for an answer plus the documents it retrieved.
    answer, retrieved_docs = rag_pipeline.run_rag_pipeline(question, db, llm_pipeline, k=5)

    result_row = {
        "Question": question,
        "Generated Answer": answer,
        "Retrieved Sources": retrieved_docs,  # full documents, kept for later inspection
        "Quality Score": "",  # filled in manually (1-5)
        "Comments/Analysis": "",  # filled in manually
    }
    evaluation_results.append(result_row)

    print(f"Generated Answer: {answer}")
    print("Top 1-2 Retrieved Sources (Preview):")
    if not retrieved_docs:
        print("  No relevant sources retrieved.")
        continue

    # Preview only the first two sources to keep the output short.
    for source_number, doc in enumerate(retrieved_docs[:2], start=1):
        complaint_id = doc.metadata.get('complaint_id', 'N/A')
        product_name = doc.metadata.get('product', 'N/A')
        preview = doc.page_content[:150]
        print(f"  - Source {source_number} (ID: {complaint_id}, Product: {product_name}): {preview}...")

print("\n--- RAG Evaluation Runs Complete ---")

# --- 4. Display Evaluation Table (Markdown Format for Report) ---
def _md_cell(value):
    """Return ``value`` as text that is safe inside one Markdown table cell.

    The value is converted with ``str()``, every pipe character is escaped
    with a backslash, and line breaks are replaced by single spaces so the
    text cannot end the table row early.
    """
    text = str(value).replace('|', '\\|')
    return text.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')


print("\n--- RAG Qualitative Evaluation Table ---")
print("\n**Instructions:** Manually review each generated answer based on the question and retrieved sources. Assign a Quality Score (1-5, where 5 is excellent) and provide comments/analysis. This table is in Markdown format for easy copy-pasting into your report.")
print("| Question | Generated Answer | Top 1-2 Retrieved Sources (Complaint ID, Product) | Quality Score (1-5) | Comments/Analysis |")
print("|----------|------------------|---------------------------------------------------|---------------------|-------------------|")

for result in evaluation_results:
    # Summarise at most the first two retrieved sources, one per <br>-terminated entry.
    retrieved = result["Retrieved Sources"]
    if retrieved:
        sources_str = "".join(
            f"ID: {_md_cell(doc.metadata.get('complaint_id', 'N/A'))}, "
            f"Product: {_md_cell(doc.metadata.get('product', 'N/A'))}: "
            f"{_md_cell(doc.page_content[:100])}...<br>"
            for doc in retrieved[:2]
        )
    else:
        sources_str = "No sources retrieved."

    # Cells are escaped before the row is assembled instead of inside an
    # f-string replacement field: a backslash in an f-string expression is a
    # SyntaxError on Python versions before 3.12.
    row_cells = [
        _md_cell(result["Question"]),
        _md_cell(result["Generated Answer"]),
        sources_str,
        _md_cell(result["Quality Score"]),
        _md_cell(result["Comments/Analysis"]),
    ]
    print("| " + " | ".join(row_cells) + " |")

print("\n--- Qualitative Evaluation Table Generated ---")

Detected Project Root: d:\Kifiya AI Master Training Program 5 6 &7\week-6\intelligent-complaint-analysis


  from .autonotebook import tqdm as notebook_tqdm



--- Loading RAG Components for Evaluation ---
--- Loading RAG Components ---


  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


Embedding model 'all-MiniLM-L6-v2' loaded successfully.
FAISS vector store loaded successfully!


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu


Text generation LLM (t5-small) loaded successfully.
--- All RAG Components Loaded ---

--- RAG Components Loaded Successfully for Evaluation ---

--- Running Qualitative Evaluation Queries ---

--- Evaluating Question 1: 'What common issues are reported with credit card billing?' ---
Retrieving top 5 chunks for query: 'What common issues are reported with credit card billing?'
Retrieved 5 chunks.

Generating answer with LLM...
Generated Answer: Question: What common issues are reported with credit card billing? Question: What common issues are reported with credit card billing? Question: What common issues are reported with credit card billing? Question: What common issues are reported with credit card billing?
Top 1-2 Retrieved Sources (Preview):
  - Source 1 (ID: 9908821, Product: Credit card): have followed all the guidelines of credit card company but being victimized as a courtesy and an ongoing inconvenience i should be issued a monetary ...
  - Source 2 (ID: 12659157, Product: C

In [None]:
#%pip install huggingface_hub[hf_xet]

Collecting hf-xet<2.0.0,>=1.1.2 (from huggingface_hub[hf_xet])
  Downloading hf_xet-1.1.5-cp37-abi3-win_amd64.whl.metadata (883 bytes)
Downloading hf_xet-1.1.5-cp37-abi3-win_amd64.whl (2.7 MB)
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.7 MB ? eta -:--:--
   --- ------------------------------------ 0.3/2.7 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.7 MB 1.9 MB/s eta 0:00:02
   ----------- ---------------------------- 0.8/2.7 MB 1.7 MB/s eta 0:00:02
   ------------------- -------------------- 1.3/2.7 MB 1.6 MB/s eta 0:00:01
   ---------------------- ----------------- 1.6/2.7 MB 1.6 MB/s eta 0:00:01
   ------------------------------ --------- 2.1/2.7 MB 1.7 MB/s eta 0:00:01
   ---------------------------------- ----- 2.4/2.7 MB 1.7 MB/s eta 0:00:01
   ---------------------------------------- 2.7/2.7 MB 1.8 MB/s eta 0:00:00
Installing collected packages: hf-xet
Successfully installed hf-xet-


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


: 