In [1]:
import sys
import os
import pandas as pd

# Add the project root to system path so we can import from src
# We use os.path.abspath to ensure we get the correct absolute path
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

try:
    from src.rag_pipeline import RAGService
    print("✅ Successfully imported RAGService from src.")
except ImportError as e:
    print(f"❌ Error importing src: {e}")
    print("Make sure you are running this notebook from the 'notebooks' folder.")

  from .autonotebook import tqdm as notebook_tqdm


✅ Successfully imported RAGService from src.


In [2]:
# Define path to vector store
VECTOR_STORE_PATH = os.path.join(project_root, "vector_store")

print(f"Loading Vector Store from: {VECTOR_STORE_PATH}")

if not os.path.exists(VECTOR_STORE_PATH):
    print("❌ Error: Vector store directory not found.")
    print("Please run '02_chunking_embedding.ipynb' to generate the database first.")
else:
    try:
        # Initialize the service
        # This will load the embedding model and the LLM
        rag_service = RAGService(vector_store_path=VECTOR_STORE_PATH)
        print("✅ RAG Service loaded successfully!")
    except Exception as e:
        print(f"❌ Failed to load RAG Service: {e}")

Loading Vector Store from: d:\10academy\phase5\rag-complaint-chatbot\vector_store
Initializing RAG Service from d:\10academy\phase5\rag-complaint-chatbot\vector_store...
Loading Embeddings...
Loading LLM (flan-t5-small)...


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


LLM loaded.
✅ RAG Service loaded successfully!


In [3]:
# Evaluation Questions relevant to CrediTrust's products
test_questions = [
    "What are the main issues customers have with Money Transfers?",
    "Why are people complaining about Credit Card late fees?",
    "Are there issues with accessing Savings Accounts online?",
    "What specific companies are mentioned in student loan complaints?", # Test specific entity retrieval
    "Tell me about a positive experience." # Test handling of missing info (complaints usually aren't positive)
]

print(f"Prepared {len(test_questions)} questions for evaluation.")

Prepared 5 questions for evaluation.


In [4]:
# Only run if service loaded correctly
if 'rag_service' in locals():
    results = []
    
    print("Starting evaluation loop...")
    
    for i, q in enumerate(test_questions):
        print(f"\n[{i+1}/{len(test_questions)}] Question: {q}")
        
        try:
            # Get answer and sources
            answer, docs = rag_service.answer_question(q)
            
            # Extract info from the first source (if available) for validation
            source_id = docs[0].metadata.get('complaint_id', 'N/A') if docs else "None"
            source_preview = docs[0].page_content[:100].replace('\n', ' ') + "..." if docs else "None"
            
            # Store result
            results.append({
                "Question": q,
                "Generated Answer": answer.strip(),
                "Source 1 ID": source_id,
                "Source 1 Preview": source_preview
            })
            print(f"   -> Answer generated ({len(answer)} chars).")
            
        except Exception as e:
            print(f"   -> Error processing question: {e}")
            results.append({
                "Question": q,
                "Generated Answer": f"ERROR: {str(e)}",
                "Source 1 ID": "ERROR",
                "Source 1 Preview": "ERROR"
            })

    print("\nEvaluation complete.")
else:
    print("Skipping evaluation: RAG Service not loaded.")

Starting evaluation loop...

[1/5] Question: What are the main issues customers have with Money Transfers?
Processing: What are the main issues customers have with Money Transfers?
   -> Answer generated (123 chars).

[2/5] Question: Why are people complaining about Credit Card late fees?
Processing: Why are people complaining about Credit Card late fees?
   -> Answer generated (43 chars).

[3/5] Question: Are there issues with accessing Savings Accounts online?
Processing: Are there issues with accessing Savings Accounts online?
   -> Answer generated (2 chars).

[4/5] Question: What specific companies are mentioned in student loan complaints?
Processing: What specific companies are mentioned in student loan complaints?
   -> Answer generated (36 chars).

[5/5] Question: Tell me about a positive experience.
Processing: Tell me about a positive experience.
   -> Answer generated (9 chars).

Evaluation complete.


In [5]:
if 'results' in locals() and results:
    df_results = pd.DataFrame(results)
    
    # Configure pandas to show full text
    pd.set_option('display.max_colwidth', None)
    
    print("Evaluation Results Summary:")
    display(df_results)
else:
    print("No results to display.")

Evaluation Results Summary:


Unnamed: 0,Question,Generated Answer,Source 1 ID,Source 1 Preview
0,What are the main issues customers have with Money Transfers?,consumers should be able to rely on the information provided by their financial institutions tools to manage their finances,,"problems with money transfer, receiving and sending out...."
1,Why are people complaining about Credit Card late fees?,Lack of notice from the credit card company,,ll and avoided the additional 40.00 late fee and interest. credit card companies are not supposed to...
2,Are there issues with accessing Savings Accounts online?,no,,i recently had . while i tried to access online account and put in wrong info. i received and email ...
3,What specific companies are mentioned in student loan complaints?,consumer financial protection bureau,,"ave there been any recent fair lending violations, lawsuits, or regulatory actions against your inst..."
4,Tell me about a positive experience.,[ID: N/A],,he circumstances and my positive customer history. thank you for your time and consideration....


In [6]:
if 'df_results' in locals():
    # Define output path
    output_path = os.path.join(project_root, "data", "processed", "rag_evaluation_results.csv")
    
    # Ensure directory exists
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Save
    df_results.to_csv(output_path, index=False)
    print(f"✅ Evaluation results saved to: {output_path}")

✅ Evaluation results saved to: d:\10academy\phase5\rag-complaint-chatbot\data\processed\rag_evaluation_results.csv
