In [None]:
# notebooks/task3_rag_evaluation.ipynb

# Cell 1: Imports
# Dependencies for the cells below: pandas builds the results table, chromadb
# opens the persisted vector store, SentenceTransformer loads the embedding model.
# NOTE(review): `np` and `Settings` are not referenced in the cells visible here;
# confirm whether another part of the notebook needs them before removing.
import pandas as pd
import numpy as np
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
import warnings
# Blanket filter: hides every warning raised after this point in the kernel
# session, including deprecation notices. Narrow it when debugging.
warnings.filterwarnings('ignore')

print("Libraries loaded")

In [None]:
# Cell 2: Load vector store
# Opens the on-disk Chroma store (path is relative to the notebook's working
# directory) and grabs the "complaints" collection that later cells query.
print("Loading vector store...")
client = chromadb.PersistentClient(path="../vector_store")
collection = client.get_collection(name="complaints")
document_count = collection.count()
print(f"Documents in collection: {document_count}")

In [None]:
# Cell 3: Initialize embedding model
# NOTE(review): in the cells visible here `model` is never passed to the
# retriever, which sends raw text to the collection instead -- confirm whether
# this model is still needed or should be used to embed queries.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
print("Embedding model loaded")

In [None]:
# Cell 4: RAG retriever function
def retrieve_documents(query, k=5):
    """Query the module-level ``collection`` for the ``k`` closest matches.

    Args:
        query: Free-text search string; it is sent as the single entry of
            ``query_texts``.
        k: Number of results requested (``n_results``). Defaults to 5.

    Returns:
        Whatever ``collection.query`` returns, restricted to the
        ``documents`` and ``metadatas`` fields via ``include``.

    NOTE(review): the text is embedded by the collection's own embedding
    function, not by the ``model`` loaded earlier -- confirm both agree with
    how the store was built.
    """
    query_kwargs = {
        "query_texts": [query],
        "n_results": k,
        "include": ["documents", "metadatas"],
    }
    return collection.query(**query_kwargs)

In [None]:
# Cell 5: Generate answer (simulated LLM)
def generate_answer(query, retrieved_docs, max_items=3, snippet_chars=100):
    """Build a templated plain-text answer from retrieved documents.

    This is a stand-in for a real LLM call: it only lists the leading
    retrieved documents next to their ``product`` metadata.

    Args:
        query: The user question; echoed back in the answer.
        retrieved_docs: Mapping with ``documents`` and ``metadatas`` keys, each
            holding a list whose first element is the list of hits for the
            query (the shape produced by ``retrieve_documents``).
        max_items: How many documents to list under "Key issues identified".
            Defaults to 3.
        snippet_chars: How many leading characters of each document to show.
            Defaults to 100.

    Returns:
        A multi-line string: a header with the total hit count, the query, and
        one numbered line per listed document. "..." is always appended to the
        snippet, whether or not the document was actually cut.
    """
    # Only the first (and, in this notebook, only) query's hits are used.
    # Missing/empty fields degrade to an empty list instead of raising.
    documents = (retrieved_docs.get('documents') or [[]])[0] or []
    metadatas = (retrieved_docs.get('metadatas') or [[]])[0] or []

    lines = [
        f"Based on {len(documents)} customer complaints:",
        f"Query: {query}",
        "",
        "Key issues identified:",
    ]

    # Simple analysis (replace with real LLM)
    for position, doc in enumerate(documents[:max_items]):
        # A hit may have no metadata entry (None) or the metadata list may be
        # shorter than the document list; both fall back to 'Unknown'.
        meta = metadatas[position] if position < len(metadatas) else None
        product = (meta or {}).get('product', 'Unknown')
        snippet = (doc or "")[:snippet_chars]
        lines.append(f"{position + 1}. {product}: {snippet}...")

    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"

In [None]:
# Cell 6: Evaluation questions
# The strings are used verbatim as retrieval queries, so wording changes here
# change what gets retrieved.
eval_questions = [
    # One broad question per product
    "Why are people unhappy with Credit Cards?",
    "What are main complaints about Personal Loans?",
    "What issues with Savings Accounts?",
    "What problems with Money Transfers?",
    # Cross-product comparison
    "Which product has most billing complaints?",
    # Narrower, issue-specific questions
    "What delays in money transfers?",
    "What credit card fees annoy customers?",
    "Why loan applications get denied?",
]

question_count = len(eval_questions)
print(f"{question_count} evaluation questions defined")

In [None]:
# Cell 7: Run evaluation
# For each evaluation question: retrieve the top 3 hits, build the templated
# answer, and record one row (a dict) for the results table.
results = []

for question in eval_questions:
    # Retrieve
    retrieved = retrieve_documents(question, k=3)
    retrieved_texts = retrieved['documents'][0]
    retrieved_metas = retrieved['metadatas'][0]

    # Generate
    answer = generate_answer(question, retrieved)

    # Get source info from the first two hits. A hit may come back with no
    # metadata (None); fall back to an empty dict so the defaults apply
    # instead of raising AttributeError.
    sources = []
    for meta in retrieved_metas[:2]:
        meta = meta or {}
        sources.append(f"{meta.get('product', 'Unknown')}: {meta.get('issue', 'N/A')}")

    # Placeholder score (1-5): derived only from how many hits came back, not
    # from a human judgement of answer quality. With three hits it is always 5.
    # TODO: replace with a real manual or automated rating.
    score = min(5, len(retrieved_texts) + 2)  # Simple scoring

    # Keep the table readable: cap the stored answer at 200 characters.
    answer_preview = (answer[:200] + "...") if len(answer) > 200 else answer

    results.append({
        "Question": question,
        "Generated Answer": answer_preview,
        "Retrieved Sources": ", ".join(sources),
        "Quality Score": score,
        "Comments": f"Retrieved {len(retrieved_texts)} relevant documents"
    })

print("Evaluation complete")

In [None]:
# Cell 8: Create evaluation table
# One row per evaluation question; rendered as plain text without the index.
results_df = pd.DataFrame(results)
table_text = results_df.to_string(index=False)
print("Evaluation Table:", table_text, sep="\n")

In [None]:
# Cell 9: Save results
# Single source of truth for the destination so the message cannot drift from
# the path actually written. The path is relative to the working directory.
output_csv_path = "../data/processed/rag_evaluation_results.csv"
results_df.to_csv(output_csv_path, index=False)
print(f"Results saved to {output_csv_path}")

In [None]:
# Cell 10: Test specific queries
# Quick spot check: run a few keyword-style queries and print the first 80
# characters of each of the top two hits.
test_queries = [
    "credit card payment issues",
    "money transfer delays 2023",
    "loan application problems",
]

print("\nTest Queries Results:")
for query in test_queries:
    retrieved = retrieve_documents(query, k=2)
    top_docs = retrieved['documents'][0]
    print(f"\nQuery: {query}")
    print(f"Found {len(top_docs)} documents")
    for doc_number, doc in enumerate(top_docs[:2], start=1):
        print(f"  Doc {doc_number}: {doc[:80]}...")