# Task 3: RAG Core Logic & Evaluation

## Objective
Evaluate the quality of the RAG pipeline using realistic business questions.
Evaluation criteria: relevance of the retrieved sources, grounding of the generated answer in those sources (no hallucinations), and formatting of the output.

In [None]:
import sys
import os
# Make the project's `src` directory importable. The relative path is resolved
# against the current working directory, so this assumes the notebook is run
# from a folder that sits next to `src` — TODO confirm repository layout.
sys.path.append(os.path.abspath('../src'))

# Project-local module, found through the path entry appended above.
from rag_pipeline import RAGPipeline
import pandas as pd
from IPython.display import display, Markdown

In [None]:
# Initialize the pipeline once; the evaluation loop below reuses this `rag` object.
# Per the original note, construction loads the vector store and the LLM —
# presumably the expensive step of the notebook (TODO confirm in rag_pipeline).
rag = RAGPipeline()

## Qualitative Evaluation Table
We will ask 5 questions covering different products and issues.

In [None]:
# Evaluation set: each entry pairs a question ("q") with the metadata filter
# ("filter") that the evaluation loop passes as `filters=` to
# `rag.answer_question`.
#
# An earlier draft also contained an unfiltered Equifax question. It was
# superseded by the Citibank question (Equifax complaints presumably fall under
# 'Credit reporting', which is not among the products used here — TODO confirm),
# and the missing comma after it made this cell a SyntaxError. It is dropped so
# the list holds the five intended questions.
questions = [
    {"q": "What are the main issues reported with credit cards?", "filter": {"product": "Credit card"}},
    {"q": "Why are customers complaining about money transfers?", "filter": {"product": "Money transfers"}},
    {"q": "Are there complaints about loan interest rates?", "filter": {"product": "Personal loan"}},
    {"q": "How do customers describe their savings account closure experience?", "filter": {"product": "Savings account"}},
    {"q": "What are the complaints regarding Citibank credit cards?", "filter": {"product": "Credit card"}},
]

# Run every evaluation question through the pipeline and collect one table row
# per question; the score and comments columns are placeholders for manual review.
results = []

for entry in questions:
    query = entry['q']
    print(f"Processing: {query}...")
    rag_output = rag.answer_question(query, filters=entry['filter'])

    # Only the first returned source excerpt is shown in the table; fall back
    # to a placeholder when no sources came back.
    retrieved = rag_output['sources']
    if retrieved:
        first_excerpt = retrieved[0]['excerpt']
    else:
        first_excerpt = "No Source"

    row = {
        "Question": query,
        "Generated Answer": rag_output['answer'],
        "Top Source": first_excerpt,
        "Quality Score (1-5)": 0,  # placeholder until scored by hand
        "Comments": "To be filled manually",
    }
    results.append(row)

df_results = pd.DataFrame(results)
display(df_results)

## Markdown Output for Report

In [None]:
# Print the results table as Markdown text so it can be pasted into the report.
# Note: DataFrame.to_markdown relies on the optional `tabulate` package.
print(df_results.to_markdown(index=False))