# evaluation.ipynb

In [1]:
# notebooks/evaluation.ipynb

import sys
from pathlib import Path
import pandas as pd

# Add the project root so `src.*` imports work. The root is taken to be the
# parent of the current working directory (resolved to an absolute path).
project_root = Path("..").resolve()
# Guard the append so re-running this cell does not pile up duplicate entries
# on sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from src.pipeline import run_pipeline




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED: can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



# Define Representative Questions

In [2]:
# Fixed set of questions fed to the pipeline during evaluation. Order matters:
# the manual scores and comments added later are matched to rows by position.
evaluation_questions = [
    # Credit card disputes, fraud, and refunds
    "Why are customers complaining about credit card disputes?",
    "What issues do customers report about unauthorized credit card charges?",
    "Are banks resolving credit card fraud complaints effectively?",
    "What problems do customers face with chargebacks and refunds?",
    # Company-level comparison
    "Which companies receive the most complaints related to credit cards?",
    # Account closures
    "Why are customer accounts being closed without explanation?",
]


# Run the RAG Pipeline

In [3]:
# Tunable evaluation settings, named so they are not buried in the loop body.
# TOP_K is forwarded as run_pipeline's `k` argument (presumably the number of
# chunks to retrieve -- confirm against src.pipeline).
TOP_K = 5
# Number of retrieved sources kept per row, to keep the results table readable.
MAX_SOURCES_SHOWN = 2

# Run every evaluation question through the pipeline and collect one record
# (a plain dict) per question.
results = []

for question in evaluation_questions:
    # run_pipeline's return value is unpacked as (answer, sources).
    answer, sources = run_pipeline(question, k=TOP_K)

    results.append({
        "Question": question,
        "Generated Answer": answer,
        "Retrieved Sources": sources[:MAX_SOURCES_SHOWN],  # show only the top 1-2
    })


In [6]:
# Turn the collected per-question records into a table (one row per record)
# and display it as the cell output.
df_eval = pd.DataFrame(data=results)
df_eval

Unnamed: 0,Question,Generated Answer,Retrieved Sources
0,Why are customers complaining about credit car...,They are unhappy with the card issuers response.,[{'text': 'because a merchant is unhelpful or ...
1,What issues do customers report about unauthor...,They report that they have been charged unauth...,[{'text': 'on or before 2022 numerous unauthor...
2,Are banks resolving credit card fraud complain...,no,"[{'text': 'y other bank as well, which has giv..."
3,What problems do customers face with chargebac...,They are really just stealing money from small...,"[{'text': 'issues or chargebacks.', 'metadata'..."
4,Which companies receive the most complaints re...,card companies and the credit bureaus.,[{'text': 'card companies and the credit burea...
5,Why are customer accounts being closed without...,They are not doing what customers want.,[{'text': 'retain accounts on customers '' tha...


In [7]:
# Manually assigned 1-5 quality rating for each generated answer, listed in
# the same order as the rows of df_eval.
quality_scores = [4, 5, 3, 3, 2, 2]

# Free-text justification for each rating, in the same row order.
analysis_comments = [
    "Correct summary of dissatisfaction with dispute handling, but could include more specific issues such as unresponsive merchants.",
    "Accurately identifies unauthorized charges and is well-supported by the retrieved complaints.",
    "Answer aligns with the complaints but is too brief and lacks explanation.",
    "Captures frustration but uses emotionally charged language and lacks clear structure.",
    "Too generic; does not name specific companies despite such information being present.",
    "Vague and oversimplified explanation with weak grounding in the retrieved context.",
]

# Attach both annotation columns to the evaluation table; values are matched
# to rows purely by position.
df_eval["Quality Score (1-5)"] = quality_scores
df_eval["Comments / Analysis"] = analysis_comments

df_eval


Unnamed: 0,Question,Generated Answer,Retrieved Sources,Quality Score (1-5),Comments / Analysis
0,Why are customers complaining about credit car...,They are unhappy with the card issuers response.,[{'text': 'because a merchant is unhelpful or ...,4,Correct summary of dissatisfaction with disput...
1,What issues do customers report about unauthor...,They report that they have been charged unauth...,[{'text': 'on or before 2022 numerous unauthor...,5,Accurately identifies unauthorized charges and...
2,Are banks resolving credit card fraud complain...,no,"[{'text': 'y other bank as well, which has giv...",3,Answer aligns with the complaints but is too b...
3,What problems do customers face with chargebac...,They are really just stealing money from small...,"[{'text': 'issues or chargebacks.', 'metadata'...",3,Captures frustration but uses emotionally char...
4,Which companies receive the most complaints re...,card companies and the credit bureaus.,[{'text': 'card companies and the credit burea...,2,Too generic; does not name specific companies ...
5,Why are customer accounts being closed without...,They are not doing what customers want.,[{'text': 'retain accounts on customers '' tha...,2,Vague and oversimplified explanation with weak...


In [17]:
# Save the annotated evaluation table as CSV (path is relative to the current
# working directory); the row index is not written.
df_eval.to_csv(path_or_buf="evaluation_results.csv", index=False)
