# Ragas Usage Demo (End-to-End)

This notebook shows how to use a Crucible-generated Ragas export and run a complete evaluation loop.

## What this covers
1. Load exported `ragas` config JSON from Crucible.
2. Generate actual answers from your app/model endpoint.
3. Build a `datasets.Dataset`.
4. Run Ragas metrics and inspect scores.


In [None]:
# If needed, uncomment and run once:
# %pip install -U ragas datasets pandas requests


In [None]:
import json
from pathlib import Path

import pandas as pd
import requests
from datasets import Dataset

from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall


## Usage Demo: Load Crucible export

Point `RAGAS_EXPORT_PATH` at the `.json` file downloaded from Crucible (output format: `ragas`), or leave the cell as-is to auto-pick an export from the `downloads/` folder.

In [None]:
# Locate the project root: use the current directory when it contains
# `backend/`, otherwise its parent.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = NOTEBOOK_DIR if (NOTEBOOK_DIR / 'backend').exists() else NOTEBOOK_DIR.parent
DOWNLOADS_DIR = PROJECT_ROOT / 'downloads'

# Option 1: set explicit filename (leave as None to fall through to Option 2)
RAGAS_EXPORT_PATH = None
# RAGAS_EXPORT_PATH = DOWNLOADS_DIR / 'crucible_rag_ragas_YYYYMMDD_HHMMSS.json'

# Option 2: auto-pick latest ragas export
# Order by the timestamp suffix that follows `_ragas_` rather than by the whole
# filename: the segment between `crucible_` and `_ragas_` varies, so a plain
# name sort would rank files by that segment first and the timestamp second.
candidates = sorted(
    DOWNLOADS_DIR.glob('crucible_*_ragas_*.json'),
    key=lambda path: (path.stem.rpartition('_ragas_')[2], path.name),
)
if RAGAS_EXPORT_PATH is None:
    # Only auto-pick when no explicit path was chosen above, so Option 1 is
    # not silently overwritten.
    RAGAS_EXPORT_PATH = candidates[-1] if candidates else DOWNLOADS_DIR / 'missing-ragas-export.json'

if not RAGAS_EXPORT_PATH.exists():
    raise FileNotFoundError(f'Update RAGAS_EXPORT_PATH first: {RAGAS_EXPORT_PATH}')

# Decode explicitly as UTF-8 instead of relying on the platform default encoding.
raw = json.loads(RAGAS_EXPORT_PATH.read_text(encoding='utf-8'))
raw.keys()


## Usage Demo: Generate real answers from your app

Replace the body of `call_app` so that it calls your real endpoint.

Expected return shape (minimum):
- `answer`: model/app response text
- `contexts`: list of retrieved chunks used for the response


In [None]:
APP_API_URL = 'http://localhost:8000/chat'  # change to your app endpoint

def call_app(question: str):
    """
    Send one question to the app and return ``(answer, contexts)``.

    Replace this with your actual app call.
    Demo supports two shapes:
      1) {'answer': '...', 'contexts': ['...']}
      2) plain text response (contexts fallback to empty list)

    Args:
        question: The question text, posted as ``{'question': question}``.

    Returns:
        A 2-tuple ``(answer, contexts)``. ``contexts`` is an empty list when
        the response carries none.

    Raises:
        Whatever ``resp.raise_for_status()`` raises for an unsuccessful
        HTTP response.
    """
    resp = requests.post(APP_API_URL, json={'question': question}, timeout=30)
    resp.raise_for_status()

    try:
        data = resp.json()
    except ValueError:
        # Shape 2 with a body that is not valid JSON: the JSON decode errors
        # raised by `resp.json()` are ValueError subclasses, so fall back to
        # the raw response text instead of crashing.
        return resp.text, []

    if isinstance(data, dict) and 'answer' in data:
        # `or []` also covers an explicit null / empty `contexts` value.
        return data.get('answer', ''), data.get('contexts', []) or []

    # Any other JSON payload (e.g. a bare JSON string) is used as the answer.
    return str(data), []


In [None]:
questions = raw['question']
ground_truth = raw['ground_truth']

# Query the app once per question, keeping the replies in question order.
responses = [call_app(question) for question in questions]

answers = [answer for answer, _ in responses]
# Each contexts entry must be a list; wrap any other value as a one-item list.
contexts = [
    retrieved if isinstance(retrieved, list) else [str(retrieved)]
    for _, retrieved in responses
]

len(answers), len(contexts)


## Build dataset and run Ragas

In [None]:
# One row per question, with columns question / answer / contexts / ground_truth.
eval_columns = {
    'question': questions,
    'answer': answers,
    'contexts': contexts,
    'ground_truth': ground_truth,
}
df = pd.DataFrame(eval_columns)
dataset = Dataset.from_pandas(df)

# Metrics to score, in the order they are passed to `evaluate`.
ragas_metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
results = evaluate(dataset=dataset, metrics=ragas_metrics)

results


In [None]:
# Convert the evaluation results to a DataFrame and preview the first rows.
summary = results.to_pandas()
summary.head(n=5)


In [None]:
# Save the results table as CSV under <PROJECT_ROOT>/outputs, creating the
# directory if it does not exist yet.
OUT_DIR = PROJECT_ROOT / 'outputs'
summary_path = OUT_DIR / 'ragas_results.csv'

summary_path.parent.mkdir(parents=True, exist_ok=True)
summary.to_csv(summary_path, index=False)

summary_path
