# Start Your Server
### Run the following in a terminal to start the API server

`make start-server`

This server will allow us to save our experiment data.

# Running RAG Evaluations

The process of evaluating your RAG platform consists of the following steps (the output of each step is shown in parentheses):

1. Get data for our RAG to search (Documents)
2. Create a dataset of questions and ground truths (Q-A Dataset)
3. Use your RAG-LLM system to answer the questions (Completions, Context)
4. Use eval tools to calculate metrics based on previous output (Metrics)

## *BioASQ-QA: A manually curated corpus for Biomedical Question Answering*
For this example we will download a dataset built for RAG Q/A evaluation, which will save us some time.

| Description            | Link                                                                 |
|------------------------|----------------------------------------------------------------------|
| Information about dataset | [Nature Article > scientific data > data descriptors ](https://www.nature.com/articles/s41597-023-02068-4#) |
| Download source        | [Hugging Face Datasets](https://huggingface.co/datasets/rag-datasets/rag-mini-bioasq) |


In [None]:
from logging import getLogger
# Notebook-wide logger, named after the current module.
logger = getLogger(__name__)
# Let DEBUG and higher records through this logger.
# NOTE(review): no handler is configured in this cell, so whether these
# records are actually displayed depends on logging setup done elsewhere — confirm.
logger.setLevel("DEBUG")

from datasets import Dataset, load_dataset
from llama_index.core.schema import TextNode

from eval_scripts.utils import post_dataset, post_qasets, post_documents
from eval_data.models import DatasetType, DocumentType, QASetType



# 1. Get data from HuggingFace

In [None]:
# Describe the new Dataset and submit it with post_dataset so it is saved in our db
dataset = DatasetType(
    name="BioASQ",
    description="Manually curated set of biomedical Documents, Questions, and Answers",
)
result = post_dataset(dataset.to_dict())

# Guard clause: without an id in the response the dataset was not created.
if not result.get("id"):
    # Prefer the error reported in the response when there is one.
    if result.get("error"):
        raise Exception(result["error"])
    # Otherwise show the raw response to help diagnose the failure.
    print(result)
    raise Exception("Failed to create dataset")

# Keep the id: the QA set created later is attached to this dataset.
dataset_id = result["id"]
print(f"Created dataset with id: {dataset_id}")

# 2. Get a ready-made QA set from Hugging Face

The BioASQ dataset contains a Q/A set already, which is very valuable for our evaluation because it is a curated set of data.

In [None]:
# For debugging only: set dataset_id to the id of the dataset you want to update.
# NOTE: running this cell overrides the id returned when the dataset was created.
dataset_id = 6

In [None]:
logger.info("Loading QA dataset")
# Load the question/answer/passages configuration and keep only its 'test' split
qa_splits = load_dataset("rag-datasets/rag-mini-bioasq", "question-answer-passages")
dataset = qa_splits['test']

# Path of the first cache file backing the split (this will be a '.arrow' file)
arrow_path = dataset.cache_files[0]["filename"]

# Describe the QA set and link it to the dataset identified by dataset_id
qaset = QASetType(
    dataset_id=dataset_id,
    name="BioASQ Question Answer Set",
    location=arrow_path,
)


In [None]:
# Submit the QA set description with post_qasets
result = post_qasets(qaset.to_dict())

# Guard clause: without an id in the response the QA set was not created.
if not result.get("id"):
    # Prefer the error reported in the response when there is one.
    if result.get("error"):
        raise Exception(result["error"])
    # Otherwise show the raw response to help diagnose the failure.
    print(result)
    raise Exception("Failed to create QA set")

qaset_id = result["id"]
print(f"Created QA Set(s): {qaset_id}")

# Index the documents

In [None]:
from datasets import load_dataset

from eval_scripts.utils import build_query_engine

# Fetch the text corpus configuration of the BioASQ data from HuggingFace
logger.info("Loading dataset text corpus")
doc_loader = load_dataset("rag-datasets/rag-mini-bioasq", "text-corpus")
passages = doc_loader["passages"]

# Disabled alternative: compose nodes for llama_index.core.index.VectorStoreIndex by hand
#logger.info("Constructing Documents for query engine")
#documents = [ TextNode(text=doc["passage"], id_=doc["id"]) for doc in doc_loader["passages"] ]

# Create a VectorStoreIndex-backed query engine from the passages.
# This will run embeddings on all the documents and persist the index to disk
query_engine = build_query_engine(passages)

# 3. Answer the evaluation questions

### If necessary, load the query engine from disk

In [None]:
from eval_scripts.utils import load_query_engine

# Load the query engine from a persisted index instead of rebuilding it.
# NOTE(review): presumably this reads the index written by build_query_engine —
# confirm in eval_scripts.utils.
query_engine = load_query_engine()


### Generate responses using the query engine

In [None]:
from eval_scripts.ragas_complete import generate_responses

# Number of questions passed to generate_responses per call
BATCH_SIZE = 10

# Read the question column once: the dataset is not modified inside the loop,
# so looking the column up again for every batch is loop-invariant work.
questions = dataset['question']

# Walk the questions in consecutive slices of at most BATCH_SIZE items;
# the final slice may be shorter.
for i in range(0, len(questions), BATCH_SIZE):
    # No reference answers are supplied here (test_answers=None).
    responses = generate_responses(query_engine, questions[i:i+BATCH_SIZE], test_answers=None)
    print(responses)



In [None]:
from datasets import load_dataset
from eval_scripts.ragas_complete import generate_responses
from eval_scripts.utils import build_query_engine, count_tokens
import math

# Fetch the text corpus configuration of the BioASQ data from HuggingFace
logger.info("Loading dataset text corpus")
doc_loader = load_dataset("rag-datasets/rag-mini-bioasq", "text-corpus")
passages = doc_loader["passages"]

# Disabled alternative: compose nodes for llama_index.core.index.VectorStoreIndex by hand
#logger.info("Constructing Documents for query engine")
#documents = [ TextNode(text=doc["passage"], id_=doc["id"]) for doc in doc_loader["passages"] ]

# Create a VectorStoreIndex-backed query engine from the passages.
# This will run embeddings on all the documents
query_engine = build_query_engine(passages)




In [None]:


# Questions to send to the query engine
test_questions = dataset["question"]

# Reference answers: use the "ground_truth" column when the dataset has one,
# otherwise fall back to the "answer" column.
if "ground_truth" in dataset.column_names:
    test_answers = dataset["ground_truth"]
else:
    test_answers = dataset["answer"]

# Fix: use `query_engine`, the name bound by the indexing/loading cells.
# `query_engine1` is never assigned in this notebook and raised NameError.
result_ds = generate_responses(query_engine, test_questions, test_answers)

# Persist the generated responses.
# NOTE(review): PATH_OUT is not defined in any visible cell — it must be set
# to the desired output directory before running this cell.
result_ds.save_to_disk(PATH_OUT)