In [3]:
import json
import os

from ragbench import RAGPipeline, RAGEval, RAGTools

# From YAML Config

In [None]:
# Print the sample experiment config so the YAML passed to run_pipeline_from_yaml is visible in the notebook.
!cat ../experiment_configs/sample.yaml

In [None]:
# Run the pipeline as described by the sample YAML experiment config.
sample_config_path = '../experiment_configs/sample.yaml'
RAGPipeline.run_pipeline_from_yaml(sample_config_path)

# Step by Step

### Loading Corpus & Queries and Creating RAG Pipeline

In [4]:
# System-level instructions: answer only from the supplied context, stay concise,
# and reply `answer not found` when the context does not contain the answer.
system_prompt = (
    "Using the information contained in the context, give a comprehensive answer to the question.\n"
    "Respond only to the question asked, response should be concise and relevant to the question.\n"
    "If the answer cannot be deduced from the context, do not generate any response on your own"
    " and just say `answer not found`.\n"
)

# Per-query template. {CONTEXT} and {QUERY} are placeholders; presumably the
# pipeline substitutes them when prompts are created (confirm against RAGPipeline).
context_prompt = (
    "Context:\n"
    "{CONTEXT}\n"
    "---\n"
    "Now here is the question you need to answer.\n"
    "{QUERY}\n"
)

In [6]:
# Load the benchmark dataset; later cells read its 'corpus', 'gold_answers'
# and 'gold_retrieves' entries.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding, which is not UTF-8 on every system.
with open('../datasets/multihoprag.json', 'r', encoding='utf-8') as dataset_file:
    ds = json.load(dataset_file)

In [8]:
# Build the pipeline from the two prompt templates; the first argument is
# presumably a cache/working directory for this experiment (confirm against RAGPipeline).
experiment_cache_path = '../caches/sample_experiment'
rag_pipe = RAGPipeline(experiment_cache_path, system_prompt, context_prompt)

# Documents to index, and the queries to run (the keys of the gold-answer mapping).
ls_docs = ds['corpus']
ls_queries = list(ds['gold_answers'])

### Vectorize

In [None]:
# Load the embedding model first: the text splitter below is built on the tokenizer it exposes.
embedding_model_name = 'thenlper/gte-small'
rag_pipe.load_embedding_model_from_hf(embedding_model_name)

# NOTE(review): 256 and 0.1 look like chunk size (in tokens) and overlap ratio; confirm against RAGTools.
embedding_tokenizer = rag_pipe.embedding_tokenizer
text_splitter = RAGTools.load_text_splitter_hf_tokenizer(embedding_tokenizer, 256, 0.1)

# Split the corpus with that splitter and hand the chunks to the vector DB setup.
ls_chunks = rag_pipe.split_docs(ls_docs, text_splitter)
rag_pipe.prepare_vector_db(ls_chunks)

### Retrieve

In [None]:
# Run retrieval for every query; 10 is presumably the number of hits kept per query (confirm).
n_retrieved = 10
ls_rets = rag_pipe.retrieve(ls_queries, n_retrieved)

### Rerank

In [9]:
# Load the reranker, then rerank the retrieved results.
reranker_model_name = 'colbert-ir/colbertv2.0'
rag_pipe.load_reranker_model_from_hf(reranker_model_name)

# 4 is presumably the number of results kept per query after reranking (confirm).
n_reranked = 4
ls_reranked_rets = rag_pipe.rerank(ls_rets, n_reranked)

### Evaluate Retriever

In [None]:
# Score the reranked retrievals against the dataset's gold retrievals.
rets = ls_reranked_rets
golds = ds['gold_retrieves']
RAGEval.retrieval_metrics(rets, golds)

# Alternative: compute individual metrics on the un-reranked retrievals instead.
#
# rets, golds = ls_rets, ds['gold_retrieves']
# hit10 = RAGEval.hits_at(10, rets, golds)
# hit4 = RAGEval.hits_at(4, rets, golds)
# map10 = RAGEval.map_at(10, rets, golds)
# mrr10 = RAGEval.mrr_at(10, rets, golds)

### Generate Responses

In [None]:
# NOTE(review): prompts are built from `ls_rets` (the un-reranked retrievals), not
# `ls_reranked_rets`; confirm this is intended.
ls_prompts = rag_pipe.create_prompts(ls_rets)

# Keep only the first `n_sample_prompts` prompts for a quick test run. Despite the
# `random_*` names this is a head slice, not a random draw. For actual experiments,
# pass `ls_prompts` to `generate_responses` instead.
n_sample_prompts = 10
random_queries = list(ls_prompts.keys())[:n_sample_prompts]
random_prompts = {k: ls_prompts[k] for k in random_queries}

# Read the OpenAI key from the environment so it is never hardcoded in the notebook
# and the cell works on a fresh kernel (API_KEY was previously undefined here).
API_KEY = os.environ.get('OPENAI_API_KEY')
if not API_KEY:
    raise RuntimeError(
        'OPENAI_API_KEY is not set; export it in the environment before running this cell.'
    )

rag_pipe.load_generator_model_from_openai('gpt-4o-mini', API_KEY)
ls_responses = rag_pipe.generate_responses(random_prompts)

### Evaluate Answer Generation

In [None]:
# Compare the generated responses with the gold answers. The pipeline's embedding model
# is passed along, presumably for embedding-based similarity metrics (confirm against RAGEval).
gold_answers = ds['gold_answers']
metric_per_query = RAGEval.generation_metrics(ls_responses, gold_answers, rag_pipe.embedding_model)