# **Evaluating RAG Systems Following the Revised Bloom's Taxonomy**

# Prepare Environment

In [None]:
# Clone Github repo
!git clone https://github.com/chains229/RAG-Evaluation

In [None]:
cd /kaggle/working/RAG-Evaluation

In [None]:
# Install required libraries
!pip install -r requirements.txt

In [None]:
# Prepare HuggingFace token
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the token into the notebook: it would be saved (and shared) with the file.
# Use HUGGINGFACE_TOKEN if it is already set in the environment; otherwise prompt for it
# without echoing. An existing value is no longer overwritten by a placeholder.
if not os.environ.get('HUGGINGFACE_TOKEN'):
    os.environ['HUGGINGFACE_TOKEN'] = getpass('Hugging Face token: ')

# Log in to Hugging Face using the token
login(token=os.environ['HUGGINGFACE_TOKEN'])

In [None]:
# Prepare API key to Gemini
import os  # imported here so this cell does not depend on an earlier cell having run
from getpass import getpass

import google.generativeai as genai

# Never paste the key into the notebook: it would be saved (and shared) with the file.
# Use GOOGLE_API_KEY if it is already set in the environment; otherwise prompt for it
# without echoing. An existing value is no longer overwritten by a placeholder.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini (Google) API key: ")

genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Prepare Data

In [None]:
# IN CASE YOU DON'T HAVE THE DATABASE FOLDER, run this cell to create the knowledge database.
# Otherwise, skip this cell and move on to the one below.
# You will have to modify all code in this repo if you use different data.
# Note: this Kaggle dataset is currently private.

from database.embedding import create_faiss_index
import os

input_csvs = ["/kaggle/input/virag-bloom-v1/single_doc_07_v3.csv"]
input_pdf_folders = ["/kaggle/input/virag-bloom-v1/anpc849-st-viragbloom-gui-v2-main/anpc849-st-viragbloom-gui-v2-main/law_domain_part1",
              "/kaggle/input/virag-bloom-v1/anpc849-st-viragbloom-gui-v2-main/anpc849-st-viragbloom-gui-v2-main/law_domain_part2"]

# Collect the full path of every entry in each folder (no extension filtering is done here;
# presumably the folders contain only PDFs — verify against the dataset).
# sorted(): os.listdir returns entries in arbitrary order, so sorting keeps the document
# order passed to the index builder identical from run to run.
input_pdfs = []
for folder in input_pdf_folders:
    input_pdfs.extend(os.path.join(folder, name) for name in sorted(os.listdir(folder)))

model = "intfloat/multilingual-e5-base"
# NOTE(review): /kaggle/input is read-only on Kaggle. If create_faiss_index writes to
# saved_path (rather than reading from it), this path must move under /kaggle/working — confirm.
save_path = "/kaggle/input/virag-bloom-v1/full_document.txt"
chunk_size = 256
chunk_overlap = 100

create_faiss_index(input_csvs, input_pdfs,  chunk_size=chunk_size, chunk_overlap=chunk_overlap, saved_path=save_path,model=model, output_folder = "Database")

# Evaluation

In [None]:
# Run the evaluation script. main.py is referenced by relative path, so this assumes the
# current working directory is the repo root.
# Every value below is a placeholder and must be replaced before running:
#   --model_name    your_huggingface_model
#   --data_path     path_to_your_csv_file_data
#   --faiss_folder  path_to_your_faiss_folder
#   --level         level_to_evaluate
# NOTE(review): the accepted values for --level and the effect of `--demo true` are defined
# in main.py, which is not shown here — confirm there.
!python3 main.py \
    --model_name your_huggingface_model \
    --data_path path_to_your_csv_file_data \
    --faiss_folder path_to_your_faiss_folder \
    --level level_to_evaluate \
    --demo true