In [1]:
import warnings
warnings.filterwarnings("ignore")

import shap
import numpy as np
import tomllib
from pathlib import Path
import numpy as np
from src.modules.loader.statspearls_data_loader import StatPearlsDataLoader
from src.modules.loader.medmcqa_data_loader import MedMCQADataLoader
from src.modules.loader.medmcqa_data_loader import format_medmcqa_question
from src.modules.loader.data_loader import DataLoader
from src.modules.llm.llm_client import LLMClient
from src.modules.rag.colbert_rag_engine import NativeColbertRAGEngine
from src.modules.rag.multihop_rag_engine import MultiHopRAGEngine

This is a separate notebook for using ColBERT with RAGatouille, kept apart to avoid dependency conflicts with other parts of the project.

In [2]:
# Locate the repository root: the current working directory or its nearest
# ancestor that contains a `src` directory.
project_root = next(
    (p for p in [Path.cwd(), *Path.cwd().parents] if (p / "src").exists()),
    None,
)
if project_root is None:
    # Fail early with an actionable message instead of a TypeError on `None / "config.toml"`.
    raise FileNotFoundError(
        f"Could not locate the project root: no 'src' directory found in "
        f"{Path.cwd()} or any of its parent directories."
    )

# Load config.toml if present; otherwise every section below falls back to an
# empty dict and the per-key defaults apply.
config_path = project_root / "config.toml"
config = {}

if config_path.exists():
    with config_path.open("rb") as f:  # tomllib requires a binary file handle
        config = tomllib.load(f)

# `or {}` also covers sections that are present but empty/falsy.
medmcqa_config = config.get("medmcqa") or {}
rag_config = config.get("rag") or {}
llm_config = config.get("llm") or {}

# Both values are None when the [llm] section or its keys are missing.
llm_model = llm_config.get("model")
llm_provider = llm_config.get("provider")

question_ids = medmcqa_config.get("kg_capable") or {}

client = LLMClient(provider=llm_provider, model_name=llm_model)

# Notebook-wide constants.
SPLIT = medmcqa_config.get("split", "val")
PERSIST_DIR = project_root / "data" / "vector_db_statpearls"
NUM_HOPS = rag_config.get("n_hops", 2)
NUM_DOCUMENTS = 50

In [3]:
# Load the StatPearls corpus from <project_root>/data.
stat_loader = StatPearlsDataLoader(root_dir=str(project_root / "data"))
documents, stats = stat_loader.setup()

# Build the ColBERT engine on top of the persisted index directory.
# NOTE(review): per the recorded output, setup() indexes all documents when no
# index exists at PERSIST_DIR, which is slow — the recorded run was interrupted.
rag_engine = NativeColbertRAGEngine(persist_dir=str(PERSIST_DIR))
# Use the config-cell constant instead of a duplicated literal
# (k is presumably the number of documents retrieved per query — confirm).
rag_engine.setup(documents=documents, k=NUM_DOCUMENTS)

# Honour the `n_hops` setting from config.toml rather than a hard-coded 2.
multi_hop = MultiHopRAGEngine(rag_engine=rag_engine, llm_client=client, num_hops=NUM_HOPS)

Initializing Native ColBERT Engine...
No existing index found at /home/nilspoethkow/Code/Uni/XAI/xai-rag/data/vector_db_statpearls. Please call setup() to index documents.
Indexing 13922 documents...
Encoding doc 3771/13922

KeyboardInterrupt: 