In [1]:
import warnings
warnings.filterwarnings("ignore")

import shap
import numpy as np
import torch
from src.modules.data_loader import DataLoader
from src.modules.llm_client import LLMClient
from src.modules.colbert_rag_engine import NativeColbertRAGEngine
from src.modules.multihop_rag_engine import MultiHopRAGEngine

This is a separate notebook for using ColBERT with RAGatouille, kept apart to avoid dependency conflicts with other parts of the project.

In [2]:
# Load the dataset and turn it into the document chunks that get indexed.
data_loader = DataLoader()
documents = data_loader.setup()

# Retrieval backend: ColBERT engine built over the loaded documents.
rag_engine = NativeColbertRAGEngine(
    documents=documents,
)

# Generation backend: the LLM client handed to the multi-hop engine below.
llm_client = LLMClient(
    provider="ollama",
    model_name="gemma3:1b",
)

# Multi-hop engine that combines the retriever and the LLM client.
mh_rag = MultiHopRAGEngine(
    rag_engine=rag_engine,
    llm_client=llm_client,
)

✓ Dataset found at /home/nilspoethkow/Code/Uni/XAI/xai-rag/data/raw/hotpot_test_fullwiki_v1.json
Loading data into memory...
✓ Loaded 7405 questions.
Converting HotPotQA contexts to documents...


Processing Articles: 100%|██████████| 7405/7405 [00:01<00:00, 7184.53it/s] 


✓ Created 73774 context chunks.
Initializing Native ColBERT Engine...
Indexing 73774 documents...


KeyboardInterrupt: 

In [None]:
# Select N documents at random (seeded, so the draw is reproducible) and run
# multi-hop retrieval on the question stored in each document's metadata.
N = 1
TOP_K = 5  # number of documents requested per query
rng = np.random.default_rng(seed=180)

# Sample *indices* instead of the document objects themselves: this avoids
# coercing the whole corpus into a NumPy array just to pick a few entries, and
# replace=False guarantees the same document is never drawn twice when N > 1.
# The sample size is capped so an N larger than the corpus does not raise.
sample_size = min(N, len(documents))
selected_indices = rng.choice(len(documents), size=sample_size, replace=False)
selected_documents = [documents[idx] for idx in selected_indices]

# storing all traces
traces = []

for doc in selected_documents:
    question = doc.metadata.get("question")

    if not question:
        # Without a question there is nothing to query for; skip explicitly
        # rather than handing None / an empty string to the retriever.
        print("Skipping document without a 'question' entry in its metadata.")
        continue

    trace = mh_rag.retrieve_documents(query=question, k=TOP_K)

    traces.append(trace)

## Problems

- Probably too heavyweight when done with SHAP's `DeepExplainer`.
- Dependency hell with RAGatouille, so this is probably only manageable with a Mini-ColBERT, i.e. our own implementation.