# Hybrid Local DSPy RAG with Ollama

This notebook implements a French RAG (Retrieval-Augmented Generation) system using:
- Hybrid retrieval (CamemBERT + BM25)
- DSPy for orchestration
- Mistral via Ollama for language generation
- Metal (MPS) acceleration for Mac
- MLX for optimized computation

In [1]:
# 1. Install required libraries
!pip install mlx mlx-torch
!pip install transformers rank_bm25 nltk torch dspy-ai --quiet
!pip install sentencepiece==0.1.99
!pip install ollama
!pip install --upgrade dspy-ai

import nltk
nltk.download('punkt')

Collecting mlx
  Using cached mlx-0.24.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (5.3 kB)
[31mERROR: Could not find a version that satisfies the requirement mlx-torch (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for mlx-torch[0m[31m
[0mCollecting sentencepiece==0.1.99
  Using cached sentencepiece-0.1.99-cp312-cp312-macosx_14_0_arm64.whl
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
Collecting ollama
  Using cached ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)
Using cached ollama-0.4.7-py3-none-any.whl (13 kB)
Installing collected packages: ollama
Successfully installed ollama-0.4.7


[nltk_data] Downloading package punkt to /Users/jeanbapt/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# 2. Load CamemBERT and tokenizer
from transformers import CamembertTokenizer, CamembertModel
import torch

# Check for Metal support
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

tokenizer = CamembertTokenizer.from_pretrained("camembert/camembert-base")
model = CamembertModel.from_pretrained("camembert/camembert-base")
model = model.to(device)
model.eval()

In [3]:
# 3. Sample French documents
documents = [
    "Le changement climatique menace la biodiversité.",
    "Les énergies renouvelables sont essentielles pour l'avenir.",
    "L'intelligence artificielle transforme l'industrie.",
    "La culture française est riche et diversifiée.",
    "Les océans jouent un rôle crucial dans la régulation du climat."
]

In [4]:
# 4. ColBERT-style encoder
def encode(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.squeeze(0), inputs["attention_mask"].squeeze(0)

# Encode all documents
colbert_index = [(doc, *encode(doc)) for doc in documents]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [5]:
# 5. BM25 Retriever
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize

tokenized_corpus = [word_tokenize(doc.lower()) for doc in documents]
bm25 = BM25Okapi(tokenized_corpus)

In [6]:
# 6. Hybrid Retriever
import torch.nn.functional as F

def colbert_score(query, k=3):
    q_embed, q_mask = encode(query)
    scores = []
    for doc, d_embed, d_mask in colbert_index:
        sim = torch.einsum('id,jd->ij', q_embed, d_embed)
        maxsim = sim.max(dim=1).values
        score = maxsim.mean().item()
        scores.append((doc, score))
    return sorted(scores, key=lambda x: -x[1])[:k]

def bm25_score(query, k=3):
    tokenized_query = word_tokenize(query.lower())
    scores = bm25.get_scores(tokenized_query)
    ranked = sorted(enumerate(scores), key=lambda x: -x[1])[:k]
    return [(documents[i], scores[i]) for i, _ in ranked]

def hybrid_score(query, alpha=0.5, k=3):
    colbert_results = dict(colbert_score(query, k=10))
    bm25_results = dict(bm25_score(query, k=10))
    combined = {}
    for doc in set(colbert_results) | set(bm25_results):
        c = colbert_results.get(doc, 0)
        b = bm25_results.get(doc, 0)
        combined[doc] = alpha * c + (1 - alpha) * b
    return sorted(combined.items(), key=lambda x: -x[1])[:k]

In [7]:
# 7. Try a French query
query = "Quel est l'impact du climat sur la nature ?"
for doc, score in hybrid_score(query, alpha=0.6):
    print(f"{score:.4f} - {doc}")

12.3794 - Les océans jouent un rôle crucial dans la régulation du climat.
11.4298 - La culture française est riche et diversifiée.
10.8376 - Les énergies renouvelables sont essentielles pour l'avenir.


# 8. DSPy Integration with Mistral via Ollama

In [8]:
# Wrap Mistral (Ollama) as DSPy LM
import dspy
import ollama
import time

class MistralOllamaLM(dspy.LM):
    def __init__(self, max_retries=3, timeout=30):
        super().__init__(model='mistral')
        self.max_retries = max_retries
        self.timeout = timeout
        self.client = ollama.Client()

    def __call__(self, prompt, **kwargs):
        for attempt in range(self.max_retries):
            try:
                response = self.client.chat(
                    model='mistral',
                    messages=[{"role": "user", "content": prompt}],
                    options={"timeout": self.timeout * 1000}
                )
                return response['message']['content']
            except Exception as e:
                if attempt == self.max_retries - 1:
                    raise Exception(f"Failed to get response from Ollama after {self.max_retries} attempts: {str(e)}")
                print(f"Attempt {attempt + 1} failed, retrying...")
                time.sleep(1)

lm = MistralOllamaLM()
dspy.settings.configure(lm=lm)

In [9]:
# DSPy Retriever wrapper using hybrid_score
from dspy.retrieve import Retrieve

class HybridDSPyRetriever(Retrieve):
    def __init__(self, alpha=0.6, k=3):
        super().__init__()
        self.alpha = alpha
        self.k = k

    def retrieve(self, query, k=None):
        results = hybrid_score(query, alpha=self.alpha, k=k or self.k)
        return [dspy.Passage(text=doc, score=score) for doc, score in results]

retriever = HybridDSPyRetriever()

In [10]:
# DSPy Pipeline Implementation
from dspy import Module, InputField, OutputField, Signature, Example

class HybridRetriever(Module):
    def __init__(self, hybrid_score_function, alpha=0.6, k=3):
        super().__init__()
        self.hybrid_score_function = hybrid_score_function
        self.alpha = alpha
        self.k = k

    def forward(self, query):
        results = self.hybrid_score_function(query, alpha=self.alpha, k=self.k)
        return results

class FrenchQAPipeline(Module):
    def __init__(self, retriever):
        super().__init__()
        self.retriever = retriever
        
    def forward(self, question):
        # Retrieve relevant passages
        passages = self.retriever(question)
        
        # Format context from passages
        context = "\n".join([doc for doc, _ in passages])
        
        # Create prompt for Mistral
        prompt = f"""Contexte:
{context}

Question: {question}

Réponse:"""
        
        # Get answer from Mistral
        response = lm(prompt)
        return response

# Initialize the pipeline
retriever = HybridRetriever(hybrid_score_function=hybrid_score)
qa_pipeline = FrenchQAPipeline(retriever)

# Test the pipeline
question = "Quel est l'impact du climat sur la nature ?"
answer = qa_pipeline(question)
print(f"Question: {question}\nRéponse: {answer}")

Question: Quel est l'impact du climat sur la nature ?
Réponse:  Le changement de climat a un impact important sur la nature car il modifie les habitats, les cycles biologiques, les espèces végétales et animales et peut engendrer des disparitions d'espèces ou des migrations vers d'autres régions plus adaptées. Les océans, en particulier, jouent un rôle crucial dans ce contexte car ils absorbent une partie importante des gaz à effet de serre, régulent le climat et abritent la moitié de la biodiversité planétaire.
