https://deepwiki.com/search/i-want-to-learn-about-multihop_56271052-8b15-41df-9499-8d4e2562d7ab

In [None]:
%pip install -q datasets transformers pytorch-lightning matplotlib seaborn dspy

In [None]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
# Never echo the secret itself: notebook output is stored alongside the code
# and is easily committed or shared. Report only whether a key was found.
print(f"API Key: {'loaded' if api_key else 'NOT SET'}")


# Model identifier strings; gpt_41_nano is the one handed to dspy.LM below.
gpt_4o_mini = "openai/gpt-4o-mini"
gpt_41_nano = "openai/gpt-4.1-nano"

In [None]:
import dspy  
  
# Set up your language model
lm = dspy.LM(gpt_41_nano)  # You can change this to your preferred model

# Set up the ColBERTv2 retriever
retriever = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')  # This is the example URL from the docs

# Register the language model and the retrieval model in a single call.
dspy.settings.configure(lm=lm, rm=retriever)


# Define the MultiHop module  
class MultiHop(dspy.Module):
    """Answer a question by alternating query generation and retrieval.

    On every hop the language model writes a search query from the question
    and the passages gathered so far, the retriever is called with that
    query, and any passages not already collected are added to the context.
    After the last hop the answer is generated from the accumulated context.

    Args:
        passages_per_hop: Value passed as ``k`` to ``dspy.Retrieve``.
        max_hops: Number of query/retrieve rounds to run. Defaults to 2.
    """

    def __init__(self, passages_per_hop=3, max_hops=2):
        super().__init__()
        self.max_hops = max_hops
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_query = dspy.ChainOfThought("context, question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Run the hops for ``question`` and generate an answer.

        Args:
            question: The question to answer.

        Returns:
            A ``dspy.Prediction`` with ``context`` (the collected passages,
            in retrieval order, without repeats) and ``answer``.
        """
        context = []
        for _ in range(self.max_hops):
            query = self.generate_query(context=context, question=question).search_query
            for passage in self.retrieve(query).passages:
                # Different hops often return the same passage; keeping one
                # copy avoids padding the answer prompt with repeats. A list
                # membership test is used so passages need not be hashable.
                if passage not in context:
                    context.append(passage)
        return dspy.Prediction(
            context=context,
            answer=self.generate_answer(context=context, question=question).answer,
        )
  
# Create the module-level MultiHop instance that answer_question() calls;
# passages_per_hop=10 becomes the `k` given to dspy.Retrieve.
multihop = MultiHop(passages_per_hop=10)  
  
# Function to process user queries  
def answer_question(user_question):
    """Run the module-level ``multihop`` pipeline on one question.

    Returns a plain dict with the original ``question`` plus the ``answer``
    and ``context`` attributes of the pipeline's result.
    """
    prediction = multihop(user_question)
    payload = {"question": user_question}
    payload["answer"] = prediction.answer
    payload["context"] = prediction.context
    return payload
  
# Example usage: ask one question and print the answer with its passages.
# user_question = "Where is yoga capital of the world and how far it is from the nearest airport?"
user_question = "Which airline has concept of skycouch, which country is it from"
response = answer_question(user_question)

print(f"Question: {response['question']}")
print(f"Answer: {response['answer']}")

print("\nSupporting context:")
# Show only the first 100 characters of every passage, numbered from 1.
for position, passage in enumerate(response['context'], start=1):
    preview = passage[:100]
    print(f"{position}. {preview}...")

In [None]:
import dspy  
  
# Endpoint of the ColBERTv2 retrieval server (the example URL from the docs).
WIKI17_ABSTRACTS_URL = 'http://20.102.90.50:2017/wiki17_abstracts'

# Language model (swap the identifier to use a different model) and retriever.
lm = dspy.LM(gpt_41_nano)
retriever = dspy.ColBERTv2(url=WIKI17_ABSTRACTS_URL)

# Make both the default LM and RM for every DSPy module created below.
dspy.settings.configure(rm=retriever, lm=lm)
  
# Define the RetrieveMultiHop module  
class RetrieveMultiHop(dspy.Module):
    """Multi-hop retrieval with per-hop summaries and an LM-driven stop check.

    The first hop always runs. Before each further hop the language model is
    asked whether more searching is needed; the loop ends when it says no or
    when ``max_hops`` hops have been performed.

    Args:
        passages_per_hop: Value passed as ``k`` to ``dspy.Retrieve``.
        max_hops: Upper bound on the number of hops. The first hop is
            performed regardless, so values below 1 behave like 1.
    """

    def __init__(self, passages_per_hop=5, max_hops=3):
        super().__init__()
        self.passages_per_hop = passages_per_hop
        self.max_hops = max_hops
        self.retrieve = dspy.Retrieve(k=passages_per_hop)

        # Query generation for each hop
        self.generate_initial_query = dspy.ChainOfThought("question -> search_query")
        self.generate_followup_query = dspy.ChainOfThought("question, context, summaries -> search_query")

        # Summarization after each hop
        self.summarize = dspy.ChainOfThought("question, passages -> summary")

        # Final answer generation
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

        # Hop controller - decides if we need more hops
        # NOTE(review): ChainOfThought is documented to add its own reasoning
        # output, so the explicit `reasoning: str` field may be redundant -
        # confirm against the installed DSPy version before changing it.
        self.need_more_hops = dspy.ChainOfThought("question, context, summaries, current_hop, max_hops -> continue_searching: bool, reasoning: str")

    def _run_hop(self, question, query, context, summaries):
        """Retrieve for ``query``; add unseen passages to ``context`` and summarize them.

        ``context`` and ``summaries`` are extended in place. Nothing is
        summarized when the hop yields no passage that is new to ``context``.
        """
        fresh = []
        for passage in self.retrieve(query).passages:
            # Later hops frequently return passages already collected; adding
            # and summarizing them again would only repeat information. List
            # membership is used so passages need not be hashable.
            if passage not in context and passage not in fresh:
                fresh.append(passage)
        context.extend(fresh)
        if fresh:
            summaries.append(self.summarize(question=question, passages=fresh).summary)

    def forward(self, question):
        """Answer ``question`` using up to ``max_hops`` retrieval hops.

        Args:
            question: The question to answer.

        Returns:
            A ``dspy.Prediction`` with ``context`` (collected passages without
            repeats), ``answer``, ``summaries`` (one per hop that produced new
            passages), ``queries`` (one per hop) and ``hops_performed``.
        """
        context = []
        summaries = []
        all_queries = []

        # First hop - always performed
        initial_query = self.generate_initial_query(question=question).search_query
        all_queries.append(initial_query)
        self._run_hop(question, initial_query, context, summaries)

        # Subsequent hops - performed conditionally
        current_hop = 1
        while current_hop < self.max_hops:
            # Ask the controller whether the material gathered so far suffices
            continue_decision = self.need_more_hops(
                question=question,
                context=context,
                summaries=summaries,
                current_hop=current_hop,
                max_hops=self.max_hops,
            )
            if not continue_decision.continue_searching:
                break

            # Generate the next query from the accumulated context and summaries
            next_query = self.generate_followup_query(
                question=question,
                context=context,
                summaries=summaries,
            ).search_query
            all_queries.append(next_query)
            self._run_hop(question, next_query, context, summaries)

            current_hop += 1

        # Generate the final answer
        answer = self.generate_answer(context=context, question=question).answer

        return dspy.Prediction(
            context=context,
            answer=answer,
            summaries=summaries,
            queries=all_queries,
            hops_performed=current_hop,
        )
  
# Create the RetrieveMultiHop instance that answer_question() calls. This
# rebinds the module-level name `multihop`, replacing the MultiHop instance
# created in the earlier cell.
multihop = RetrieveMultiHop(passages_per_hop=10, max_hops=3)  
  
# Function to process user queries  
def answer_question(user_question):
    """Run the module-level ``multihop`` pipeline on one question.

    Returns a plain dict holding the original ``question`` followed by the
    ``answer``, ``context``, ``summaries``, ``queries`` and
    ``hops_performed`` attributes of the pipeline's result.
    """
    prediction = multihop(user_question)
    report = {"question": user_question}
    for field in ("answer", "context", "summaries", "queries", "hops_performed"):
        report[field] = getattr(prediction, field)
    return report
  
# Example usage: ask one question, then print the answer and the hop trace.
# user_question = "Where is yoga capital of the world and how far it is from the nearest airport?"
user_question = "Who wrote 'The Art of Computer Programming', which university he is from, and how far it is from Meta headquarters?"
response = answer_question(user_question)

print(f"Question: {response['question']}")
print(f"Answer: {response['answer']}")
print(f"\nHops performed: {response['hops_performed']}")

# One search query per hop, numbered from 1.
print("\nQueries used:")
for hop_number, hop_query in enumerate(response['queries'], start=1):
    print(f"Hop {hop_number}: {hop_query}")

print("\nSummaries:")
for hop_number, hop_summary in enumerate(response['summaries'], start=1):
    print(f"Hop {hop_number} Summary: {hop_summary}")

# Only the first three passages are shown, each cut to 100 characters.
print("\nSupporting context (sample):")
sample = response['context'][:3]
for position, passage in enumerate(sample, start=1):
    print(f"{position}. {passage[:100]}...")