Topics: Data Modelling and Search Models
* Expansion via multi-query generation
* Task decomposition


In [15]:
import os
import getpass

# Ask for the key interactively when the variable is missing *or* set to an
# empty string. A plain membership test would accept an exported-but-empty
# variable and leave an unusable (empty) key in place.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [None]:
!pip install -qU langchain-openai

# Expansion (Multi-Query)

In [23]:
# Adapted from https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


# Multi Query: Different Perspectives
# The prompt asks the model for five rephrasings of the user question,
# separated by newlines; the chain below turns the reply into a list of queries.
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)
output_parser = StrOutputParser()


def _non_empty_lines(text: str) -> list:
    """Split a model reply into one query per line, dropping blank lines.

    A bare ``text.split("\\n")`` keeps empty strings whenever the reply
    contains blank lines or a trailing newline, and those would end up as
    empty search queries. Surrounding whitespace is stripped from each line.
    """
    return [line.strip() for line in text.split("\n") if line.strip()]


# prompt -> chat model -> plain string -> list of query strings
generate_queries = (
    prompt_perspectives
    | llm
    | output_parser
    | _non_empty_lines
)
generate_queries.invoke({"question": "What is the capital of Germany?"})

['1. Can you tell me the capital city of Germany?',
 '2. Which city serves as the capital of Germany?',
 '3. What is the official capital of Germany?',
 '4. Where is the governmental center of Germany located?',
 '5. Do you know the primary city that functions as the capital of Germany?']

In [24]:
# Another example of template/prompt for multi-query generation
# Here the prompt asks for four related search queries instead of five
# rephrasings of the question.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)
output_parser = StrOutputParser()


def _non_empty_lines(text: str) -> list:
    """Split a model reply into one query per line, dropping blank lines.

    A bare ``text.split("\\n")`` keeps empty strings whenever the reply
    contains blank lines or a trailing newline, and those would end up as
    empty search queries. Surrounding whitespace is stripped from each line.
    """
    return [line.strip() for line in text.split("\n") if line.strip()]


# prompt -> chat model -> plain string -> list of query strings
generate_queries = (
    prompt_rag_fusion
    | llm
    | output_parser
    | _non_empty_lines
)

generate_queries.invoke({"question": "What is the geometry of Hessen?"})

['1. What are the geographical features of Hessen?',
 '2. How is the land area of Hessen distributed?',
 '3. What are the boundaries of Hessen?',
 '4. What is the topography of Hessen like?']

# Decomposition

In [28]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Decomposition
# The prompt asks the model to break one question into three sub-questions;
# the chain below turns the reply into a list of sub-question strings.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""

llm = ChatOpenAI(temperature=0)
output_parser = StrOutputParser()

prompt_for_decomposition = ChatPromptTemplate.from_template(template)


def _non_empty_lines(text: str) -> list:
    """Split a model reply into one sub-question per line, dropping blank lines.

    A bare ``text.split("\\n")`` keeps empty strings whenever the reply
    contains blank lines or a trailing newline, and those would end up as
    empty sub-questions. Surrounding whitespace is stripped from each line.
    """
    return [line.strip() for line in text.split("\n") if line.strip()]


# prompt -> chat model -> plain string -> list of sub-question strings
decompose_problem = prompt_for_decomposition | llm | output_parser | _non_empty_lines

# Sample questions to try with the chain.
question1 = "What is the capital of Germany?"
question2 = "Which cities are north of Germany?"
question3 = "What is the geometry of Germany?"
question4 = "What is the population of Germany?"
# The city name was mis-encoded (UTF-8 bytes read as Latin-1); restored to "Münster".
question5 = "What are hotels near the central train station in Münster, Germany?"

# Only the last expression of a notebook cell is displayed; uncomment a
# different line to try another question.
#decompose_problem.invoke({"question": question1})
#decompose_problem.invoke({"question": question2})
decompose_problem.invoke({"question": question4})

['1. What is the current population of Germany in 2021?',
 '2. How has the population of Germany changed over the past decade?',
 '3. What are the factors influencing the population growth or decline in Germany?']