<a href="https://colab.research.google.com/github/aurioldegbelo/sis2025/blob/main/2025_SIS_Demo_2_Prompts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Topics: Data Modelling and Search Models
* Expansion via multi-query generation
* Task decomposition


In [2]:
import os
import getpass

# Ask for the OpenAI API key only when the environment does not already hold a
# non-empty one. An empty OPENAI_API_KEY value is treated as missing, so it
# triggers the prompt instead of being silently accepted.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [4]:
!pip install -qU langchain-openai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/81.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/467.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m460.8/467.2 kB[0m [31m20.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m467.2/467.2 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.3.27 requires langchain-core<1.0.0,>=0.3.72, but you have langchain-core 1.0.0 which is incompatible.[0m[31m
[0m

# Expansion (Multi-Query)

In [9]:
# Adapted from https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


# Multi Query: Different Perspectives
# The prompt asks for five rephrasings of the user question, separated by newlines.
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
output_parser = StrOutputParser()

# Chain: fill the prompt -> call the model -> reduce the reply to a string ->
# split it into one query per line. Each line is stripped and empty lines are
# dropped, so blank lines in the reply do not turn into empty queries.
generate_queries = (
    prompt_perspectives
    | llm
    | output_parser
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
generate_queries.invoke({"question": "What is the capital of Germany?"})

['1. Can you tell me the capital city of Germany?',
 '2. Which city serves as the capital of Germany?',
 '3. What is the main city that functions as the capital of Germany?',
 '4. Do you know the capital of Germany?',
 '5. Could you provide information on the capital city of Germany?']

In [10]:
# Another example of template/prompt for multi-query generation
# This prompt asks for four related search queries instead of rephrasings.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

# The model is named explicitly, as in the other cells of this notebook,
# rather than left to the library default.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
output_parser = StrOutputParser()

# Chain: fill the prompt -> call the model -> reduce the reply to a string ->
# split it into one query per line. Each line is stripped and empty lines are
# dropped, so blank lines in the reply do not turn into empty queries.
generate_queries = (
    prompt_rag_fusion
    | llm
    | output_parser
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

generate_queries.invoke({"question": "What is the geometry of Hessen?"})

['1. What are the geographical features of Hessen?',
 '2. How is the landscape of Hessen shaped?',
 '3. What are the boundaries of Hessen?',
 '4. What is the topography of Hessen like?']

# Decomposition

In [12]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Decomposition
# The prompt asks the model to break one question into three sub-questions
# that can each be answered in isolation.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""

llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
output_parser = StrOutputParser()

prompt_for_decomposition = ChatPromptTemplate.from_template(template)

# Chain: fill the prompt -> call the model -> reduce the reply to a string ->
# split it into one sub-question per line. Each line is stripped and empty
# lines are dropped, so blank lines in the reply do not turn into empty
# sub-questions.
decompose_problem = (
    prompt_for_decomposition
    | llm
    | output_parser
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

# Example questions to decompose.
question1 = "What is the capital of Germany?"
question2 = "Which cities are north of Germany?"
question3 = "What is the geometry of Germany?"
question4 = "What is the population of Germany?"
question5 = "What are hotels near the central train station in Münster, Germany?"

# Pass any of question1..question5 here to compare the resulting decompositions.
decompose_problem.invoke({"question": question2})

['1. What are the northernmost cities in Europe?',
 '2. Which cities in Scandinavia are located north of Germany?',
 '3. Are there any major cities in the Baltic states that are north of Germany?']