<a href="https://colab.research.google.com/github/eericheva/langchain_rag/blob/dev/tutorials/start_here.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Basic and simple first RAG tutorial
- *The same tutorial as a Python script: [`start_here.py`](https://github.com/eericheva/langchain_rag/blob/main/tutorials/start_here.py)*
- V1: FULL RAG = RETRIEVER + GENERATOR with **`create_stuff_documents_chain`** from `langchain.chains.combine_documents` and **`create_retrieval_chain`** from `langchain.chains.retrieval`
- V2: FULL RAG = RETRIEVER + GENERATOR with **`RetrievalQA.from_chain_type`** from `langchain.chains.retrieval_qa.base`
- V3: FULL RAG = RETRIEVER + GENERATOR with **Runnable Sequences**

*Full repo with RAG hints and scripts [eericheva/langchain_rag](https://github.com/eericheva/langchain_rag/tree/main)*

In [2]:
!pip install langchain langchain_community langchain_core
!pip install huggingface_hub
!pip install sentence-transformers
!pip install pypdf
!pip install faiss-cpu



In [3]:
# import shutil
# shutil.rmtree('/HDD/models/HuggingFaceH4')
# shutil.rmtree('/HDD/models/intfloat')
# os.makedirs("/langchain_rag_data/raw_docs")

# import os
# from tqdm import tqdm
# from operator import itemgetter
# import logging
# from google.colab import userdata
# import inspect

In [4]:
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain.chains.retrieval import create_retrieval_chain
# from langchain.chains.retrieval_qa.base import RetrievalQA
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
# from langchain_community.vectorstores import FAISS
# from langchain_community.document_loaders import pdf
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.prompts import PromptTemplate
# from langchain_core.runnables import RunnableLambda
# from langchain_text_splitters import RecursiveCharacterTextSplitter

# from huggingface_hub import hf_hub_download, snapshot_download

In [5]:
import logging

########### LOGGER ###########
# Notebook-wide logger: timestamped records written through a single stream handler.
formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(message)s",
    "%Y-%m-%d %H:%M:%S",
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger("langchain_rag")
logger.propagate = False  # do not hand records to parent loggers (would print them twice)
logger.handlers.clear()  # re-running this cell must not stack one more handler
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# INITIAL SETUP

Set up `Config` with your tokens and keys and with the run parameters: vectorstore type, embedding and generator models, local paths, etc.

In [6]:
import os
from google.colab import userdata

########### KEYS AND TOKENS ###########
def _get_optional_secret(name):
    """Return the Colab secret `name`, or None if it is not defined or not shared with this notebook.

    All keys below are optional, so a missing secret must not abort the cell.
    """
    try:
        return userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        return None


# (Optional) LangSmith for closely monitoring and evaluating your application. https://docs.smith.langchain.com/
# go to https://smith.langchain.com/settings and create your own LANGCHAIN_API_KEY
LANGCHAIN_API_KEY = _get_optional_secret("LANGCHAIN_API_KEY")
# (Optional) If you want to use OpenAI models,
# go to https://platform.openai.com/api-keys and create your own OPENAI_API_KEY
OPENAI_API_KEY = _get_optional_secret("OPENAI_API_KEY")
# (Optional) If you want to use HuggingFaceHub:
# go to https://huggingface.co/settings/tokens and create your own HUGGINGFACEHUB_API_TOKEN
HUGGINGFACEHUB_API_TOKEN = _get_optional_secret("HUGGINGFACEHUB_API_TOKEN")

# os.environ accepts only str values, so export just the secrets that are actually set.
for _env_name, _env_value in (
    ("LANGCHAIN_API_KEY", LANGCHAIN_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
    ("HUGGINGFACEHUB_API_TOKEN", HUGGINGFACEHUB_API_TOKEN),
):
    if _env_value:
        os.environ[_env_name] = _env_value

# Turn LangSmith tracing on only when there is a key to authenticate with.
if LANGCHAIN_API_KEY:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

In [7]:
import inspect
import os

from huggingface_hub import hf_hub_download, snapshot_download

def _ensure_model_downloaded(model_name, model_source, token):
    """Download `model_name` from the Hugging Face Hub unless its local path already exists.

    Args:
        model_name: repo id ("org/model"), or "org/model/file.gguf" to fetch one quantized file.
        model_source: local root directory under which models are stored.
        token: Hugging Face access token forwarded to the download call (may be None).

    Returns:
        True if a download was performed, False if the local path already existed.
    """
    local_path = os.path.join(model_source, model_name)
    if os.path.exists(local_path):
        return False
    if model_name.endswith(".gguf"):
        # quantized model: everything before the last "/" is the repo id, the rest is the file name
        repo_id, _, filename = model_name.rpartition("/")
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            local_dir=local_path,
            token=token,
            force_download=True,
        )
    else:
        snapshot_download(
            repo_id=model_name,
            local_dir=local_path,
            token=token,
            force_download=True,
        )
    return True


class Config:
    """Static settings of the tutorial; defining the class also downloads missing models."""

    ########### SETUP ###########
    source_path = "/langchain_rag_data"
    RELOAD_VECTORSTORE = False  # True if you want to recreate new vector store with new embedding or new documents
    # False, if you want to restore vectorstore from dump
    DEVICE_EMB = "cpu"  # "cpu" stands for cpu, "cuda:1"
    DEVICE_GEN = 1  # -1 stands for cpu

    VECTORSTORE2USE = "FAISS"  # "FAISS", "CHROMA"
    # models = repo names from huggingface_hub
    HF_EMB_MODEL = "google/gemma-2b-it"  # model for embedding documents
    HF_LLM_NAME = "google/gemma-2b-it"  # model for generating the answer

    MYQ = "What is in my documets base?"

    ########### PATHS ###########
    this_project_path = os.getcwd()
    # NOTE: source_path is absolute, so os.path.join drops this_project_path in the joins below
    # here you store raw documents, you should put some files there
    DOC_SOURCE = os.path.join(this_project_path, source_path, "raw_docs/")

    # the following will be created/loaded automatically
    # here your models are or will be stored
    MODEL_SOURCE = "/HDD/models/"
    # here the pickle dump of your loaded documents will be stored
    DOC_LOADER_FILE = os.path.join(this_project_path, source_path, "data/MyDocs.pickle")
    # here the vectorstore will be stored
    VECTORSTORE_FILE = os.path.join(
        this_project_path,
        source_path,
        f"data/MyDocs.{VECTORSTORE2USE}{HF_EMB_MODEL.split('/')[0]}.vectorstore",
    )

    # download models from huggingface_hub locally (names ending with ".gguf" are quantized files)
    if _ensure_model_downloaded(HF_EMB_MODEL, MODEL_SOURCE, HUGGINGFACEHUB_API_TOKEN):
        # a freshly downloaded embedding model means any stored vectorstore has to be rebuilt
        RELOAD_VECTORSTORE = True
    _ensure_model_downloaded(HF_LLM_NAME, MODEL_SOURCE, HUGGINGFACEHUB_API_TOKEN)


# ########### LOGGING WHOLE SETUP ###########
def print_config():
    """Print every public, non-method member of ``Config`` as ``name : value``, one per line."""
    for member_name, member_value in inspect.getmembers(Config):
        if member_name.startswith("_") or inspect.ismethod(member_value):
            continue
        print(f"{member_name} : {member_value}")

print_config()

DEVICE_EMB : cpu
DEVICE_GEN : 1
DOC_LOADER_FILE : /langchain_rag_data/data/MyDocs.pickle
DOC_SOURCE : /langchain_rag_data/raw_docs/
HF_EMB_MODEL : google/gemma-2b-it
HF_LLM_NAME : google/gemma-2b-it
MODEL_SOURCE : /HDD/models/
MYQ : What is in my documets base?
RELOAD_VECTORSTORE : False
VECTORSTORE2USE : FAISS
VECTORSTORE_FILE : /langchain_rag_data/data/MyDocs.FAISSgoogle.vectorstore
source_path : /langchain_rag_data
this_project_path : /content


# EMBEDDING MODEL
Load model for embedding documents

In [8]:
from langchain_community.embeddings import HuggingFaceEmbeddings

############## EMBEDDING MODEL ##############
# Load model for embedding documents
logger.info(f"LLM_EMB : {Config.HF_EMB_MODEL}")
llm_emb = HuggingFaceEmbeddings(
    # https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface
    # .HuggingFaceEmbeddings.html
    # the model is loaded from the local copy that the Config cell downloads
    model_name=os.path.join(Config.MODEL_SOURCE, Config.HF_EMB_MODEL),
    model_kwargs={
        # full list of parameters for this section with explanation:
        # https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html
        # #sentence_transformers.SentenceTransformer
        "device": Config.DEVICE_EMB,  # taken from Config instead of a hard-coded "cpu"
    },
    encode_kwargs={
        # full list of parameters for this section with explanation:
        # https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html
        # #sentence_transformers.SentenceTransformer.encode
        "normalize_embeddings": False
    },
)

2024-07-13 10:24:51 - INFO - LLM_EMB : google/gemma-2b-it
  warn_deprecated(
`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# GENERATOR MODEL
Load model for generating answer

In [9]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

############## GENERATOR MODEL ##############
# Load model for generating answer.
# The generator has to be a text-generation LLM: the chains below pipe prompts into `llm_gen`,
# so binding it to the embedding model (`llm_emb`) would break them.
logger.info(f"LLM : {Config.HF_LLM_NAME}")
llm_gen = HuggingFacePipeline.from_model_id(
    # https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_pipeline
    # .HuggingFacePipeline.html
    model_id=os.path.join(Config.MODEL_SOURCE, Config.HF_LLM_NAME),
    task="text-generation",
    device=-1,  # -1 stands for CPU
    pipeline_kwargs={
        # full list of parameters for this section with explanation:
        # https://huggingface.co/docs/transformers/en/main_classes/text_generation
        "max_new_tokens": 512,  # how long the generated answer may be
        "return_full_text": False,
        # "return_full_text": True if you also want the prompt, contexts and other
        # serving text returned together with the generated answer
        "do_sample": False,  # greedy decoding: deterministic answers, no temperature/top_k needed
        "repetition_penalty": 1.03,  # 1.0 means no penalty
    },
)


# LOAD DOCUMENTS BASE
Create new vectorstore (FAISS)

In [10]:
import pickle
from langchain_community.document_loaders import pdf

############## LOAD DOCUMENTS BASE ##############
# Read every PDF from Config.DOC_SOURCE and dump the loaded documents to Config.DOC_LOADER_FILE
logger.info("#### RELOAD_VECTORSTORE ####")
# Load Documents
docs = []
logger.info("#### LOAD RAW DOCS ####")
# sorted() makes the document order independent of the file system listing order
for file_name in sorted(os.listdir(Config.DOC_SOURCE)):
    if not file_name.lower().endswith(".pdf"):
        # PyPDFLoader can only read PDF files; skip anything else found in the folder
        logger.info(f"skip non-PDF file : {file_name}")
        continue
    fp = os.path.join(Config.DOC_SOURCE, file_name)
    docs.extend(pdf.PyPDFLoader(fp).load())  # list of documents loaded from this file

logger.info(f"dump raw docs to {Config.DOC_LOADER_FILE} file")
# the target folder may not exist yet on a fresh machine
os.makedirs(os.path.dirname(Config.DOC_LOADER_FILE), exist_ok=True)
with open(Config.DOC_LOADER_FILE, "wb") as dump_file:  # close the file handle deterministically
    pickle.dump(docs, dump_file)

2024-07-13 10:25:36 - INFO - #### RELOAD_VECTORSTORE ####
2024-07-13 10:25:36 - INFO - #### LOAD RAW DOCS ####
2024-07-13 10:25:38 - INFO - dump raw docs to /langchain_rag_data/data/MyDocs.pickle file


# TEXT SPLITTER FOR DOCUMENTS
Split documents to chunks, retriever will search through embedded chunks, not whole documents

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

############## TEXT SPLITTER FOR DOCUMENTS ##############
# The retriever searches through embedded chunks, not whole documents, so cut the documents first.
logger.info("Split")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,  # characters shared by neighbouring chunks
    chunk_size=5000,  # characters in a single chunk
)
splits = text_splitter.split_documents(docs)
logger.info(f"Num of splits : {len(splits)}")
del docs  # for gc

2024-07-13 10:25:38 - INFO - Split
2024-07-13 10:25:39 - INFO - Num of splits : 65


# VECTORSTORE FOR EMBEDDINGS
Create vector store FAISS

In [None]:
from tqdm import tqdm
from langchain_community.vectorstores import FAISS

############## VECTORSTORE FOR EMBEDDINGS ##############
# create vector store FAISS
# https://python.langchain.com/v0.1/docs/integrations/vectorstores/faiss/
# for Num of splits : 700 will take Time : ~60min
logger.info("vectorstore FAISS")
if not splits:
    # fail with a readable message instead of an IndexError on splits[0]
    raise ValueError(
        f"No document chunks to index: put some PDF files into {Config.DOC_SOURCE} first"
    )
# do the whole work in one call (Note: FAISS has no verbose parameter)
# vectorstore = FAISS.from_documents(documents=splits,
#                                    embedding=llm_emb)
# or add a progress bar to the FAISS creation: seed the store with the first chunk,
# then add the remaining chunks one by one
vectorstore = FAISS.from_documents(
    documents=[splits[0]], embedding=llm_emb  # here we provide our embedding model
)
for d in tqdm(splits[1:], desc="vectorstore FAISS documents"):
    vectorstore.add_documents([d])
del splits  # for gc

2024-07-13 10:25:39 - INFO - vectorstore FAISS
vectorstore FAISS documents:  55%|█████▍    | 35/64 [19:28<17:11, 35.58s/it]

# SAVE and LOAD FAISS VECTORSTORE
(Note: Chroma has another signature)

In [None]:
# Persist the FAISS vectorstore to disk (Note: Chroma has another signature)
vectorstore.save_local(folder_path=Config.VECTORSTORE_FILE)

# Restore the FAISS vectorstore from that dump (Note: Chroma has another signature)
logger.info("vectorstore FAISS from dump")
vectorstore = FAISS.load_local(
    Config.VECTORSTORE_FILE,
    llm_emb,  # the same embedding model that produced the stored vectors
    allow_dangerous_deserialization=True,  # the docs are restored from a pickle file
)

# RETRIEVER MODEL FROM EMBEDDING MODEL

In [None]:
############## RETRIEVER MODEL FROM EMBEDDING MODEL ##############
logger.info("RETRIEVER")
# "k": 4 -> return the top-4 relevant (according to search_type) documents for a single query
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4},
    search_type="similarity",
)
del vectorstore  # for gc

# V1: FULL RAG = RETRIEVER + GENERATOR
- with **`create_stuff_documents_chain`** from `langchain.chains.combine_documents` and **`create_retrieval_chain`** from `langchain.chains.retrieval`




In [None]:
# some serving stuff
def invoke_input_context_answer(chain_invoke_result):
    """Render a chain result with "input", "context" and "answer" keys as readable text.

    The report lists the question, one "<file name>, page : <page>" line per context
    document, and the answer text found after the last "*** Helpful Answer***:" marker.
    """
    source_lines = [
        doc.metadata.get("source").split("/")[-1]
        + ", page : "
        + str(doc.metadata.get("page"))
        + "\n"
        for doc in chain_invoke_result.get("context")
    ]
    generated_text = chain_invoke_result.get("answer")
    final_answer = generated_text.split("*** Helpful Answer***:")[-1].strip()
    report_parts = [
        "QUESTION: \n",
        chain_invoke_result.get("input"),
        "\n\n",
        "BASED DOCUMENTS: \n",
        *source_lines,
        "\n\n",
        "ANSWER: \n",
        final_answer,
    ]
    return "".join(report_parts)

### Prompt

In [None]:
from langchain_core.prompts import PromptTemplate

# Template of the V1 answering prompt. It has two placeholders: {context} and {input}.
# "input" is the key the V1 chain is invoked with; the trailing "*** Helpful Answer***:" marker
# is what invoke_input_context_answer splits on to keep only the text that follows it.
prompt_template_input_context = """
Use the following pieces of context to answer the question at the end.
Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer.
Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {input}

*** Helpful Answer***:
"""

# Prompt object built from the template; input_variables name the placeholders used above
prompt = PromptTemplate(
    template=prompt_template_input_context,
    input_variables=["context", "input"],
)

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

############## V1 FULL RAG = RETRIEVER + GENERATOR ##############
logger.info("Classical RETRIEVER and GENERATOR")
# generator part: documents + prompt -> llm_gen
question_answer_chain = create_stuff_documents_chain(llm=llm_gen, prompt=prompt)
# full chain: retriever feeds the generator part
chain = create_retrieval_chain(
    retriever=retriever, combine_docs_chain=question_answer_chain
)
logger.info("rag_chain.invoke")
result = chain.invoke({"input": Config.MYQ})
print(invoke_input_context_answer(result))

# V2: FULL RAG = RETRIEVER + GENERATOR
- with **`RetrievalQA.from_chain_type`** from `langchain.chains.retrieval_qa.base`

In [None]:
# some serving stuff
def invoke_query_source_documents_result(chain_invoke_result):
    """Render a chain result with "query", "source_documents" and "result" keys as readable text.

    The report lists the question, one "<file name>, page : <page>" line per source
    document, and the result text found after the last "Generate according to:" marker.
    """
    source_lines = [
        doc.metadata.get("source").split("/")[-1]
        + ", page : "
        + str(doc.metadata.get("page"))
        + "\n"
        for doc in chain_invoke_result.get("source_documents")
    ]
    generated_text = chain_invoke_result.get("result")
    final_answer = generated_text.split("Generate according to:")[-1].strip()
    report_parts = [
        "QUESTION: \n",
        chain_invoke_result.get("query"),
        "\n\n",
        "BASED DOCUMENTS: \n",
        *source_lines,
        "\n\n",
        "ANSWER: \n",
        final_answer,
    ]
    return "".join(report_parts)

### No prompt needed: the default one is used

In [None]:
from langchain.chains.retrieval_qa.base import RetrievalQA
############## V2 FULL RAG = RETRIEVER + GENERATOR ##############
logger.info("Classical RETRIEVER and GENERATOR with chain type")
chain = RetrievalQA.from_chain_type(
    llm_gen,
    retriever=retriever,
    chain_type="refine",
    return_source_documents=True,  # keep the retrieved documents in the result for the report
)
logger.info("RetrievalQA.rag_chain.invoke")
result = chain.invoke({"query": Config.MYQ})
print(invoke_query_source_documents_result(result))

# V3: FULL RAG = RETRIEVER + GENERATOR
- with **Runnable Sequences**

### Generate multiple alternatives to the question formulation


In [None]:
from langchain.load import dumps, loads

# some serving stuff
def invoke_generate_queries_with_origin(input_dict: dict) -> str:
    """Attach the original question to the generated alternatives, one query per line.

    Args:
        input_dict: dict with keys "question" (the original question) and "alternatives"
            (newline-separated alternative questions). These key names are the ones set
            up in the chain that calls this function.

    Returns:
        "Original question: <question>" followed by the non-empty alternative lines,
        joined with a newline, so that splitting the result on a newline yields one query
        per element with no blank entries.
    """
    question = str(input_dict.get("question")).strip()
    if not question.endswith("?"):
        question += "?"  # keep a single trailing question mark
    alternatives = input_dict.get("alternatives") or ""
    # drop blank lines and surrounding whitespace produced by the generator
    alternative_lines = [line.strip() for line in alternatives.splitlines() if line.strip()]
    return "\n".join([f"Original question: {question}", *alternative_lines])

def invoke_unique_docs_union_from_retriever(documents: list[list]) -> list:
    """Unique union of retrieved docs, in first-seen order.

    Args:
        documents: list of lists of retrieved documents (one inner list per query).

    Returns:
        A flat list with every distinct document once. Documents are compared by their
        serialized form; the order of first appearance is kept so the result is the same
        on every run.
    """
    # Flatten list of lists, and convert each Document to string
    serialized_docs = (dumps(doc) for sublist in documents for doc in sublist)
    # dict keys are unique and keep insertion order (a set would give a run-dependent order)
    unique_docs = dict.fromkeys(serialized_docs)
    # Convert the strings back to Documents
    return [loads(doc) for doc in unique_docs]

### Prompt for multiple alternatives to the question formulation

In [None]:
from langchain_core.prompts import PromptTemplate

# Template text, kept under its own name so that `prompt_multi_query` is always the PromptTemplate
prompt_multi_query_template = """You are an AI language model assistant. Your task is to generate {question_numbers}
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines.

Original question: {question}
"""

# Prompt for multiple alternatives to the question formulation
prompt_multi_query = PromptTemplate(
    template=prompt_multi_query_template,
    # you can create any imagined prompt as template.
    # Note: if your prompt refers to some variables in formatting type, you should provide these variables
    # names to input_variables parameter; they have to match the {placeholders} of the template
    input_variables=["question", "question_numbers"],
)

In [None]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

# Chain for generating multiple alternatives to the question formulation
# Pipeline: input dict -> prompt_multi_query -> llm_gen -> StrOutputParser.
# The leading dict picks the two values the prompt needs out of the dict passed to .invoke().
generate_queries_chain = (
    {
        "question": itemgetter("question"),
        "question_numbers": itemgetter("question_numbers"),
        # the prompt has a variable for the number of alternative questions to generate;
        # its value is read from the "question_numbers" key of the dict given to .invoke({...})
    }
    | prompt_multi_query
    | llm_gen
    | StrOutputParser()
)

### The generate_queries_chain is a pipeline built using LangChain's `RunnableSequence`. How this work?

**Long story short**: output from the previous `RunnableSequence` element is passed as input to the next `RunnableSequence` element. The output type of the previous element must be compatible with the input type of the next element.
  
**Input Data**: The input to the chain is a `dictionary` that contains at least two keys: `question` and `question_numbers`. These values are extracted from the input dictionary using the itemgetter function.
  
**Prompt for Multiple Queries**: The extracted `question` and `question_numbers` are passed to `prompt_multi_query`, a `PromptTemplate` that substitutes them into the `{question}` and `{question_numbers}` placeholders of its template to build the prompt for the language model (LLM).
  
**Language Model (LLM)**: The formatted prompt is then passed to the language model (`llm_gen`). The LLM generates a response based on the input prompt.
  
**Output Parsing**: The response from the LLM is passed through `StrOutputParser()`, which returns it as a plain Python string (for chat models it extracts the text content of the returned message).
  
**Output**: The final output of the chain is the LLM response as a plain string, as returned by `StrOutputParser()`.
  
The chain is equivalent to the manual steps below. Given the input dictionary:
  ```
  input_dict = {
      "question": "What is the capital of France?",
      "question_numbers": 1
  }
  ```

**Extract Question and Question Numbers**:
  ```
  question = itemgetter("question")(input_dict)
  question_numbers = itemgetter("question_numbers")(input_dict)
  # Create Prompt for Multiple Queries:
  formatted_prompt = prompt_multi_query(question, question_numbers)
  # Generate Response Using LLM:
  llm_response = llm(formatted_prompt)
  # Parse the LLM Response: Output -> parsed_response
  parsed_response = StrOutputParser()(llm_response)
  ```

In [None]:
from langchain_core.runnables import RunnableLambda

# Generate Queries Chain
# Chain that returns the original question together with the generated alternatives.
invoke_generate_queries_chain = (
    # invoke_generate_queries_with_origin takes one dict with 2 keys:
    # "alternatives" - the output of the previous chain (generate_queries_chain),
    # "question" - the original question, picked from the dict passed to .invoke().
    # The dict below builds exactly that input; its keys are the names the function reads.
    {"question": itemgetter("question"), "alternatives": generate_queries_chain}
    # A plain function is wrapped into RunnableLambda so that it can be used as a chain step
    | RunnableLambda(invoke_generate_queries_with_origin)
)
# to check multiple generated questions:
result = invoke_generate_queries_chain.invoke({"question": Config.MYQ, "question_numbers": 2})
print(result)

### Retrieval Chain for multiple alternatives to the question formulation
    
The retriever embeds the input question (as well as the previously generated alternatives) with the same `llm_emb` model that was used to build the vectorstore, and returns the `top_k` documents most similar to each query according to `search_type`.

Values of `top_k` and `search_type` were provided in calling `vectorstore.as_retriever()` above.

In [None]:
# Retrieval Chain for multiple alternatives to the question formulation
retrieval_chain = (
    # We can attach previous chains as input to the next chain:
    invoke_generate_queries_chain
    # Next step is the retriever. Split the str with the multiple queries into a list so that
    # the retriever can handle them separately through .map().
    # Blank lines are dropped: an empty string is not a meaningful query for the retriever.
    | (lambda x: [query.strip() for query in x.split("\n") if query.strip()])
    | retriever.map()
    # merge the per-query result lists into one list of unique documents
    | invoke_unique_docs_union_from_retriever
)
# to check list of retrieved documents
result = retrieval_chain.invoke({"question": Config.MYQ, "question_numbers": 2})
print(result)

### Prompt for generation answer with retriever and generation prompt

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt for generation answer with retriever and generation prompt
# Template of the V3 answering prompt. It has two placeholders: {context} and {question};
# rag_chain supplies both under exactly these key names.
prompt_template_question_context = """
Use the following pieces of context to answer the question at the end.
Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer.
Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {question}

*** Helpful Answer***:
"""

# Prompt object built from the template; input_variables name the placeholders used above
prompt_generation = PromptTemplate(
    template=prompt_template_question_context,
    input_variables=["question", "context"],
)

### RAG Chain
-  The generator (which can be a different model from the one used by the retriever) takes the list of retrieved (relevant) documents and generates an answer to the question based on them.

In [None]:
# RAG Chain
# Generator (could be another model as for retriever) takes list of retrieved (relevant) documents and generate
# answer for the qustion according to them.
# Full V3 pipeline: build the prompt inputs -> prompt_generation -> llm_gen -> StrOutputParser.
rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question"),
    }
    # prompt_generation takes 2 variables named context and question,
    # so the dict above uses exactly these names as keys:
    # "context" gets the output of retrieval_chain (the list of unique retrieved documents),
    # "question" gets the value of the "question" key of the dict passed to .invoke()
    # NOTE(review): the documents are handed to the prompt as a list - presumably they are
    # rendered with their string representation; confirm this is the intended context format
    | prompt_generation
    | llm_gen
    | StrOutputParser()
)

# "question_numbers" is consumed by the query-generation step inside retrieval_chain
result = rag_chain.invoke({"question": Config.MYQ, "question_numbers": 2})
print(result)