In [1]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

In [2]:
def add_5(x):
    """Return ``x`` increased by 5."""
    return x + 5
def sub_5(x):
    """Return ``x`` decreased by 5."""
    return x - 5
def mul_5(x):
    """Return ``x`` multiplied by 5."""
    return x * 5

In [3]:
# Wrap each plain function in a RunnableLambda so it can be composed into a chain.
add_5_runnable, sub_5_runnable, mul_5_runnable = (
    RunnableLambda(fn) for fn in (add_5, sub_5, mul_5)
)

In [5]:
# Compose the runnables by calling __or__ explicitly; this is the method the
# `|` operator dispatches to. Order of the steps: add 5, multiply by 5, subtract 5.
chain = add_5_runnable.__or__(mul_5_runnable).__or__(sub_5_runnable)

In [6]:
# Run the add -> multiply -> subtract chain on 5: (5 + 5) * 5 - 5 = 45.
result = chain.invoke(5)
result

45

In [7]:
# Same kind of composition written with the `|` operator.
# Note the step order differs from `chain`: add 5, subtract 5, then multiply by 5.
chain2 = add_5_runnable | sub_5_runnable | mul_5_runnable

In [8]:
# (5 + 5 - 5) * 5 = 25
chain2.invoke(5)

25

In [10]:
from langchain_core.prompts import PromptTemplate


In [11]:
# Single-placeholder prompt; from_template derives the input variables
# from the {placeholders} found in the template string.
template = "Give me a small report on {topic}"
prompt = PromptTemplate.from_template(template)
prompt

PromptTemplate(input_variables=['topic'], input_types={}, partial_variables={}, template='Give me a small report on {topic}')

In [12]:
from langchain_community.chat_models import ChatOllama

In [13]:
# Chat model used by every chain below.
# NOTE(review): presumably needs a reachable Ollama server with the
# "llama3.1:8b" model already pulled — confirm before a fresh run.
llm = ChatOllama(model="llama3.1:8b")

  llm = ChatOllama(model="llama3.1:8b")


In [14]:
# Rebinds `chain` (previously the arithmetic chain) to prompt -> model.
chain = prompt | llm
chain

PromptTemplate(input_variables=['topic'], input_types={}, partial_variables={}, template='Give me a small report on {topic}')
| ChatOllama(model='llama3.1:8b')

In [15]:
# The string is the report topic for the prompt; the result is the model's
# raw message object (an AIMessage), not a plain string.
chain.invoke("Retrieval Augmented Generation")

AIMessage(content="**Retrieval-Augmented Generation (RAG)**\n\n**Overview:**\nRetrieval-Augmentation-Generation (RAG) is a recent paradigm in natural language processing (NLP) that combines the strengths of two popular techniques: retrieval-based models and generation-based models. This approach has shown impressive results in various downstream tasks, such as question answering, text classification, and machine translation.\n\n**Key Components:**\n\n1. **Retrieval Module:** This module retrieves relevant documents or passages from a large database based on the input query or prompt.\n2. **Augmentation Module:** The retrieved documents are then augmented with additional information, such as context, metadata, or auxiliary tasks, to enhance the model's understanding of the input.\n3. **Generation Module:** Finally, the augmented document is fed into a generation module, which produces the final output.\n\n**Benefits:**\n\n1. **Improved performance**: By leveraging external knowledge and

In [16]:
from langchain_core.output_parsers import StrOutputParser


# Parser appended to chains so they yield a plain string instead of the
# model's message object.
output_parser = StrOutputParser()

In [17]:
# prompt -> model -> string parser: the chain now returns the report text
# itself rather than a message object.
chain = prompt | llm | output_parser
chain.invoke("Retrieval Augmented Generation")

"Here's a brief report on Retrieval-Augmented Generation (RAG) models:\n\n**What is RAG?**\n\nRetrieval-Augmented Generation (RAG) is a type of machine learning model that combines the strengths of two approaches: Retrieval-based models and Generative models. RAG models are designed to address the limitations of traditional generative models, which can be prone to hallucinations or generate uninformative text.\n\n**How does RAG work?**\n\nA RAG model typically consists of two main components:\n\n1. **Retrieval component**: This component uses a set of stored documents (e.g., books, articles) and a retrieval algorithm (e.g., BM25, TF-IDF) to identify relevant snippets or passages from the document collection that match the input query or prompt.\n2. **Generation component**: This component generates text based on the retrieved passages, often using a sequence-to-sequence model (e.g., transformer-based models like BART or T5).\n\n**Key benefits of RAG**\n\n1. **Improved accuracy and rele

In [18]:
def extract_fact(x):
    """Drop everything up to the first blank line and tighten the rest.

    If ``x`` contains a blank-line separator (two consecutive newlines), the
    text before the first separator is discarded and every remaining
    separator is collapsed to a single newline. Text without such a
    separator is returned unchanged.
    """
    _, separator, remainder = x.partition('\n\n')
    if not separator:
        # No paragraph break at all: nothing to strip.
        return x
    return remainder.replace('\n\n', '\n')
# Terms to rewrite, and the label substituted for each of them.
old_word_1 = 'RAG'
old_word_2 = 'Retrieval'
new_word = 'Traditional RAG'


def replace_word(x):
    """Return ``x`` with ``old_word_1`` and then ``old_word_2`` replaced by ``new_word``.

    Matching is a case-sensitive substring replacement, applied in that
    order, so e.g. "Retrieval Augmented" becomes "Traditional RAG Augmented".
    """
    text = x
    for old_word in (old_word_1, old_word_2):
        text = text.replace(old_word, new_word)
    return text

In [19]:
from IPython.display import display, Markdown


In [20]:
# Full pipeline: generate the report, keep only the text after the first
# blank line, rename the key terms, then render the result as Markdown.
chain = (
    prompt
    | llm
    | output_parser
    | RunnableLambda(extract_fact)
    | RunnableLambda(replace_word)
)
result = chain.invoke("retrieval augmented generation")
display(Markdown(result))

**What is Traditional RAG Augmented Generation (Traditional RAG)?**
Traditional RAG Augmented Generation (Traditional RAG) is a class of neural architectures that combine the strengths of traditional retrievals (e.g., database querying, search engines) with the flexibility and creativity of language models. Traditional RAG systems aim to bridge the gap between explicit data storage and implicit knowledge representation in neural networks.
**Key Components**
1. **Traditional RAG Module**: This module searches a database or knowledge graph for relevant information related to a given query or input prompt.
2. **Generator Module**: A pre-trained language model (e.g., transformer, BART) that generates text based on the retrieved information and the input prompt.
**How it Works**
1. The retrieval module receives an input prompt and searches for relevant documents, entities, or relationships in a database or knowledge graph.
2. The generator module takes the retrieved information and the original input prompt as inputs to generate text that incorporates the retrieved information.
**Advantages**
1. **Improved Accuracy**: Traditional RAG systems can leverage external knowledge to improve accuracy and reduce reliance on pre-existing text generation capabilities.
2. **Increased Contextual Understanding**: By incorporating external knowledge, Traditional RAG models can better understand context and relationships between entities.
3. **Flexibility**: Traditional RAG architectures can be applied to various tasks, such as question answering, text summarization, or conversational dialogue.
**Applications**
1. **Question Answering**: Traditional RAG systems can retrieve relevant documents and generate answers based on the retrieved information.
2. **Text Summarization**: Traditional RAG models can summarize long documents by retrieving key points and generating a concise summary.
3. **Conversational Dialogue**: Traditional RAG systems can engage in conversations by retrieving relevant context and generating responses.
**Challenges**
1. **Knowledge Graph Construction**: Building and maintaining high-quality knowledge graphs is crucial for effective retrieval-augmented generation.
2. **Scalability**: Traditional RAG systems need to handle large-scale data and maintain efficiency as the database or knowledge graph grows.
3. **Evaluation Metrics**: Developing suitable evaluation metrics that capture the strengths of Traditional RAG systems remains an open research challenge.
**State-of-the-Art**
Recent works have shown promising results on various tasks, including question answering (e.g., BART + RETRO) and text summarization (e.g., T5 + Retrieve-Generator). However, more research is needed to fully unlock the potential of retrieval augmented generation.

In [22]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import HuggingFaceEmbeddings

In [25]:
pip install docarray

Collecting docarray
  Downloading docarray-0.41.0-py3-none-any.whl.metadata (36 kB)
Collecting rich>=13.1.0 (from docarray)
  Downloading rich-14.2.0-py3-none-any.whl.metadata (18 kB)
Collecting types-requests>=2.28.11.6 (from docarray)
  Downloading types_requests-2.32.4.20250913-py3-none-any.whl.metadata (2.0 kB)
Collecting markdown-it-py>=2.2.0 (from rich>=13.1.0->docarray)
  Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)
Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich>=13.1.0->docarray)
  Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)
Downloading docarray-0.41.0-py3-none-any.whl (302 kB)
Downloading rich-14.2.0-py3-none-any.whl (243 kB)
Downloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)
Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Downloading types_requests-2.32.4.20250913-py3-none-any.whl (20 kB)
Installing collected packages: types-requests, mdurl, markdown-it-py, rich, docarray
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [36]:
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
# System instructions with two context slots, one per retriever.
prompt_str = """Using the context provided, answer user's question
context:
{context1}
{context2}
"""
# Rebinds `prompt` (previously the single-variable PromptTemplate) to a chat
# prompt: a system message carrying the context and a human message with the question.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(prompt_str),
        HumanMessagePromptTemplate.from_template("{question}"),
    ]
)

# One embedding model shared by both in-memory stores.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# The facts are deliberately split across two stores: the release date lives
# in store A and the architecture details in store B.
vector_store_a = DocArrayInMemorySearch.from_texts(
    texts=[
        "half the info is here",
        "Deepseek-V3 was released in December 2024",
    ],
    embedding=embeddings,
)
vector_store_b = DocArrayInMemorySearch.from_texts(
    texts=[
        "the other half of the info is here",
        "the deepseek-v3 llm is a mixture of experts with 671B parameters",
    ],
    embedding=embeddings,
)

In [27]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [37]:
# Fan the incoming question out to both retrievers while also passing it
# through untouched, producing the three keys the chat prompt expects.
retrieval = RunnableParallel(
    context1=vector_store_a.as_retriever(),
    context2=vector_store_b.as_retriever(),
    question=RunnablePassthrough(),
)
print(retrieval)

steps__={'context1': VectorStoreRetriever(tags=['DocArrayInMemorySearch'], vectorstore=<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x14930dbd0>, search_kwargs={}), 'context2': VectorStoreRetriever(tags=['DocArrayInMemorySearch'], vectorstore=<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x14930dd10>, search_kwargs={}), 'question': RunnablePassthrough()}


In [None]:
# Retrieval -> chat prompt -> model -> string parser.
# NOTE(review): this cell has no execution count, and the recorded output of
# the next cell is an AIMessage rather than a string, which suggests the chain
# that actually ran did not include output_parser — re-run this cell first.
chain = retrieval | prompt | llm | output_parser


In [44]:
# Answering this needs both stores: the December release date is in
# vector_store_a, the architecture description in vector_store_b.
chain.invoke("What architecture does deepseek model released in december uses?")

AIMessage(content='The DeepSeek-V3 model uses a "Mixture of Experts" (MoE) architecture, and it has 671B parameters.', additional_kwargs={}, response_metadata={'model': 'llama3.1:8b', 'created_at': '2025-11-22T09:56:12.800188Z', 'message': {'role': 'assistant', 'content': ''}, 'done': True, 'done_reason': 'stop', 'total_duration': 1462491916, 'load_duration': 116734000, 'prompt_eval_count': 111, 'prompt_eval_duration': 198258959, 'eval_count': 29, 'eval_duration': 1106103667}, id='lc_run--1d8eb1b1-6773-4cac-9316-800bb5e19669-0')