In [1]:
import os
from dotenv import load_dotenv

# Pull key/value pairs from the .env file into the process environment.
load_dotenv()

# Credentials are read from the environment rather than hard-coded; each value
# is None when the corresponding variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")      # LLM
pinecone_api_key = os.environ.get("PINECONE_API_KEY")  # Pinecone Vector Database

In [2]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client, authenticated with the API key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)


  from tqdm.autonotebook import tqdm


In [3]:
import time

index_name = "rag-multi-query-rag-fusion-index"  # change if desired

# Names of the indexes that already exist for this Pinecone client.
existing_indexes = [info["name"] for info in pc.list_indexes()]

# Create the index only on first use so the cell can be re-run safely.
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        # Must equal the vector size produced by the embedding model used for
        # indexing (this notebook embeds with "text-embedding-3-small").
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll once per second until Pinecone reports the new index as ready.
    while True:
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)

# Handle to the (existing or newly created) index.
index = pc.Index(index_name)

In [5]:
# Load the source PDF, split it into chunks, and index the chunks in Pinecone
import hashlib

import bs4
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
# Aliased so the vector-store class does not shadow the `pinecone.Pinecone`
# client class imported earlier in the notebook.
from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from pprint import pprint

#### INDEXING ####

# Load Document (Uploading one file at a time)
pdf_file_path = "./data/langchain_turing.pdf"
loader = PyPDFLoader(pdf_file_path)

docs = loader.load()

# Upload multiple PDF files from a directory
# pdf_file_paths = <enter your path here>
# loader = PyPDFDirectoryLoader(pdf_file_paths)

# docs_dir = loader.load()

# Split (sizes are measured in tiktoken tokens, not characters)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=2000,
    chunk_overlap=500)

# Make splits
splits = text_splitter.split_documents(docs)

# Deterministic vector ids: re-running this cell overwrites the same vectors
# instead of inserting duplicates under fresh random ids. The chunk position is
# part of the hash so that every id within one run is unique.
split_ids = [
    hashlib.sha256(
        "|".join(
            [
                str(split.metadata.get("source")),
                str(split.metadata.get("page")),
                str(position),
                split.page_content,
            ]
        ).encode("utf-8")
    ).hexdigest()
    for position, split in enumerate(splits)
]

# Index
vectorstore = PineconeVectorStore.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    index_name=index_name,
    ids=split_ids,
)


In [8]:
# Threshold-based similarity search: `k` caps the number of chunks returned per
# query and `score_threshold` is the minimum relevance score a chunk must reach
# (per LangChain's "similarity_score_threshold" search type).
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=5, score_threshold=0.5),
    search_type="similarity_score_threshold",
)

# RAG Fusion

![RAG Fusion](./images/multi_query_rag_fusion.png)

Link - https://medium.com/towards-data-science/forget-rag-the-future-is-rag-fusion-1147298d8ad1

In [6]:
from pydantic import BaseModel

# Structured-output schema for the query generator: `generate_questions` passes
# this model to `with_structured_output`, so the LLM reply is parsed into it.
# NOTE(review): documented with comments rather than a docstring on purpose --
# presumably a class docstring would be forwarded to the LLM as part of the
# schema and change the request; confirm before adding one.
class Question(BaseModel):
    # Search queries generated by the LLM for a single input question.
    generated_questions: list[str]


In [10]:
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

def generate_questions(question):
    """Expand a single user question into several related search queries.

    Args:
        question: The original user query; it is substituted into the prompt
            template's ``{question}`` placeholder.

    Returns:
        The ``generated_questions`` list of the ``Question`` object produced by
        the structured-output LLM call.
    """
    # The model's reply is parsed directly into the `Question` schema.
    query_writer = ChatOpenAI(model="gpt-4o-mini", temperature=0).with_structured_output(Question)

    fusion_prompt = ChatPromptTemplate.from_template(
        """You are a helpful assistant that generates multiple search queries based on a single input query. \n
    Generate multiple search queries related to: {question} \n
    Output (4 queries):"""
    )

    # Render the prompt for this question, then ask the model for the queries.
    rendered_prompt = fusion_prompt.invoke({"question": question})
    return query_writer.invoke(rendered_prompt).generated_questions

In [11]:
question = "How does LangChain leverage modular components like LangGraph, LangSmith, and LangServe to address challenges in building scalable and secure LLM-powered applications?"

# `generate_questions` turns the question into a list of queries, and
# `retriever.map()` runs the retriever once per query, so the result is one
# list of documents per generated query.
all_docs_retriever = generate_questions | retriever.map()

all_docs = all_docs_retriever.invoke(question)


In [12]:
# Inspect the raw retrieval output: one list of documents per generated query.
all_docs

[[Document(metadata={'author': '', 'creationdate': '2024-11-06T10:08:55+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2024-11-06T10:08:55+00:00', 'page': 4.0, 'page_label': '5', 'producer': 'pdfTeX-1.40.26', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'source': './data/langchain_turing.pdf', 'subject': '', 'title': '', 'total_pages': 14.0, 'trapped': '/False'}, page_content='LangChain 5\nand relevance. RAG allows models to access up-to-date information, extending\ntheir capabilities beyond their training data. LangChain’s RAG implementation\nuses:\n– Document Loaders and Text Splitters: Preprocess documents for in-\ndexing and efficient retrieval [6].\n– Embedding Models and Vector Stores: Enable similarity-based re-\ntrieval by embedding documents into vector spaces. LangChain integrates\nwithvectorstoragesolutionslikeChromaandMilvusforoptimizedsearches[3].\n– Retrievers and RAG Chains: Retrieve a

## What is RRF?

Reciprocal Rank Fusion is a method for combining multiple ranked lists into a single, improved ranked list. It's particularly useful in RAG (Retrieval Augmented Generation) systems when you have multiple ways of retrieving relevant documents and want to combine their results optimally.

How the code works:

### 1. Function Input:
- results: A list of lists, where each inner list contains ranked documents
- k: A constant (default=60) that acts as a smoothing factor to prevent high rankings from dominating the final score
```python
def reciprocal_rank_fusion(results: list[list], k=60):
```

### 2. Score Calculation:

The core formula is: score = 1 / (rank + k)

For example, if k=60:
- Document at rank 0 gets score: 1/60 = 0.0167
- Document at rank 1 gets score: 1/61 ≈ 0.0164
- Document at rank 2 gets score: 1/62 ≈ 0.0161

And so on...
```python
fused_scores = {}
for docs in results:
    for rank, doc in enumerate(docs):
        doc_str = dumps(doc)
        if doc_str not in fused_scores:
            fused_scores[doc_str] = 0
        fused_scores[doc_str] += 1 / (rank + k)
```

### 3. Process Flow:
- For each ranked list in the input:
    - For each document in the list:
        - Convert document to string (for dictionary key)
        - Add its reciprocal rank score to any existing score
- Documents appearing in multiple lists accumulate scores

### 4. Final Reranking:
```python
reranked_results = [
    (loads(doc), score)
    for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
]
```

- Sorts documents by their accumulated scores
- Converts document strings back to their original format
- Returns list of (document, score) tuples in descending order


**Example:** 

List 1: [Doc A, Doc B, Doc C]

List 2: [Doc B, Doc C, Doc A]

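**After RRF:**

Doc A: 1/60 (from List 1) + 1/62 (from List 2) ≈ 0.0328

Doc B: 1/61 (from List 1) + 1/60 (from List 2) ≈ 0.0331

Doc C: 1/62 (from List 1) + 1/61 (from List 2) ≈ 0.0325

**Final ranking:** Doc B, Doc A, Doc C. Doc B comes out on top because it is first or second in both lists, whereas Doc A's first place in List 1 is offset by its last place in List 2.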


In [22]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Merge several ranked document lists into one using Reciprocal Rank Fusion.

    Each occurrence of a document at zero-based position ``rank`` in a list adds
    ``1 / (rank + k)`` to that document's running total, so documents that sit
    near the top of many lists collect the largest totals.

    Args:
        results: A list of document lists; each inner list is treated as being
            ordered by rank (position 0 first).
        k: Constant added to the rank in the denominator of the RRF formula.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted from the highest to
        the lowest fused score.
    """
    fused_scores = {}

    for ranked_docs in results:
        for position, document in enumerate(ranked_docs):
            # Serialize the document so it can be used as a dictionary key;
            # identical documents from different lists share one entry.
            key = dumps(document)
            fused_scores[key] = fused_scores.get(key, 0) + (1 / (position + k))

    # Highest fused score first; ties keep their first-seen order (stable sort).
    ranking = list(fused_scores.items())
    ranking.sort(key=lambda entry: entry[1], reverse=True)

    # Turn the serialized keys back into document objects for the caller.
    return [(loads(key), total) for key, total in ranking]

In [23]:
# Fuse the per-query result lists into a single ranking.
reranked_docs = reciprocal_rank_fusion(all_docs, k=60)

In [24]:
# Inspect the fused ranking: (document, fused score) tuples, best first.
reranked_docs

[(Document(metadata={'author': '', 'creationdate': '2024-11-06T10:08:55+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2024-11-06T10:08:55+00:00', 'page': 1.0, 'page_label': '2', 'producer': 'pdfTeX-1.40.26', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'source': './data/langchain_turing.pdf', 'subject': '', 'title': '', 'total_pages': 14.0, 'trapped': '/False'}, page_content='2 Vasilios Mavroudis\nstateful, and contextually aware applications with ease. Its suite of compo-\nnents—including LangGraph for stateful process modeling, LangServe for scal-\nableAPIdeployment,andLangSmithformonitoringandevaluation—collectively\nform a comprehensive toolkit for leveraging LLMs effectively [3].\nLangChain facilitates the integration of LLMs into a wide array of applica-\ntions, empowering developers to create solutions that are not only functional\nbut also efficient and secure. Its support for features like 

## Why is this useful?
- **Robustness**: Combines evidence from multiple ranking systems
- **Smoothing**: The k parameter prevents individual high rankings from dominating
- **Consensus**: Documents that rank highly in multiple lists get higher final scores
- **Normalization**: Only rank positions are used, so raw scores from different ranking methods never need to be on the same scale

This is particularly valuable in RAG systems where you might have:
- Different embedding models
- Different retrieval strategies (semantic search, keyword search, etc.)
- Different chunking strategies
- Different similarity metrics

By combining these different approaches, you often get better overall results than using any single method alone.

In [25]:
question = "How does LangChain leverage modular components like LangGraph, LangSmith, and LangServe to address challenges in building scalable and secure LLM-powered applications?"

# End-to-end retrieval: query generation -> one retrieval per query -> RRF re-ranking.
final_retrieval_chain = generate_questions | retriever.map() | reciprocal_rank_fusion

context = final_retrieval_chain.invoke(question)

# Number of distinct documents left after fusion.
len(context)


9

In [26]:
# Inspect the fused (document, score) tuples returned by the retrieval chain.
context

[(Document(metadata={'author': '', 'creationdate': '2024-11-06T10:08:55+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2024-11-06T10:08:55+00:00', 'page': 1.0, 'page_label': '2', 'producer': 'pdfTeX-1.40.26', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'source': './data/langchain_turing.pdf', 'subject': '', 'title': '', 'total_pages': 14.0, 'trapped': '/False'}, page_content='2 Vasilios Mavroudis\nstateful, and contextually aware applications with ease. Its suite of compo-\nnents—including LangGraph for stateful process modeling, LangServe for scal-\nableAPIdeployment,andLangSmithformonitoringandevaluation—collectively\nform a comprehensive toolkit for leveraging LLMs effectively [3].\nLangChain facilitates the integration of LLMs into a wide array of applica-\ntions, empowering developers to create solutions that are not only functional\nbut also efficient and secure. Its support for features like 

In [31]:
from langchain_core.messages import HumanMessage, SystemMessage

def _format_context(ranked_docs):
    """Render ``(document, score)`` pairs as numbered plain-text sections."""
    sections = []
    for position, (doc, _score) in enumerate(ranked_docs, start=1):
        sections.append(f"[Document {position}]\n{doc.page_content}")
    return "\n\n".join(sections)


def generate_response(question):
    """Answer ``question`` with the LLM, grounded in RAG-Fusion retrieval results.

    The question is run through ``final_retrieval_chain``. The retrieved chunk
    texts, in fused-rank order, are placed in the system prompt and the question
    itself in the user prompt.

    Args:
        question: The user's question.

    Returns:
        The value returned by ``ChatOpenAI.invoke`` for the assembled messages
        (the notebook reads its ``.content`` attribute).
    """
    rag_system_prompt = """
        You are an AI language model assistant. Your task is to answer the user question based on the provided context.

        Context:
        {context}
    """

    rag_user_prompt = """
        Question: {question}
    """
    ranked_docs = final_retrieval_chain.invoke(question)

    # Send only the chunk text to the model. Interpolating the raw list would
    # embed the Python repr of every (Document, score) tuple, including PDF
    # metadata and fusion scores, which adds noise and wastes tokens.
    context = _format_context(ranked_docs)

    rag_system_message = SystemMessage(content=rag_system_prompt.format(context=context))
    rag_user_message = HumanMessage(content=rag_user_prompt.format(question=question))

    rag_prompt = [rag_system_message, rag_user_message]

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    response = llm.invoke(rag_prompt)

    return response

In [32]:
# Run the full RAG-Fusion pipeline (retrieval + generation) for the question defined earlier.
response = generate_response(question)

In [34]:
from IPython.display import Markdown

# Render the model's answer text as Markdown instead of showing the raw string.
Markdown(response.content)

LangChain leverages its modular components—LangGraph, LangSmith, and LangServe—to effectively address the challenges of building scalable and secure applications powered by large language models (LLMs) in several ways:

1. **LangGraph**:
   - **Stateful Process Modeling**: LangGraph enables developers to structure applications using nodes and edges, facilitating complex branching and multi-agent workflows. This modularity allows for the creation of sophisticated applications that can manage state effectively, which is crucial for maintaining context in LLM interactions.
   - **Integration with LangChain**: It can utilize LangSmith’s tracing capabilities to monitor performance and capture detailed logs of agent interactions, enhancing observability and debugging.

2. **LangSmith**:
   - **Monitoring and Evaluation**: LangSmith provides tools for real-time performance monitoring, error tracking, and version control, which are essential for optimizing applications iteratively. This helps developers maintain high standards and quickly address issues as they arise.
   - **Tracing and Performance Testing**: It offers detailed visibility into how applications interact with LLMs and external data sources, allowing developers to log interactions and analyze performance. This capability is vital for identifying bottlenecks and ensuring that applications meet expected outcomes.
   - **Dataset Management**: LangSmith supports the creation and management of datasets for testing, which is crucial for validating the performance of LLM applications under real-world conditions.

3. **LangServe**:
   - **API Deployment**: LangServe simplifies the process of deploying LangChain applications as scalable REST APIs, making LLM models accessible for various services. This API-centric design allows for flexible interactions and supports diverse use cases, from chatbots to complex multi-agent systems.
   - **Scalability and Load Balancing**: It includes built-in support for scalability, enabling applications to handle high traffic volumes through load balancing and auto-scaling features. This ensures consistent performance and low response times, which are critical for production environments.
   - **Latency and Error Management**: LangServe is designed to minimize latency and includes error handling mechanisms to maintain reliability, making it suitable for mission-critical applications.

Overall, the modular architecture of LangChain allows developers to configure, extend, and deploy applications tailored to specific needs while addressing complexities related to scalability and security. By integrating these components, LangChain provides a comprehensive toolkit that enhances the development lifecycle of LLM applications, ensuring they are both efficient and secure.