In [77]:
import os
from dotenv import load_dotenv

# Read the local .env file so its entries are available as environment variables.
load_dotenv()

# API credentials are taken from the environment; an unset variable yields None.
openai_api_key = os.environ.get("OPENAI_API_KEY")      # LLM
pinecone_api_key = os.environ.get("PINECONE_API_KEY")  # Pinecone Vector Database

In [78]:
from pinecone import Pinecone, ServerlessSpec

# Pinecone client, authenticated with the API key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)


In [79]:
import time

index_name = "rag-decomposition-index" # change if desired

# Names of the indexes the client currently reports.
existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

# Create the index only when it is missing, so re-running this cell does not
# attempt a second create for the same name.
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,  # NOTE(review): presumably the vector size of the embedding model used for indexing - confirm
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll once per second until the index reports ready; this loop has no timeout.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Handle to the index named above.
index = pc.Index(index_name)

In [80]:
# Load the PDF, split it into chunks, embed the chunks and index them in Pinecone
import bs4
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
# Imported under an alias so it does not rebind `Pinecone`, which an earlier cell
# imports from the `pinecone` client package and uses to construct `pc`.
from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from pprint import pprint

#### INDEXING ####

# Load Document (Uploading one file at a time)
pdf_file_path = "./data/langchain_turing.pdf"
loader = PyPDFLoader(pdf_file_path)

docs = loader.load()

# Upload multiple PDF files from a directory
# pdf_file_paths = <enter your path here>
# loader = PyPDFDirectoryLoader(pdf_file_paths)

# docs_dir = loader.load()

# Split (chunk_size / chunk_overlap are passed to the tiktoken-based splitter)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=2000,
    chunk_overlap=500)

# Make splits
splits = text_splitter.split_documents(docs)

# Index
# NOTE(review): every run of this cell embeds and stores `splits` again;
# presumably that adds duplicate vectors to an already-populated index - confirm.
vectorstore = PineconeVectorStore.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
    index_name=index_name
)


In [81]:
# Retriever over the indexed chunks, configured for score-thresholded similarity
# search with k=5 and score_threshold=0.5.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 5, "score_threshold": 0.5},
)

# RAG Decomposition

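References:
- https://arxiv.org/pdf/2205.10625 (Least-to-Most Prompting Enables Complex Reasoning in Large Language Models)
- https://arxiv.org/pdf/2212.10509 (Interleaving Retrieval with Chain-of-Thought Reasoning for Knowledge-Intensive Multi-Step Questions)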

## 1. Answer recursively


![Recursion](./images/rag_decompostion_recursion.png)

In [82]:
from pydantic import BaseModel

# Schema passed to `llm.with_structured_output(...)` in generate_questions.
# It has a single field holding the generated sub-questions as a list of strings.
class Question(BaseModel):
    generated_questions: list[str]

In [83]:
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

def generate_questions(question):
    """Break ``question`` down into sub-questions with the chat model.

    The decomposition prompt is rendered with ``question`` and sent to a
    ``gpt-4o-mini`` chat model (temperature 0) that is wrapped with structured
    output for the ``Question`` schema.

    Args:
        question: The input question to decompose.

    Returns:
        The ``generated_questions`` attribute of the structured response.
    """
    template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
    The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
    Generate multiple search queries related to: {question} \n
    Output (3 queries):"""
    decomposition_prompt = ChatPromptTemplate.from_template(template)
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    # Fill the prompt for this question, then wrap the model with the output schema.
    rendered = decomposition_prompt.invoke({"question": question})
    structured_model = chat_model.with_structured_output(Question)

    return structured_model.invoke(rendered).generated_questions

In [84]:
# The original question that will be decomposed into sub-questions.
question = "How does LangChain leverage modular components like LangGraph, LangSmith, and LangServe to address challenges in building scalable and secure LLM-powered applications?"
# Generate the sub-questions; the bare name on the last line displays the list.
decompostion_questions = generate_questions(question)
decompostion_questions

['What are the key features of LangGraph and how do they contribute to building scalable LLM-powered applications?',
 'How does LangSmith enhance the security of applications built using LangChain?',
 'What role does LangServe play in the deployment of LLM-powered applications and how does it ensure scalability?']

In [65]:
# Prompt for the recursive strategy. It receives the current sub-question
# ({question}), the previously answered question/answer pairs ({q_a_pairs}) and
# the retrieved context ({context}).
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs if any:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n


Use the above context and any background question + answer pairs to answer the question: \n {question}

Then based on the generated answer create a cohesive answer that draws from the responses of all the question + answer pairs and the answer to the latest question.

Provide your answer:
"""

recursion_prompt = ChatPromptTemplate.from_template(template)

In [85]:
def format_qa_pair(question, answer):
    """Render one question/answer pair as text.

    The result is a "Question: ..." line followed by an "Answer: ..." line,
    with leading and trailing whitespace stripped.
    """
    return f"Question: {question}\nAnswer: {answer}\n\n".strip()

# Chat model used by the answer-generation cells below (gpt-4o-mini, temperature=1).
# NOTE(review): generate_questions uses temperature=0; confirm the higher temperature here is intended.
llm = ChatOpenAI(model = "gpt-4o-mini", temperature=1)



In [86]:
def retrieve_and_rag(decompostion_questions):
    """Answer the sub-questions in order, feeding earlier Q+A pairs into later prompts.

    For each sub-question the retriever is queried, the recursion prompt is
    filled with the retrieved context plus every question/answer pair produced
    so far, and the model's reply is recorded as the next pair.

    Args:
        decompostion_questions: Iterable of sub-question strings, answered in order.

    Returns:
        The text of the answer to the last sub-question, or an empty string when
        no sub-questions are given.
    """
    qa_history = []    # formatted Q+A blocks for the sub-questions answered so far
    final_answer = ""  # stays empty when there is nothing to answer

    for q in decompostion_questions:
        context = retriever.invoke(q)

        # The history must live outside the loop so earlier answers reach later
        # prompts; the placeholder is only used before any pair exists.
        q_a_pairs = "\n---\n".join(qa_history) if qa_history else "NO QA Pairs for now"

        response = llm.invoke(
            recursion_prompt.invoke({"context": context, "question": q, "q_a_pairs": q_a_pairs})
        )
        final_answer = response.content

        # Append the new pair to the history (each pair appears exactly once).
        qa_history.append(format_qa_pair(q, final_answer))

    return final_answer
    


In [61]:
# Run the answer pipeline over the generated sub-questions and keep the returned text.
answer = retrieve_and_rag(decompostion_questions)

In [63]:
from IPython.display import Markdown

Markdown(answer)

The use of modular components in Large Language Model (LLM) application development offers several significant benefits:

1. **Flexibility and Customization**: Modular components allow developers to tailor applications to specific requirements easily. This adaptability enables diverse workflows and functionalities, facilitating the integration of various tools as needed for specific tasks.

2. **Simplified Development Cycle**: With a modular architecture, developers can focus on individual components without needing to understand the entire system at once. This approach simplifies the stages of development, productionization, and deployment, streamlining the development lifecycle.

3. **Reusability**: Modules can be reused across different projects, reducing redundancy and fostering efficient resource management. This capability accelerates development timelines as developers can leverage pre-existing components instead of starting from scratch.

4. **Scalability**: Modular designs generally support better scalability. Developers can add or replace components to meet increasing user demands or to incorporate new technologies without overhauling the entire application.

5. **Focused Optimization and Monitoring**: Components like LangSmith enable detailed performance monitoring and evaluation of specific modules. This allows developers to track issues, optimize individual components, and improve the overall functionality of the application iteratively.

6. **Enhanced Collaboration**: A modular approach allows teams to work on different components concurrently, facilitating collaboration among developers. This parallel development can lead to faster delivery of application updates and features.

7. **Security Controls**: Modular components can introduce targeted security measures. For instance, specific modules can be designed to handle sensitive tasks with increased security protocols, thereby minimizing the risks associated with data exposure and third-party dependencies.

Overall, the modular architecture, as exemplified by frameworks like LangChain, significantly enhances the ability of developers to create scalable, efficient, and contextually aware LLM applications while addressing the complexities of integration and security. 

In summary, while LangChain offers a comprehensive toolkit for LLM application development with its modular components, it also introduces certain complexities, such as a steep learning curve and security considerations related to external integrations. Thus, effective training and best practices are essential for developers to fully leverage the benefits of this modularity while mitigating potential challenges.

## 2. Answer Individually

![Answer Individually](./images/rag_decomposition_individual.png)

In [87]:
decompostion_questions

['What are the key features of LangGraph and how do they contribute to building scalable LLM-powered applications?',
 'How does LangSmith enhance the security of applications built using LangChain?',
 'What role does LangServe play in the deployment of LLM-powered applications and how does it ensure scalability?']

In [88]:
question

'How does LangChain leverage modular components like LangGraph, LangSmith, and LangServe to address challenges in building scalable and secure LLM-powered applications?'

In [89]:
def retrieve_and_rag(decompostion_questions):
    """Answer each sub-question independently using its own retrieved context.

    NOTE: this reuses the name of the recursive variant defined earlier in the
    notebook, so running this cell replaces that function.

    Args:
        decompostion_questions: Iterable of sub-question strings.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts, one per
        sub-question, in input order.
    """
    template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
        Context: {context}
        Question: {question} 
        Answer: 
        """
    # The prompt does not depend on the sub-question, so it is built once
    # instead of on every loop iteration.
    prompt_rag = ChatPromptTemplate.from_template(template)

    rag_results = []
    for q in decompostion_questions:
        context = retriever.invoke(q)
        answer = llm.invoke(prompt_rag.invoke({"context": context, "question": q}))
        rag_results.append({"question": q, "answer": answer.content})

    return rag_results



In [90]:
# Answer every sub-question; the bare name on the last line displays the list.
results = retrieve_and_rag(decompostion_questions)

results

[{'question': 'What are the key features of LangGraph and how do they contribute to building scalable LLM-powered applications?',
  'answer': 'LangGraph offers key features such as support for cycles and branching in workflows, built-in persistence for state management, and human-in-the-loop capabilities. These features enable developers to construct flexible, stateful applications that can handle complex interactions and maintain continuity across sessions. By facilitating iterative logic and real-time updates, LangGraph enhances the scalability and adaptability of LLM-powered applications.'},
 {'question': 'How does LangSmith enhance the security of applications built using LangChain?',
  'answer': 'LangSmith enhances the security of applications built using LangChain by providing detailed logging and monitoring capabilities, which enable developers to track application usage and detect anomalies in real-time. It supports auditability and the principle of least privilege by allowing 

In [91]:
def format_qa_pairs(results):
    """Join numbered question/answer entries into a single text block.

    Each item of ``results`` is read through its ``'question'`` and ``'answer'``
    keys and numbered starting at 1. Leading and trailing whitespace of the
    combined text is stripped.
    """
    blocks = [
        f"\n\nQuestion {idx}: {item['question']}\n\nAnswer {idx}: {item['answer']}\n\n"
        for idx, item in enumerate(results, start=1)
    ]
    return "".join(blocks).strip()


# Combined Q+A text; it is passed as {context} to the final synthesis prompt.
context = format_qa_pairs(results)


In [92]:
from IPython.display import Markdown

Markdown(context)

Question 1: What are the key features of LangGraph and how do they contribute to building scalable LLM-powered applications?

Answer 1: LangGraph offers key features such as support for cycles and branching in workflows, built-in persistence for state management, and human-in-the-loop capabilities. These features enable developers to construct flexible, stateful applications that can handle complex interactions and maintain continuity across sessions. By facilitating iterative logic and real-time updates, LangGraph enhances the scalability and adaptability of LLM-powered applications.



Question 2: How does LangSmith enhance the security of applications built using LangChain?

Answer 2: LangSmith enhances the security of applications built using LangChain by providing detailed logging and monitoring capabilities, which enable developers to track application usage and detect anomalies in real-time. It supports auditability and the principle of least privilege by allowing fine-grained permission controls, minimizing the risk of unauthorized actions. Additionally, it employs sandboxing and layered security measures to protect sensitive data and limit exposure to vulnerabilities.



Question 3: What role does LangServe play in the deployment of LLM-powered applications and how does it ensure scalability?

Answer 3: LangServe facilitates the deployment of LLM-powered applications as scalable REST APIs, allowing for production-grade interactions with external systems. It ensures scalability by supporting load balancing, handling multiple API requests simultaneously, and providing auto-scaling features to adjust resources dynamically based on traffic demands. This makes LangServe suitable for high-demand production environments while maintaining consistent performance.

In [93]:
question

'How does LangChain leverage modular components like LangGraph, LangSmith, and LangServe to address challenges in building scalable and secure LLM-powered applications?'

In [94]:
# Prompt
# Final synthesis step: the formatted Q+A pairs are supplied as {context}
# together with the original question.
# NOTE: this rebinds the `template` name already used for the recursion prompt.
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# NOTE: `answer` is rebound here to the model's response object; its text is read via `.content`.
answer = llm.invoke(prompt.invoke({"context" : context, "question" : question}))



In [95]:
Markdown(answer.content)

LangChain leverages modular components such as LangGraph, LangSmith, and LangServe to effectively address challenges in building scalable and secure LLM-powered applications through their distinct yet complementary functionalities.

LangGraph enhances scalability by providing features such as support for cycles and branching in workflows, built-in persistence for state management, and human-in-the-loop capabilities. These features allow developers to create flexible and stateful applications capable of managing complex interactions and maintaining continuity across user sessions. By facilitating iterative logic and real-time updates, LangGraph ensures that applications can adapt and scale efficiently to meet varying user demands.

LangSmith contributes to security by implementing robust logging and monitoring capabilities, enabling developers to track application usage and detect anomalies in real-time. Its support for auditability and fine-grained permission controls minimizes the risk of unauthorized actions, while sandboxing and layered security measures protect sensitive data and reduce exposure to vulnerabilities. This emphasis on security is crucial for maintaining user trust and compliance in increasingly data-sensitive environments.

LangServe plays a vital role in the deployment aspect, converting LLM-powered applications into scalable REST APIs. It ensures high scalability by supporting load balancing and managing multiple API requests simultaneously, along with auto-scaling features that dynamically adjust resources based on traffic demands. This capability makes LangServe ideal for operating in high-demand production scenarios while providing consistent application performance.

Together, these modular components of LangChain create a cohesive ecosystem that addresses the intertwined challenges of scalability and security, allowing developers to build robust LLM-powered applications that can handle complex functionalities and maintain high standards of data protection.