## Loading Env

In [None]:
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment so
# the os.getenv(...) lookups in the cells below can find the API keys.
load_dotenv()

## LLM APIs

### OpenAI Env, Model, and Embedding

In [None]:
import os

# Fail fast with a readable message when the key is missing. Assigning
# os.getenv(...) straight back into os.environ raises an opaque
# "TypeError: str expected, not NoneType" in that case.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [None]:
from langchain_openai import ChatOpenAI

# No arguments are passed, so the model name and sampling settings are
# whatever langchain_openai uses by default.
llm = ChatOpenAI()

# Smoke test; as the last expression of the cell the returned message is displayed.
llm.invoke("hello how are you my firend?")

In [None]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large"
)

# The length of the returned vector is the embedding dimension; any vector
# index built for this embedder has to be created with the same size.
len(embeddings.embed_query("hello how are you my firend?"))

### Groq KEY and Model

In [None]:
import os

# Fail fast with a readable message when the key is missing. Assigning
# os.getenv(...) straight back into os.environ raises an opaque
# "TypeError: str expected, not NoneType" in that case.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [None]:
from langchain_groq import ChatGroq

# NOTE: this re-binds the notebook-wide name `llm` (previously the ChatOpenAI
# client). Cells that run after this one, e.g. llm.bind_tools(...) in
# Workflow 2, see the Groq model.
llm = ChatGroq(
    model_name="deepseek-r1-distill-llama-70b",
    temperature=0
)

# The result is only stored, not displayed; evaluate `response` to inspect it.
response=llm.invoke("what is length of wall of china?")

### Google Gemini Env, Model, and Embedding

In [None]:
import os

# Fail fast with a readable message when the key is missing. Assigning
# os.getenv(...) straight back into os.environ raises an opaque
# "TypeError: str expected, not NoneType" in that case.
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# `model` is the name the Workflow 1 node functions further down rely on
# (prompt | model | parser, model.invoke(...)).
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

# Smoke test: print only the text content of the reply.
output = model.invoke("hi")
print(output.content)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# NOTE: re-binds `embeddings`; from here on the name no longer refers to the
# OpenAI embedder created earlier.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Displays the full embedding vector (wrap in len(...) to see just its size).
embeddings.embed_query("Hello AI")

## Hugging Face Embedding Models

In [None]:
import os

# Re-export the Hugging Face token only when one is configured. Assigning
# os.getenv(...) unconditionally raises "TypeError: str expected, not NoneType"
# when HF_TOKEN is absent.
# NOTE(review): the checkpoints loaded below look like public models that
# should not need a token - confirm before making the token mandatory.
if os.getenv("HF_TOKEN"):
    os.environ['HF_TOKEN'] = os.getenv("HF_TOKEN")

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# NOTE: re-binds the notebook-wide `embeddings` to the BAAI/bge-small-en model.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

# Vector length = embedding dimension of this model.
len(embeddings.embed_query("hi"))

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# NOTE: re-binds the notebook-wide `embeddings` to all-MiniLM-L6-v2.
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

text="this is atest documents"
query_result=embeddings.embed_query(text)
# Last expression of the cell: displays the whole embedding vector.
query_result

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# This is the last assignment to `embeddings` in the notebook, so on a
# top-to-bottom run it is the embedder every vector-store cell below uses.
# Vector-index dimensions must match this model's output size.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

## Data Loaders

### WebBaseLoader

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# For 1 URL
# Placeholder: fill in a real URL before running this cell.
url = ''
web_loader=WebBaseLoader(url)
data=web_loader.load()

# For Multi URL
# Placeholders: fill in real URLs before running this cell.
urls = ['', '']
# One load() per URL yields a list of per-URL document lists ...
docs=[WebBaseLoader(url).load() for url in urls]
# ... which is flattened into `docs_list`, the name the chunking cell reads.
docs_list=[item for sublist in docs for item in sublist]

### TextLoader and DirectoryLoader

In [None]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader

# Loads the *.txt files directly under ../data (relative to the kernel's
# working directory), reading each match with TextLoader.
loader=DirectoryLoader("../data",glob="./*.txt",loader_cls=TextLoader)
# NOTE: `docs` is re-bound by every loader cell in this section; later cells
# see whichever loader ran last.
docs=loader.load()

### PDF Loader

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# 'syllabus.pdf' is resolved relative to the kernel's working directory.
loader=PyPDFLoader('syllabus.pdf')
# NOTE: re-binds `docs` (shared with the other loader cells).
docs=loader.load()

### ArXiv Loader

In [None]:
from langchain_community.document_loaders import ArxivLoader

# The query looks like an arXiv paper identifier; at most 2 documents are loaded.
# NOTE: re-binds `docs` (shared with the other loader cells).
docs = ArxivLoader(query="1706.03762", load_max_docs=2).load()

### Wikipedia Loader

In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Loads at most 4 Wikipedia documents for the query.
# NOTE: re-binds `docs` (shared with the other loader cells).
docs = WikipediaLoader(query="Generative AI", load_max_docs=4).load()

## Chunking

### RecursiveCharacterTextSplitter

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Option A - sizes measured in characters (generic embedding models).
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)

# Option B - sizes measured in tiktoken tokens (OpenAI embedding models).
# The opening parenthesis must sit on the same line as the method name:
# with "(" on the next line Python treats it as a separate statement and the
# cell fails with a SyntaxError.
# NOTE: this re-binds `text_splitter`, so Option B is the one used below;
# delete whichever option you do not need.
text_splitter=RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100,
    chunk_overlap=25
)

# Common Code
# `docs_list` is the flattened document list produced by the WebBaseLoader cell.
doc_splits=text_splitter.split_documents(docs_list)


# If only page content needed
doc_string=[doc.page_content for doc in doc_splits]

# If need to preserve metadata
# `texts` and `metadatas` are index-aligned: metadatas[i] belongs to texts[i].
texts = [doc.page_content for doc in doc_splits]
metadatas = [doc.metadata for doc in doc_splits]

### CharacterTextSplitter

In [None]:
from langchain_text_splitters import CharacterTextSplitter

# Splits on blank lines ("\n\n") with the given chunk size and overlap.
# NOTE: re-binds `text_splitter` from the previous section.
text_splitter=CharacterTextSplitter(separator="\n\n",chunk_size=100,chunk_overlap=20)
# Operates on `docs` (whichever loader cell ran last), not on `docs_list`.
text_splitter.split_documents(docs)

### HTMLHeaderTextSplitter

In [None]:
from langchain_text_splitters import HTMLHeaderTextSplitter

# Self-contained sample document with h1/h2/h3 headings to split on.
html_string = """
<!DOCTYPE html>
<html>
<body>
    <div>
        <h1>Foo</h1>
        <p>Some intro text about Foo.</p>
        <div>
            <h2>Bar main section</h2>
            <p>Some intro text about Bar.</p>
            <h3>Bar subsection 1</h3>
            <p>Some text about the first subtopic of Bar.</p>
            <h3>Bar subsection 2</h3>
            <p>Some text about the second subtopic of Bar.</p>
        </div>
        <div>
            <h2>Baz</h2>
            <p>Some text about Baz</p>
        </div>
        <br>
        <p>Some concluding text about Foo</p>
    </div>
</body>
</html>
"""

# (HTML tag, label) pairs: the tag to split on and the label paired with it.
headers_to_split_on=[
    ("h1","Header 1"),
    ("h2","Header 2"),
    ("h3","Header 3")
]

html_splitter=HTMLHeaderTextSplitter(headers_to_split_on)
html_header_splits=html_splitter.split_text(html_string)
# Last expression of the cell: displays the resulting splits.
html_header_splits

### RecursiveJsonSplitter

In [None]:
from langchain_text_splitters import RecursiveJsonSplitter

# Small stand-in payload so the cell runs on a fresh kernel (the notebook
# never defined `json_data`). Replace it with your own already-parsed JSON,
# e.g. json.load(open(path)) or requests.get(url).json().
json_data = {
    "company": {
        "name": "Acme",
        "address": {"city": "Springfield", "country": "USA"},
        "departments": {
            "engineering": {"head": "Ada", "headcount": 42},
            "sales": {"head": "Grace", "headcount": 17},
        },
    }
}

json_splitter=RecursiveJsonSplitter(max_chunk_size=300)
json_chunks=json_splitter.split_json(json_data)

## Vector Embedding

### FAISS

In [None]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

# A FAISS index is created for one fixed vector width, which has to equal the
# length of the vectors `embeddings` produces. Measure it once instead of
# hard-coding 3072 / 384: `embeddings` is re-bound several times earlier in the
# notebook, and a mismatched width makes FAISS reject the vectors on insert.
embedding_dim=len(embeddings.embed_query("dimension probe"))

# Using Inner Product in FAISS Index

index=faiss.IndexFlatIP(embedding_dim)

db = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Using Euclidean Distance in FAISS Index
# NOTE: this re-binds `index` and `db`, so the L2 store is the one populated
# below; delete whichever variant you do not need.

index=faiss.IndexFlatL2(embedding_dim)

db = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Insert the chunks exactly once, together with their metadata. Use
# `db.add_texts(doc_string)` instead when the metadata is not needed; running
# both calls stores every chunk twice and duplicates search results.
db.add_texts(texts, metadatas=metadatas)

# Note: add_texts expects a list of strings, not Document objects

### Saving and Loading Indexes

In [None]:
# Saving Index

# Persists the FAISS store held in `db` under the local folder "saved_index".
db.save_local("saved_index")

In [None]:
# Loading Index

# SECURITY: allow_dangerous_deserialization=True opts in to loading a pickled
# docstore, which can execute arbitrary code. Only load folders you created
# yourself and trust.
# NOTE(review): `embeddings` presumably has to be the same model that built
# the saved index - confirm before querying.
new_vector_store=FAISS.load_local(
  "saved_index",
  embeddings,
  allow_dangerous_deserialization=True
)

### Chroma

In [None]:
from langchain_community.vectorstores import Chroma

# Builds a Chroma collection directly from the chunked Document objects
# (`doc_splits` from the chunking cell), embedding them with `embeddings`.
vectorstore=Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chrome", # Any Name
    embedding=embeddings
    
)

### Pinecone

In [None]:
import os
# None when PINECONE_API_KEY is not set; the value is passed to Pinecone(...) below.
pinecone_api_key=os.getenv("PINECONE_API_KEY")

In [None]:
from pinecone import Pinecone
from pinecone import ServerlessSpec  #Serverless: Server will be Managed by the cloud provider

pc=Pinecone(api_key=pinecone_api_key)

# Index Creation and Loading

index_name="agentic-ai"

# Create the index only if it does not exist yet, so re-running the cell is safe.
# NOTE(review): dimension=768 has to match the vector length of `embeddings`;
# presumably chosen for all-mpnet-base-v2 - confirm if the embedder changes.
if not pc.has_index(index_name):
    pc.create_index(
    name=index_name,
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws",region="us-east-1")    
)

# Handle to the (new or existing) index.
# NOTE: re-binds `index`, which the FAISS section used for its faiss index.
index=pc.Index(index_name)

In [None]:
from langchain_pinecone import PineconeVectorStore

# Vector Store and Similarity Search
# Wraps the Pinecone index handle; queries are embedded with `embeddings`.
vector_store=PineconeVectorStore(index=index,embedding=embeddings)

results = vector_store.similarity_search("what is a langchain?")
# Last expression of the cell: displays the matches.
results

In [None]:
# Vector Store Retriever

# `retriever` is the global the RAG node (function2) pipes into format_docs.
# Only matches scoring at or above the threshold are returned, so a query can
# legitimately come back empty.
retriever=vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7} #hyperparameter
)
retriever.invoke("langchain")

## Langchain Inbuilt Tools

### Wikipedia

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Up to 5 results, content capped at 500 characters.
api_wrapper=WikipediaAPIWrapper(top_k_results=5,doc_content_chars_max= 500)
wiki_tool = WikipediaQueryRun(api_wrapper= api_wrapper)

# The three attribute reads below are reference snippets: only the LAST
# expression of a cell is displayed, so they print nothing here. Run each in
# its own cell (or wrap in print/display) to see the value.
# To get tool name
wiki_tool.name
# To get tool description
wiki_tool.description
# To get tool args
wiki_tool.args

# Running
wiki_tool.run({"query": "elon musk"})

### Youtube Search

In [None]:
from langchain_community.tools import YouTubeSearchTool

# NOTE: the name `tool` is reused later for the Tavily tool and then for the
# imported `@tool` decorator, so its meaning depends on which cell ran last.
tool = YouTubeSearchTool()

# The three attribute reads below are reference snippets: only the LAST
# expression of a cell is displayed, so they print nothing here.
# To get tool name
tool.name
# To get tool description
tool.description
# To get tool args
tool.args

# Running
tool.run("Emergency Awesome")

### Tavily (Search Engine)

In [None]:
import os
# None when TAVILY_API_KEY is not set; the value is passed to the Tavily tools below.
TAVILY_API_KEY=os.getenv("TAVILY_API_KEY")

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

# NOTE: re-binds `tool` (the YouTubeSearchTool above). The Custom Tools cells
# import the `@tool` decorator under the same name, so run those import lines
# again before defining custom tools if this cell ran more recently.
tool=TavilySearchResults(tavily_api_key=TAVILY_API_KEY)

# Running - 1
# Input passed as a dict with a "query" key.
tool.invoke({"query":"what happend between Trump and Musk today?"})

# Running - 2
# Input passed as a plain string, with an instruction prefix prepended.
question = "what happend between Trump and Musk today?"
complete_query = "Anwer the follow question by searching the internet and getting best response. Following is the user question: " + question

tool.invoke(complete_query)

In [None]:
from langchain_tavily import TavilySearch

# Same search via the langchain_tavily package instead of langchain_community.
tavily_tool=TavilySearch(tavily_api_key=TAVILY_API_KEY)

question = "what happend between Trump and Musk today?"
complete_query = "Anwer the follow question by searching the internet and getting best response. Following is the user question: " + question

tavily_tool.invoke(complete_query)

### DuckDuckGo

In [None]:
from langchain_community.tools import DuckDuckGoSearchRun

# NOTE: `search` is re-defined later as a custom @tool function in Workflow 2.
search = DuckDuckGoSearchRun()

search.invoke("what is the latest update on iphone17 release?")

## Custom Tools

### Addition

In [None]:
from langchain.tools import tool

# Registered as a LangChain tool via @tool: the docstring below doubles as the
# tool description handed to the model, so keep it accurate when editing.
@tool
def add(a: int, b: int) -> int:
    """
    Add two integers.

    Args:
    a(int): The first integer
    b(int): The second integer

    Returns:
        int: The Sum of a and b
    """

    return a + b

### Subtract

In [None]:
from langchain.tools import tool

# Registered as a LangChain tool via @tool: the docstring below doubles as the
# tool description handed to the model, so keep it accurate when editing.
@tool
def subtract(a: int, b: int) -> int:
    """
    Subtract two integers.

    Args:
    a(int): The first integer
    b(int): The second integer

    Returns:
        int: The difference of a and b
    """

    # Order matters: the result is a minus b.
    return a - b

### Absolute Difference

In [None]:
from langchain.tools import tool

# The docstring is the description @tool exposes to the model. It previously
# read "Subtract two integers." - identical to the `subtract` tool - which
# gives the model no way to tell the two tools apart.
@tool
def abs_diff(a: int, b: int) -> int:
    """
    Compute the absolute difference between two integers.

    The result is never negative and does not depend on argument order.

    Args:
    a(int): The first integer
    b(int): The second integer

    Returns:
        int: The absolute difference of a and b
    """

    return abs(a - b)

### Multiplication

In [None]:
from langchain.tools import tool

# The docstring is the description @tool exposes to the model; the verb was
# misspelled ("Multiple two integers."). The function name is left unchanged
# so existing references to `multiple` keep working.
@tool
def multiple(a: int, b: int) -> int:
    """
    Multiply two integers.

    Args:
    a(int): The first integer
    b(int): The second integer

    Returns:
        int: The product of a and b
    """

    return a * b

### Divide

In [None]:
from langchain.tools import tool

# The docstring is the description @tool exposes to the model. The return type
# was documented and annotated as int, but `/` is true division and always
# yields a float (e.g. 7 / 2 == 3.5, 4 / 2 == 2.0).
@tool
def divide(a: int, b: int) -> float:
    """
    Divide two integers.

    Args:
    a(int): The numerator
    b(int): The denominator, must not be zero

    Returns:
        float: The quotient a / b (true division, may be fractional)

    Raises:
        ValueError: If b is zero
    """

    if b == 0:
        raise ValueError("Denominator cannot be zero.")
    return a / b

### Length of Word

In [None]:
# Relies on the `tool` decorator imported in the cells above (this cell has no
# import of its own). The docstring doubles as the tool description handed to
# the model.
@tool
def get_word_length(word:str)->int:
    """
    Calculate the length of the word.

    Args:
    word(str): The word in string

    Returns:
        int: The length of the word
    """
    return len(word)

## Agentic Orchestration

### Pydantic Class for Validating LLM Output (used as an Output Parser)

In [None]:
from pydantic import BaseModel , Field
from langchain.output_parsers import PydanticOutputParser

# Schema the supervisor LLM's reply must satisfy. Field names are capitalised
# and are read as `response.Topic` by llm_supervisor_function.
class TopicSelectionParser(BaseModel):
    # The category the model picked for the user query.
    Topic:str=Field(description="selected topic")
    # The model's justification for that choice.
    Reasoning:str=Field(description='Reasoning behind topic selection')

# Parser that validates model output against TopicSelectionParser.
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)
# Displays the format instructions that are injected into the supervisor prompt.
parser.get_format_instructions()

### Custom Agent State Initiation

In [None]:
import operator
from langchain_core.messages import BaseMessage
from typing import TypedDict, Annotated, Sequence

class AgentState(TypedDict):
    # Single state key. The operator.add annotation acts as the reducer: the
    # "messages" list a node returns is concatenated onto the existing list
    # instead of replacing it.
    # NOTE(review): the Workflow 1 nodes append plain strings here, not
    # BaseMessage objects, so the element type is looser than annotated.
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [None]:
from langgraph.graph import StateGraph

# Graph builder whose state schema is the custom AgentState.
# NOTE: the "Prebuilt Agent State" cell below re-binds `workflow` to a
# MessagesState graph; re-create it with AgentState before building Workflow 1.
workflow = StateGraph(AgentState)

### Prebuilt Agent State

In [None]:
from langgraph.graph import StateGraph, MessagesState

# NOTE: re-binds `workflow`, replacing the AgentState graph created above.
workflow = StateGraph(MessagesState) 
# MessagesState plays the same role as the custom AgentState: a single
# "messages" key that accumulates across nodes. Use a custom state class when
# extra keys or a different reducer are needed; otherwise prefer MessagesState.
# NOTE(review): the Workflow 1 functions treat messages as plain strings
# (e.g. last_message.lower()), so they are written for AgentState - confirm
# before running them on a MessagesState graph.

### Workflow 1: Agentic Orchestration using parser(pydantic class), custom AgentState, and Custom Router Function

![alt text](01a93893-3dce-4f90-80d0-b335c9bd36a2.png)

In [None]:
from typing import List
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph, START, END

In [None]:
# Functions

# LLM Supervisor Function using Pydantic Parser, Custom Agent State, Chaining
def llm_supervisor_function(state: AgentState):
    """Supervisor node: classify the latest message as "USA" or "Not Related".

    Reads the last entry of state["messages"], asks `model` to classify it and
    validates the reply with `parser` (the PydanticOutputParser built for
    TopicSelectionParser).

    Args:
        state: Graph state; only state["messages"][-1] is read.

    Returns:
        dict: {"messages": [<Topic>]} - the selected topic string, which is
        appended to the message list for the router to inspect.
    """
    question = state["messages"][-1]

    print("Question", question)

    # The reply has to follow the parser's format instructions (Topic plus
    # Reasoning). The earlier wording "Only respond with the category name and
    # nothing else" contradicted those instructions, and a model that obeyed
    # it would produce output the parser rejects.
    template="""
    Your task is to classify the given user query into one of the following categories: [USA, Not Related].
    Respond only in the format described below.

    User query: {question}
    {format_instructions}
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["question"],
        # Call the method so the prompt receives the instruction text itself.
        partial_variables={"format_instructions": parser.get_format_instructions()}
    )

    chain = prompt | model | parser

    response = chain.invoke({"question": question})

    print("Parsed response", response)

    return {"messages": [response.Topic]}

# Custom Router Function
def router_function(state: AgentState):
    """Pick the next Workflow 1 branch from the supervisor's verdict.

    Looks at the most recent entry of state["messages"] (the topic string the
    supervisor appended) and returns "RAG Call" when it contains "usa"
    (case-insensitive), otherwise "LLM Call".

    NOTE: Workflow 2 defines another function with this same name further
    down; whichever cell ran last is the one bound to `router_function`.
    """
    print("-> Router ->")

    verdict = state["messages"][-1]
    print("last_message: ", verdict)

    return "RAG Call" if "usa" in verdict.lower() else "LLM Call"
    
def format_docs(docs):
    """Concatenate the page_content of every document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# RAG Function
def function2(state: AgentState):
    """RAG node: answer the original question from retrieved context.

    Takes the FIRST entry of state["messages"] (the user's question), feeds it
    to the global `retriever`, formats the hits with format_docs, and asks
    `model` to answer from that context.

    Returns:
        dict: {"messages": [<answer text>]}
    """
    print("-> RAG Call ->")
    user_question = state["messages"][0]

    rag_prompt = PromptTemplate(
        input_variables=['context', 'question'],
        template = """
        You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. 
        If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n
        Question: {question} \n
        Context: {context} \n
        Answer:
        """,
    )

    # The same input string goes two ways: through the retriever (-> context)
    # and straight through unchanged (-> question).
    chain_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}
    rag_chain = chain_inputs | rag_prompt | model | StrOutputParser()

    answer = rag_chain.invoke(user_question)
    return {"messages": [answer]}

# LLM Function
def function3(state: AgentState):
    """LLM node: answer the original question from the model's own knowledge.

    Takes the FIRST entry of state["messages"] (the user's question, expected
    to be a plain string because it is concatenated to the instruction
    prefix) and sends it to `model` without any retrieval.

    Returns:
        dict: {"messages": [<answer text>]}
    """
    print("-> LLM Call ->")
    user_question = state["messages"][0]

    # Plain LLM call: instruction prefix + question.
    instruction = "Anwer the follow question with you knowledge of the real world. Following is the user question: "
    reply = model.invoke(instruction + user_question)
    return {"messages": [reply.content]}

In [None]:
# Build Workflow 1 on a fresh graph that uses the custom AgentState.
# `workflow` was last bound to StateGraph(MessagesState) in the "Prebuilt Agent
# State" cell, but the node functions here treat messages as plain strings.
# Re-creating the graph also makes this cell safe to re-run (no "node already
# exists" error).
# NOTE: `router_function` must be the Workflow 1 version; re-run its cell first
# if the Workflow 2 cells (which define the same name) ran more recently.
workflow = StateGraph(AgentState)

workflow.add_node("Supervisor", llm_supervisor_function)
workflow.add_node("RAG", function2)
workflow.add_node("LLM", function3)

# Entry point. `workflow.set_entry_point("Supervisor")` is an equivalent
# spelling; only one of the two is needed.
workflow.add_edge(START, "Supervisor")

# Conditional edges: the router's return value selects the next node.
workflow.add_conditional_edges(
    "Supervisor",
    router_function,
    {
        "RAG Call" : "RAG",
        "LLM Call" : "LLM"
    }
)

workflow.add_edge("RAG", END)
workflow.add_edge("LLM", END)

app = workflow.compile()
app

### Workflow 2: Agentic Orchestration Using Message State, Tool Node, Custom Tools, Multi Tool Calls

![alt text](95f33936-ba48-4300-9742-488a7f1cd6f3.png)

In [None]:
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,MessagesState,START,END
from langgraph.prebuilt import ToolNode

In [None]:
# Custom Tool

# Stub weather tool with canned answers; the docstring is the description the
# model sees when deciding whether to call it.
# NOTE: re-binds `search`, which the DuckDuckGo section used for its search tool.
@tool
def search(query:str):
    """this is my custom tool for searching a weather"""
    asks_about_delhi = "delhi" in query.lower()
    return "the temp is 45 degree celsius" if asks_about_delhi else "the temp is 25 degree celsius"

# Binding Tool
# `llm` is whichever chat model was assigned last (the ChatGroq client on a
# top-to-bottom run). bind_tools returns a new runnable; `llm` is not modified.
tools = [search]
llm_with_tool = llm.bind_tools(tools)

# Smoke test: the model should request the `search` tool rather than answer directly.
response = llm_with_tool.invoke("what is weather in delhi?")
response.tool_calls

In [None]:
# Using Message State

def call_model(state: MessagesState):
    """Model node: send the whole message history to the tool-bound LLM.

    Returns:
        dict: {"messages": [<model reply>]} - the reply is added to the state
        and may carry tool calls for the router to act on.
    """
    history = state['messages']
    return {"messages": [llm_with_tool.invoke(history)]}

# Custom Router Function, which will be replaced by langgraph.prebuilt tools_condition 
def router_function(state: MessagesState):
    """Route to the tool node when the latest message requests a tool call.

    Returns "tools" if the last message in state["messages"] has a non-empty
    `tool_calls`, otherwise END.

    NOTE: this re-defines `router_function` and shadows the Workflow 1 router
    of the same name.
    """
    latest = state["messages"][-1]
    return "tools" if latest.tool_calls else END

In [None]:
# Creating tool Node from Tool Node class
# Wraps the same `tools` list that was bound to the LLM, so the graph can run
# the tool calls the model requests.

tool_node = ToolNode(tools)

In [None]:
# Workflow 2: model node <-> tool node loop on the prebuilt MessagesState.
workflow2 = StateGraph(MessagesState)
workflow2.add_node("llmwithtool",call_model)
workflow2.add_node("mytools", tool_node) # The tool Node is used here
workflow2.add_edge(START, "llmwithtool")
# router_function returns "tools" or END; the mapping translates "tools" to
# the actual node name "mytools".
workflow2.add_conditional_edges("llmwithtool",
                                router_function,
                                {"tools": "mytools",
                                END: END})
# Tool results go back to the model, which may request further tool calls or
# finish; this back-edge is what enables multi-step tool use.
workflow2.add_edge("mytools", "llmwithtool") # This edge makes multi tool call
app2 = workflow2.compile()
app2

### Using tools_condition instead of our custom Router function

In [None]:
# Same Workflow 2 graph, rebuilt with the prebuilt `tools_condition` in place of
# our custom router_function. tools_condition returns "tools" when the latest
# AI message contains tool calls and END otherwise.
#
# Two fixes compared with the earlier snippet:
#   * the edges belong on `workflow2` - the "llmwithtool" node does not exist
#     on `workflow` (the Workflow 1 graph);
#   * our tool node is named "mytools", not "tools", so the return value has to
#     be mapped to the real node name.
# The graph is rebuilt from scratch so this cell can be re-run and so the old
# router_function branch is not kept alongside the new one.

from langgraph.prebuilt import tools_condition

workflow2 = StateGraph(MessagesState)
workflow2.add_node("llmwithtool", call_model)
workflow2.add_node("mytools", tool_node)
workflow2.add_edge(START, "llmwithtool")
workflow2.add_conditional_edges("llmwithtool",
                                tools_condition,
                                {"tools": "mytools",
                                 END: END})
workflow2.add_edge("mytools", "llmwithtool")

### Use of Memory Saver, Stream Messages, and Pretty Print

In [None]:
from langgraph.checkpoint.memory import MemorySaver

# In-memory checkpointer instance passed to compile() below.
memory = MemorySaver()

# The graph definition is unchanged; the checkpointer is supplied at compile time.
# NOTE: re-binds `app2`, replacing the version compiled without memory.
app2 = workflow2.compile(checkpointer=memory)
app2

In [None]:
# The thread_id identifies the conversation for the checkpointer; reuse the
# same id to continue a conversation, change it to start a new one.
config = {"configurable": {"thread_id": "1"}}

# Stream the run; with stream_mode="values" each event is read below as a state
# dict with a "messages" list.
events = app2.stream(
    {"messages":["what is a weather in delhi can you tell me some good hotel for staying in north delhi"]}, 
    config=config, 
    stream_mode="values"
    )

# Print only the newest message of each streamed state.
for event in events:
    event["messages"][-1].pretty_print()

### Viewing Compiled Workflow stored in app variable

In [None]:
from IPython.display import Image, display
# Renders the compiled graph as a Mermaid PNG. `app` is the Workflow 1 graph;
# use `app2` here to draw Workflow 2 instead.
display(Image(app.get_graph().draw_mermaid_png()))

### Agentic RAG

![alt text](004a619e-f184-41ff-b629-ecdad04b9eb0.png)