## Assignment 2 Useful Concepts Walkthrough
This notebook covers the concepts that will be useful for solving the Assignment 2 problems:
1. Structured Outputs
2. LangGraph Basics
3. Tavily
4. How to do RAG in LangGraph

In [None]:
from dotenv import load_dotenv

load_dotenv()

import os
# Confirm both API keys were loaded WITHOUT echoing any part of the secrets:
# cell outputs are saved inside the notebook and are easy to commit or share by accident.
for key_name in ("TAVILY_API_KEY", "OPENAI_API_KEY"):
    status = "set" if os.environ.get(key_name) else "MISSING - add it to your .env file"
    print(f"{key_name}: {status}")

### Structured Outputs
Force the LLM to return its output in a specific format (a Pydantic class) instead of a single block of free-form text. This is extremely useful for integrating LLMs seamlessly into your applications.

Read more about that here - https://python.langchain.com/docs/concepts/structured_outputs/.

In [None]:
from pydantic import BaseModel, Field
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate

model = init_chat_model("gpt-4o-mini", model_provider="openai")

# Schema the LLM must fill in: the chosen category plus the reasoning behind it.
class ClassificationResult(BaseModel):
    category: str = Field(description="The category of the user question")
    explanation: str = Field(description="The explanation of why the category is chosen")

# Wrap the chat model so that it is asked to produce a ClassificationResult.
model = model.with_structured_output(ClassificationResult)

# The template must contain the {question} placeholder: without it, the value passed
# to chain.invoke() is never inserted into the prompt and the LLM has nothing to classify.
prompt = ChatPromptTemplate.from_template("""
Take the user question and classify it into one of the following categories:
Geography: Questions about countries, cities, or geographical features.
Science: Questions about science, technology, or scientific concepts.
Math: Questions about math, algebra, geometry, or mathematical concepts.

Question: {question}
""")
chain = prompt | model
result = chain.invoke({"question": "What is the capital of France?"})
print(type(result))
print(result)

# For contrast: the same model without structured output, invoked on the raw question.
plain_model = init_chat_model("gpt-4o-mini", model_provider="openai")
model_only_result = plain_model.invoke("What is the capital of France?")
for shown in (type(model_only_result), model_only_result):
    print(shown)

Structured output also works with more complex data structures, such as lists. The example below identifies all the places the user mentions in a question.

In [None]:
from pydantic import BaseModel, Field

# Structured-output schema with a single field: a list of place names as strings.
class Places(BaseModel):
    # One string per place.
    places: list[str] = Field(description="List of places")

# Create the chat model and wrap it for structured output (the `Places` schema) in one step.
model = init_chat_model("gpt-4o-mini", model_provider="openai").with_structured_output(Places)

# NOTE: this prompt only extracts information from the question; it does not answer it.
# The same technique can also be used to answer the question.
prompt = ChatPromptTemplate.from_template("""
List all the places that the user asked in the question: {question}
""")

chain = prompt | model
result = chain.invoke({"question": "What is the capital of France and India?"})
print(type(result), result, sep="\n")

## Simple non-AI graph example

In [None]:
# Define the state of the graph:
from typing import TypedDict

class State(TypedDict):
    num_values: int
    generated_values: list[int]
    result: int | None = None


In [None]:
import random

# Define the nodes in the graph:
def generate_values(state: State):
    """Node: draw `num_values` random integers from 0 to 10 (inclusive).

    Returns a partial state update holding the draws under "generated_values".
    """
    how_many = state["num_values"]
    drawn = []
    for _ in range(how_many):
        drawn.append(random.randint(0, 10))
    return {"generated_values": drawn}

def add(state: State):
    """Node: total the generated values and return the sum under "result"."""
    return {"result": sum(state["generated_values"])}

In [None]:
# Define the graph:
from langgraph.graph import StateGraph
from langgraph.graph import START, END

# Register the nodes, then wire them in a straight line:
# START -> generate_values -> add -> END
graph_builder = StateGraph(State)
for node_name, node_fn in [("generate_values", generate_values), ("add", add)]:
    graph_builder.add_node(node_name, node_fn)

for source, target in [(START, "generate_values"), ("generate_values", "add"), ("add", END)]:
    graph_builder.add_edge(source, target)

# Once compiled, the graph is also a 'Runnable'
graph = graph_builder.compile()

In [None]:
from IPython.display import Image, display

# Draw the compiled graph as a Mermaid PNG and show it inline.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

In [None]:
# Run the graph, supplying only `num_values` as the initial state.
graph.invoke({"num_values": 20})

## Conditional Edge example in LangGraph

In [None]:
# State for the conditional-edge example.
class ConditionalEdgeState(TypedDict):
    num_values: int  # how many random values generate_values draws
    generated_values: list[int]  # written by generate_values
    total: int  # written by add: sum of generated_values
    # Set to "happy" if the total is even, "sad" if the total is odd
    final_message: str

def generate_values(state: ConditionalEdgeState):
    """Node: draw `num_values` random integers from 0 to 10 (inclusive).

    Returns a partial state update holding the draws under "generated_values".
    """
    draws = []
    remaining = state["num_values"]
    for _ in range(remaining):
        draws.append(random.randint(0, 10))
    return {"generated_values": draws}

def add(state: ConditionalEdgeState):
    """Node: sum the generated values and return the sum under "total"."""
    return {"total": sum(state["generated_values"])}

def check_total(state: ConditionalEdgeState):
    """Routing function: pick the next node from the parity of the total.

    Returns "happy_message" for an even total and "sad_message" for an odd one.
    """
    total_is_even = state["total"] % 2 == 0
    return "happy_message" if total_is_even else "sad_message"

def happy_message(state: ConditionalEdgeState):
    """Node: set the final message to "happy" (the incoming state is not read)."""
    update = {"final_message": "happy"}
    return update

def sad_message(state: ConditionalEdgeState):
    """Node: set the final message to "sad" (the incoming state is not read)."""
    update = {"final_message": "sad"}
    return update

graph_builder = StateGraph(ConditionalEdgeState)

# check_total is deliberately NOT registered as a node: it is the routing
# function handed to add_conditional_edges below.
node_functions = {
    "generate_values": generate_values,
    "add": add,
    "happy_message": happy_message,
    "sad_message": sad_message,
}
for node_name, node_fn in node_functions.items():
    graph_builder.add_node(node_name, node_fn)

graph_builder.add_edge(START, "generate_values")
graph_builder.add_edge("generate_values", "add")

# THIS is the place where the condition is evaluated: check_total is attached to the
# outgoing edge of "add", and the mapping pairs each value it can return with a node name.
message_nodes = ("happy_message", "sad_message")
graph_builder.add_conditional_edges(
    "add",
    check_total,
    {message_node: message_node for message_node in message_nodes},
)
for message_node in message_nodes:
    graph_builder.add_edge(message_node, END)

graph = graph_builder.compile()


In [None]:
# Display the graph: draw it as a Mermaid PNG, then show the image inline.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

In [None]:
# Run the conditional graph, supplying only `num_values` as the initial state.
graph.invoke({"num_values": 10})

## Tavily Web Search Example

In [None]:
from langchain_community.tools import TavilySearchResults

# Search settings gathered in one place, then handed to the Tavily tool.
tavily_settings = dict(
    max_results=5,
    include_answer=True,
    include_raw_content=True,
    include_images=False,
    search_depth="advanced",
    # include_domains=[...] / exclude_domains=[...] could be added here
)
tool = TavilySearchResults(**tavily_settings)

In [None]:
# Call the tool directly (outside any graph) to inspect what it returns.
tool.invoke({"query": "What are some latest innovations in AI?"})

In [None]:
# State for the web-search graph.
class WebSearchState(TypedDict):
    query: str  # search query handed to the Tavily tool
    results: list[dict]  # written by web_search: the output of tool.invoke
    char_counts: list[int]  # written by sum_char_counts: length of each result's "content"

def web_search(state: WebSearchState):
    """Node: run the search tool on the state's query and store its output under "results"."""
    search_input = {"query": state["query"]}
    return {"results": tool.invoke(search_input)}

def sum_char_counts(state: WebSearchState):
    """Node: measure the length of every search result's "content".

    Despite the name, nothing is summed: the update holds one length per result
    under "char_counts".
    """
    lengths = []
    for hit in state["results"]:
        lengths.append(len(hit["content"]))
    return {"char_counts": lengths}

graph_builder = StateGraph(WebSearchState)
graph_builder.add_node("web_search", web_search)
graph_builder.add_node("sum_char_counts", sum_char_counts)

# Linear pipeline: connect every step to the one that follows it.
pipeline = [START, "web_search", "sum_char_counts", END]
for source, target in zip(pipeline, pipeline[1:]):
    graph_builder.add_edge(source, target)

graph = graph_builder.compile()

In [None]:
# Display the graph: draw it as a Mermaid PNG, then show the image inline.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

In [None]:
# Run the web-search graph, supplying only the query as the initial state.
graph.invoke({"query": "What are some latest innovations in AI?"})

In [None]:
# Let's implement the above in LangChain instead of LangGraph to cross-check your understanding:
from langchain_core.runnables import RunnableLambda
from langchain_community.tools import TavilySearchResults

# Tavily search tool for the LangChain version of the pipeline.
search_tool = TavilySearchResults(
    search_depth="advanced",
    max_results=5,
    # What each search should include:
    include_answer=True,
    include_raw_content=True,
    include_images=False,
    # include_domains=[...] / exclude_domains=[...] could be added here
)

# Define the character counting function
def sum_char_counts(results: list[dict]) -> dict:
    """Return the character length of each search result's "content".

    Args:
        results: A list of result dicts (here, the search tool's output); each
            one must have a "content" entry that supports len().

    Returns:
        {"char_counts": [...]} with one length per result, in the same order.
    """
    return {"char_counts": [len(result["content"]) for result in results]}

# Create the chain: the search tool's output is piped into the counting function,
# which is wrapped in a RunnableLambda.
count_step = RunnableLambda(sum_char_counts)
chain = search_tool | count_step

# Example usage:
result = chain.invoke({"query": "What are some latest innovations in AI?"})
print(result)

## Why LangGraph, then, when I can do everything in LangChain?
- THE central framework from the LangChain company.
- Less opinionated, more flexibility.
- Can still use LangChain within the nodes of a graph.
- Highly useful for Agentic AI which you'll be building next week.

## RAG Example in LangGraph

In [None]:
# Example of loading a PDF file:
# Refer here for more examples of document loaders: https://python.langchain.com/docs/how_to/document_loader_pdf/
from langchain_community.document_loaders import PyPDFLoader

file_path = "/Users/aish/Downloads/2019-annual-performance-report.pdf"
loader = PyPDFLoader(file_path)
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [None]:
# Show how many pages were loaded, then the text of the page at index 10.
page_count = len(pages)
print(page_count)
print(pages[10].page_content)

## Indexing the PDF file into a vector store

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

# Embed every loaded page and keep the vectors in an in-memory store.
embeddings = OpenAIEmbeddings()
vector_store = InMemoryVectorStore.from_documents(pages, embeddings)

# Sanity check: fetch the 4 most similar pages for a sample question and preview each one.
docs = vector_store.similarity_search("What are OPM's goals?", k=4)
print(len(docs))
for doc in docs:
    preview = f'Page {doc.metadata["page"]}: {doc.page_content[:300]}\n'
    print(preview)

## Retrieval in RAG

In [None]:
from typing import TypedDict

# Create a retriever object on the vector store
# (used by the retrieve_documents node below).
retriever = vector_store.as_retriever()

from langchain.chat_models import init_chat_model
from langchain_core.prompts import PromptTemplate

# Plain chat model (no structured output); generate_answer uses it to write the answer.
model = init_chat_model("gpt-4o-mini", model_provider="openai")

# State for the RAG graph.
class GraphState(TypedDict):
    question: str  # the user's question (the graph's input)
    # Written by retrieve_documents.
    # NOTE(review): annotated as list[str], but generate_answer reads `.page_content`
    # from each entry, so these are document objects rather than plain strings.
    documents: list[str]
    generated_answer: str  # written by generate_answer: the text of the model's reply

def retrieve_documents(state: GraphState):
    """Node: fetch the documents the retriever returns for the question.

    Calls `retriever.invoke(...)`, the standard Runnable entry point, instead of
    the deprecated `get_relevant_documents(...)`.

    Returns a partial state update with the retrieved documents under "documents".
    """
    docs = retriever.invoke(state["question"])
    return {"documents": docs}

def generate_answer(state: GraphState):
    """Node: answer the question using the retrieved documents as context.

    Returns a partial state update with the text of the model's reply under
    "generated_answer".
    """
    # Join all the retrieved documents into a single context string.
    context = "\n".join(doc.page_content for doc in state["documents"])

    template = PromptTemplate.from_template(
        "Answer the question based on the context provided. \n"
        "Question: {question}\n"
        "Context: {context}\n"
        "Answer: "
    )
    filled_prompt = template.format(question=state["question"], context=context)

    # Call the model with the filled-in prompt and keep only the reply's content.
    reply = model.invoke(filled_prompt)
    return {"generated_answer": reply.content}

graph_builder = StateGraph(GraphState)

# Two-step RAG pipeline: retrieve first, then generate from what was retrieved.
rag_steps = [
    ("retrieve_documents", retrieve_documents),
    ("generate_answer", generate_answer),
]
for step_name, step_fn in rag_steps:
    graph_builder.add_node(step_name, step_fn)

graph_builder.add_edge(START, "retrieve_documents")
graph_builder.add_edge("retrieve_documents", "generate_answer")
graph_builder.add_edge("generate_answer", END)

graph = graph_builder.compile()

In [None]:
# Display the graph: draw it as a Mermaid PNG, then show the image inline.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

In [None]:
# Ask the same sample question that was used for the similarity search on the indexed PDF.
graph.invoke({"question": "What are OPM's goals?"})

In [None]:
# Ask a question that is presumably outside the indexed PDF, to see how the graph
# answers when the retrieved context is not relevant.
graph.invoke({"question": "Who won the french open today?"})