# RAG w/Langgraph

- https://python.langchain.com/docs/tutorials/rag/

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# 1. Loader (웹문서)
from langchain_community.document_loaders import WebBaseLoader
from bs4.filter import SoupStrainer # 웹페이지 문서 필터기 # pip install beautifulsoup4
from pprint import pprint


loader = WebBaseLoader(
    # 문서 출처 URL
    web_paths=('https://lilianweng.github.io/posts/2023-06-23-agent/',),
    # 웹페이지 안에서 필요한 정보만 선택하기
    bs_kwargs = {
        'parse_only': SoupStrainer(class_=['post-content'])
    }
    # header_template= {}
)

docs = loader.load()
pprint(docs[0].page_content)

# 2. Splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splitted_docs = splitter.split_documents(docs)
print(len(splitted_docs))

# 3. Embedding Model
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(model='text-embedding-3-small') # small <-> large

# 4. Vectorstore (지금은 FAISS -> 클라우드- Pinecone)
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.from_documents(splitted_docs, embedding=embedding)

('Building agents with LLM (large language model) as its core controller is a '
 'cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer '
 'and BabyAGI, serve as inspiring examples. The potentiality of LLM extends '
 'beyond generating well-written copies, stories, essays and programs; it can '
 'be framed as a powerful general problem solver.\n'
 'Agent System Overview#\n'
 'In a LLM-powered autonomous agent system, LLM functions as the agent’s '
 'brain, complemented by several key components:\n'
 '\n'
 'Planning\n'
 '\n'
 'Subgoal and decomposition: The agent breaks down large tasks into smaller, '
 'manageable subgoals, enabling efficient handling of complex tasks.\n'
 'Reflection and refinement: The agent can do self-criticism and '
 'self-reflection over past actions, learn from mistakes and refine them for '
 'future steps, thereby improving the quality of final results.\n'
 '\n'
 '\n'
 'Memory\n'
 '\n'
 'Short-term memory: I would consider all the in-cont

In [6]:
from langchain import hub

prompt = hub.pull('rlm/rag-prompt')
for m in prompt.messages:
    m.pretty_print()


You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: [33;1m[1;3m{question}[0m 
Context: [33;1m[1;3m{context}[0m 
Answer:


In [12]:
# LLM
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model='gpt-4.1-nano')
# State
from langchain_core.documents import Document
from typing_extensions import TypedDict, List

class State(TypedDict):
    question: str
    context: List[Document] # RAG 데이터 문서조각/ 리스트 안에 document가 들어가야한다고 명시
    answer: str

# Node -  Edge
# 검색노드
def retrieve(state: State):
    # [Document 타입 객체 4개]
    retrieved_docs = vectorstore.similarity_search(state['question'], k=4)

    # 나머지 return하지 않은 state항목들은, 알아서 그대로 감(question, answer는 알아서 그대로 나감)
    return {'context': retrieved_docs, } 

# 답변 생성노드
def generate(state: State):
    # Document 객체의 필요없는 정보는 다 빼고, 내용에 해당하는 page_content만 모아서 넘기면 토큰 절약 가능
    context_str = ''
    for doc in state['context']:
        context_str += doc.page_content + '\n ------------------------ \n'

    question_with_context = prompt.invoke({'question': state['question'], 'context': context_str})
    response = llm.invoke(question_with_context)
    return {'answer': response.content}

# Graph
from langgraph.graph import StateGraph, START, END

builder = StateGraph(State)
builder.add_node('retrieve', retrieve)
builder.add_node('generate', generate)

builder.add_edge(START, 'retrieve')
builder.add_edge('retrieve', 'generate')
builder.add_edge('generate', END)

graph = builder.compile()

# 출력
# from IPython.display import Image, display

# display(Image(graph.get_graph().draw_mermaid_png()))

In [13]:
final_state = graph.invoke({'question': '에이전트 시스템에 대해서 알려줘!'})

final_state['answer']

'에이전트 시스템은 독립적으로 환경을 인식하고 행동하며 목표를 달성하도록 설계된 소프트웨어 또는 하드웨어 시스템입니다. 이 시스템은 입력을 받아 분석 후 적절한 행동을 선택하고 수행하는 과정을 반복합니다. 주로 인공지능, 자동화, 로보틱스에서 사용되며, 자율성을 갖춘 의사결정 구조를 포함합니다.'

### 메세지 스트리밍

In [11]:
for event in graph.stream({'question': 'LangGraph란 뭐야?'}):
    for node_name, output in event.items():
        print(f"[{node_name}] {output}")

[retrieve] {'context': 'Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\n\nThe system comprises of 4 stages:\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\nInstruction:\n ------------------------ \n(4) Response generation: LLM receives the execution results and provides summarized results to users.\nTo put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.\n ------------------------ \nBuilding agents with LLM (large language model) as its core controller is a

## RAG + a
- Metadata 편집
- Query 분석 - 보완


In [17]:
# 문서 63개중 1/3 지점
third = len(splitted_docs) // 3

# metadata 에 'section' 추가중
for idx, doc in enumerate(splitted_docs):
    if idx < third:
        doc.metadata['section'] = 'beginning'
    elif idx < third * 2:
        doc.metadata['section'] = 'middle'
    else:
        doc.metadata['section'] = 'end'

splitted_docs[0].metadata

vectorstore = FAISS.from_documents(splitted_docs, embedding=embedding)

In [None]:
# State를 더 빡빡하게 정의하기 위해, 위에 따로 정의한 클래스 Search
# StructuredOutput

from typing import Literal # 말 그대로
from typing_extensions import Annotated # 할말이 좀 더 있다

class Search(TypedDict):
    """vectorstore Search Query"""
    # 1. 타입, 2. ... -> NOT NULL, 3. 설명
    query: Annotated[str, ..., 'Search query to run']
    section: Annotated[
        Literal['beginning', 'middle', 'end'],
        ...,
        'Section to query'
    ]

class MyState(TypedDict):
    question: str
    query: Search # State를 더 빡빡하게 정의하기 위해, 위에 따로 정의한 클래스  Search
    context: List[Document]
    answer: str

In [18]:
# Node
def analyze_query(state: MyState):
    # Search 클래스에 맞춰 사용자 question을  {query, section}로 바꿈
    s_llm = llm.with_structured_output(Search)
    query = s_llm.invoke(state['question'])
    return {'query': query}

def retrieve(state: MyState):
    query = state['query']
    docs = vectorstore.similarity_search(
        query['query'],
        # LLM이 판단한 section과 실제 문서조각의 section이 맞을 경우에만 검색.
        filter=lambda metadata: metadata.get('section') == query['section']
    )
    return {'context': docs}

def generate(state: MyState):
    # Token 아끼기 위해, 내용만 추려서 문자열로 만들기
    doc_str = ''
    for doc in state['context']:
        doc_str += doc.page_content + '\n===========================\n'
    question_with_context = prompt.invoke({'question': state['question'], 'context': doc_str})
    res = llm.invoke(question_with_context)
    return{'answer': res.content}

builder = StateGraph(MyState)
builder.add_node('analyze_query', analyze_query)
builder.add_node('retrieve', retrieve)
builder.add_node('generate', generate)

builder.add_edge(START, 'analyze_query')
builder.add_edge('analyze_query', 'retrieve')
builder.add_edge('retrieve', 'generate')
builder.add_edge('generate', END)

graph = builder.compile()


In [None]:
graph.invoke({'question': '작업ㅂ분배가  뭐임?'}) # 개떡같이 말해도 찰떡같이 알아들음

{'question': '작업ㅂ분배가  뭐임?',
 'query': {'query': '작업분배가 뭐임?', 'section': '중본부'},
 'context': [],
 'answer': '작업 분배는 여러 사람이나 팀이 업무를 적절하게 나누어 수행하는 것을 의미합니다. 이를 통해 효율적으로 작업을 완료하고 시간과 자원을 절약할 수 있습니다. 일반적으로 프로젝트 관리나 조직 내에서 중요하게 다뤄지는 개념입니다.'}