In [None]:
# 1. 보험 약관 챗봇 에이전트 (RAG-약관)
# 2. 학습 조교 에이전트 (RAG-수업자료)
# 3. 스크립트에서 관련된 문장을 찾아와서 해석해주는 에이전트

### 스크립트에서 관련 문장을 찾아와서 해석해주는 에이전트 (Friends)

In [13]:
# 1. 문서 load

from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

loader = DirectoryLoader(
    "./data/",
    glob="Friends*.pdf",   # friends_로 시작하는 pdf만 로딩
    loader_cls=PyMuPDFLoader
)
docs = loader.load()
print('pdf장수', len(docs))

pdf장수 1169


In [95]:
# 2. Split

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,      # 청크 길이
    chunk_overlap=0,   # 겹치기 (앞뒤 맥락 조금씩 포함)
)

splits = text_splitter.split_documents(docs)
print(len(splits), "chunks created")
print(splits[0].page_content[:200])  # 첫 chunk 미리보기

7480 chunks created
Season 1 
 
1
101. The One Where Monica Gets a New Roommate 
(The Pilot-The Uncut Version) 
 
Written by: Marta Kauffman & David Crane 
Transcribed by: guineapig 
Additional transcribing by: Eric Aase


In [92]:
# 3. 벡터 임베딩  4. 벡터 스토어 저장
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

embedding = OpenAIEmbeddings() # 각 청크 텍스트를 숫자 벡터로 바꿔

vectorstore = FAISS.from_documents(documents=splits, embedding=embedding)


# test
results = vectorstore.similarity_search('ross marry', k=1)

for doc in results:
    print(doc.page_content)

ROSS: Good morning. 
OLD WOMAN: Well, somebody got some last night. 
ROSS: Twice. 
END


In [98]:
from langchain.agents import create_openai_tools_agent, AgentExecutor # 에이전트 실행기
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory # 기억

from langchain_openai import ChatOpenAI
from langchain.tools.retriever import create_retriever_tool


llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0)

retriever = vectorstore.as_retriever(k=1) # 리트리버=도서관 사서

rag_tool = create_retriever_tool(
    retriever,
    name='pdf_search',
    description='PDF 문서에서 질문과 관련된 내용을 검색합니다.' # Agent가 언제 이 tool을 쓸지 알게 됨
)

text = f'''
- Do not print the entire text.
- When a user asks a question in Korean, find the relevant content.
- All sentences must be translated, and each sentence is printed one sentence in English-Korean form

- Divide up the sentences one by one, and show them **including who said it.
- Show the original English text and natural Korean interpretation for each sentence.
- At this time, interpret it informally, and make it natural according to the context.
- Please keep the dialogue, cultural context, and character characteristics alive.
- It is translated to be read naturally in Korean rather than in direct translation.

출력 예시:
[MONICA: She is unbelievable, our mother is...]
[모니카: 우리 엄마는 정말 믿을 수가 없어...]
[RACHEL: Uh, no she doesn't but I can, I can get a message to her.]
[레이첼: 음, 아니요, 그녀는 아니지만, 나는 그녀에게 메시지를 전달할 수 있어요.]

'''

prompt = ChatPromptTemplate.from_messages([
    ('system', text),
    MessagesPlaceholder(variable_name='chat_history'), # 사용자가 여태까지 했던 얘기들이 들어옴
    ('human', '{input}'),
    MessagesPlaceholder(variable_name='agent_scratchpad') #  임시저장 변수.... 검색 도구 호출 때 필요함
])

memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='chat_history'
)

agent = create_openai_tools_agent(
    llm=llm,
    tools=[rag_tool],
    prompt=prompt,
)

agent_executor = AgentExecutor(agent=agent, 
                               memory=memory, 
                               tools=[rag_tool], 
                               verbose=True) # verbose 수다스러운.. 출력을 더 많이 한다는 뜻!


In [104]:
agent_executor.invoke({'input':'챈들러가 갇혔다'})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `pdf_search` with `{'query': 'Chandler trapped'}`


[0m[36;1m[1;3mChandler: Oh God!

Chandler: It did enter my mind! But then something happened that made it, shoot right out. 
Rachel: Y'know what Chandler, you got yourself into those cuffs, you get yourself out of them. 
Chandler: No- no- no- no- no- no- no!! I can’t get myself right out of them! You must have me confused

Chandler: (trapped) Uh, uh. 
Monica: Well? 
Chandler: Dear God! This parachute is a knapsack! (throws himself over the back of the chair he was 
sitting in) 
(Rachel enters, in a formal dress.) 
Rachel: Hey. 
Phoebe: Hey. Ooh, look at you, dressy- dress.

Rachel: Thanks Chandler.[0m[32;1m[1;3m[Chandler: Oh God!]
[챈들러: 아, 세상에!]

[Chandler: It did enter my mind! But then something happened that made it, shoot right out.]
[챈들러: 그런 생각이 들긴 했어! 근데 뭔가가 있어서 바로 잊어버렸지.]

[Rachel: Y'know what Chandler, you got yourself into those cuffs, you get yours

{'input': '챈들러가 갇혔다',
 'chat_history': [HumanMessage(content='모니카의 엄마', additional_kwargs={}, response_metadata={}),
  AIMessage(content="[Monica: She is unbelievable, our mother is...]\n[모니카: 우리 엄마는 정말 믿을 수가 없어...]\n\n[Ross: Okay, relax, relax. We are gonna be here for a while, it looks like, and we still have boyfriends and your career to cover.]\n[로스: 좋아, 진정해, 진정해. 우리 여기 좀 오래 있을 것 같아, 그리고 아직 남자친구랑 네 커리어 얘기도 해야 하고.]\n\n[Monica: Oh God!]\n[모니카: 아, 세상에!]\n\n[Monica: We just want you to think it through.]\n[모니카: 그냥 네가 잘 생각해보길 바라는 거야.]\n\n[Rachel: Yeah, honey, maybe you can talk to somebody who’s had a baby. Like your mom?]\n[레이첼: 응, 자기야, 아기를 낳아본 사람한테 얘기해보는 게 어때? 네 엄마처럼?]\n\n[Monica: That is a wonder. So tell me something, Mom. If you had to do it all over again, I mean, if she was here right now, would you tell her?]\n[모니카: 그건 정말 대단한 일이야. 엄마, 그러니까 다시 할 수 있다면, 지금 여기 있다면, 엄마가 그녀한테 말해줄 거야?]\n\n[Mrs. Geller: Tell her what?]\n[젤러 부인: 뭐라고 말하라는 거야?]\n\n[Monica: How she drove you crazy, picking