In [26]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory






In [53]:
# 1) Load the document and split it into chunks (lengths measured by the tiktoken encoder)
loader = UnstructuredFileLoader("document.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n\n", chunk_size=600, chunk_overlap=100
)
docs = loader.load_and_split(text_splitter=splitter)

# 2) Embeddings with an on-disk cache
embeddings = OpenAIEmbeddings()
cache_store = LocalFileStore("cache")
# Namespace the cache keys by embedding model so that vectors computed with a
# different model are never served from the same "cache" directory.
# NOTE: entries previously written without a namespace are not reused, so the
# first run after this change re-embeds the document once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_store, namespace=embeddings.model
)

# 3) Vector store and retriever
vectorstore = FAISS.from_documents(docs, embedding=cached_embeddings)
retriever = vectorstore.as_retriever()

# 4) Conversation memory; messages are exposed under the "history" key
memory = ConversationBufferMemory(memory_key="history", return_messages=True)

# 5) Prompt and LLM applied to every retrieved chunk (the "map" step)
map_prompt = ChatPromptTemplate.from_messages([
    ("system", 
     "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up: {context}"),
    ("user", "Question:\n{question}"),
])
map_llm = ChatOpenAI(temperature=0.1)

# 6) Single RunnableLambda step: run the map stage and prepare the reduce input
def map_and_prepare(inputs: dict) -> dict:
    """Retrieve chunks for the question, query the LLM per chunk, and merge the replies.

    Args:
        inputs: Mapping holding the user's question under "input" and,
            optionally, the prior conversation messages under "history".

    Returns:
        A dict with three keys: "context" (the non-empty per-chunk replies
        joined by blank lines), "input" (the question, unchanged) and
        "history" (passed through; an empty list when none was supplied).

    Raises:
        KeyError: If "input" is missing from ``inputs``.
    """
    question = inputs["input"]

    # The per-chunk chain does not depend on the chunk, so build it once
    # instead of on every loop iteration.
    map_chain = map_prompt | map_llm

    # NOTE(review): only empty replies are dropped; a chunk for which the model
    # answers that it does not know still contributes that sentence to the
    # combined context.
    snippets = []
    for doc in retriever.get_relevant_documents(question):
        reply = map_chain.invoke({"context": doc.page_content, "question": question})
        text = reply.content.strip()
        if text:
            snippets.append(text)

    return {
        "context": "\n\n".join(snippets),
        "input": question,
        # Default to no history so the function also works when called
        # directly, without the upstream step that injects "history".
        "history": inputs.get("history", []),
    }

# Inject the stored conversation under "history", then run the map stage.
map_reduce_step = (
    RunnablePassthrough.assign(
        history=lambda _: memory.load_memory_variables({})['history']
    )
    | RunnableLambda(map_and_prepare)
)

# 7) Final prompt and LLM (the "reduce" step)
final_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Given the following extracted text snippets and conversation history, answer the question. "
     "If you don't know, just say you don't know. Do not invent an answer."),
    ("assistant", "Extracted Context:\n{context}"),
    MessagesPlaceholder(variable_name="history"),
    ("user", "{input}"),
])
final_llm = ChatOpenAI(temperature=0.1)
answer_step = final_prompt | final_llm


def answer_and_remember(inputs: dict):
    """Produce the final answer and record the exchange in ``memory``.

    Without this step the memory is read on every call but never written, so
    "history" would always be empty and follow-up questions would have no
    conversational context.

    Args:
        inputs: The dict produced by ``map_and_prepare``, carrying "context",
            "input" and "history".

    Returns:
        The message returned by ``final_llm`` for this question.
    """
    reply = answer_step.invoke(inputs)
    memory.save_context({"input": inputs["input"]}, {"output": reply.content})
    return reply


# 8) Assemble the full pipeline; chain.invoke({"input": ...}) still returns
# the model's message, and now also appends the turn to the memory.
# NOTE: the answer is produced inside a plain function, so chain.stream()
# yields the reply as a single item rather than token by token.
chain = map_reduce_step | RunnableLambda(answer_and_remember)

In [54]:
# Run the full pipeline on a question; the cell output is the returned message.
chain.invoke({"input": "Is Aaronson guilty?"})


AIMessage(content='Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.')

In [55]:
# Follow-up question; the question text itself does not say who "he" is.
chain.invoke({"input": "What message did he write in the table?"})

AIMessage(content='He wrote "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.')

In [56]:
# Ask the pipeline about a named character.
chain.invoke({"input": "Who is Julia?"})

AIMessage(content='Julia is a character who was involved in a relationship with Winston, the protagonist of the story.')