In [1]:
"""
MapReduce LCEL Chain.

todos
* list of docs
    # 문서를 얻어서..
* for doc in list of docs | prompt | llm
    # 각각의 문서를 llm 으로 필요한 정보를 얻는다.
* for response in list of llm responses | put them all together
    # 각각의 문서 요약본을 하나의 문서로 정리.
* final doc | prompt | llm
    # 요약 문서를 llm 으로 필요한 정보를 정리한다.
* chain.invoke

new
RunnableLambda

map_reduce2 -> map_reduce 로 바꾸는 연습.

execute, then view the run in LangSmith (smith.langchain.com).
"""

'\nMapReduce LCEL Chain.\n\ntodos\n* list of docs\n    # 문서를 얻어서..\n* for doc in list of docs | prompt | llm\n    # 각각의 문서를 llm 으로 필요한 정보를 얻는다.\n* for responsein list of llms response | put them all together\n    # 각각의 문서 요약본을 하나의 문서로 정리.\n* final doc | promt | llm\n    # 요약 문서를 llm 으로 필요한 정보를 정리한다.\n* chain.invoke\n\nnew\nRunnableRambda\n\nexecute and after langchain.smith view.\n'

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model used by the per-document map step, the final answer step, and
# the translation call at the end of this cell.
llm = ChatOpenAI(
    temperature=0.1,
#    model_name="gpt-4",
)

# Local file store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/") # add this directory to .gitignore

# Split on newlines; chunk_size / chunk_overlap are measured with the
# tiktoken encoder rather than in raw characters.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
# Load the source document and split it into chunks in one call.
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings so results are cached in `cache_dir`.
# NOTE(review): no `namespace` is passed to from_bytes_store; if the embedding
# model is ever changed, cached vectors from the old model may be reused.
# Confirm against the LangChain docs before adding one (it changes cache keys).
embeddings = OpenAIEmbeddings()
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Build the vector index over the chunks (Chroma alternative kept for reference).
# vectorstore = Chroma.from_documents(docs, cache_embeddings)
vectorstore = FAISS.from_documents(docs, cache_embeddings)

# Retriever that the map step queries with the user's question.
retriever = vectorstore.as_retriever()

# "Map" prompt: applied to one document chunk at a time. It asks the model to
# return, verbatim, only the text in {context} that is relevant to {question}.
map_doc_prompt = ChatPromptTemplate.from_messages([
    ("system", """
        Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
        -------
        {context}
     """),
    ("human", "{question}"),
])

# Per-document chain; invoked with {"context": ..., "question": ...} by
# map_reduce2 / map_reduce, which read `.content` from its result.
map_doc_chain = map_doc_prompt | llm

def map_reduce2(inputs):
    """Loop-based map step: query the LLM once per document, then merge.

    Args:
        inputs: Mapping with a ``'documents'`` entry (an iterable of objects
            exposing ``page_content``) and a ``'question'`` entry.

    Returns:
        str: The ``content`` of each ``map_doc_chain`` response, in document
        order, separated by a blank line.
    """
    documents = inputs['documents']
    question = inputs['question']

    extracted = []
    for doc in documents:
        # One LLM call per document chunk.
        payload = {"context": doc.page_content, "question": question}
        response = map_doc_chain.invoke(payload)
        extracted.append(response.content)

    return "\n\n".join(extracted)

def map_reduce(inputs):
    """Comprehension-based map step: query the LLM per document, then merge.

    Produces the same result as ``map_reduce2`` without the explicit loop.

    Args:
        inputs: Mapping with a ``'documents'`` entry (an iterable of objects
            exposing ``page_content``) and a ``'question'`` entry.

    Returns:
        str: The ``content`` of each ``map_doc_chain`` response, in document
        order, separated by a blank line.
    """
    documents = inputs['documents']
    question = inputs['question']
    return "\n\n".join(
        map_doc_chain.invoke({
            # Pass the chunk's text, not the document object itself; formatting
            # the whole object into the prompt would embed its repr instead.
            "context": doc.page_content,
            "question": question,
        }).content for doc in documents
    )


# Map step as a runnable: the incoming question is sent to the retriever
# (result stored under "documents") and also passed through unchanged under
# "question"; the resulting dict is then handed to map_reduce.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough()
} | RunnableLambda(map_reduce)


# "Reduce" prompt: {context} receives the merged excerpts produced by
# map_chain, and {question} receives the original user question.
prompt = ChatPromptTemplate.from_messages([
    ("system", """
     Given the following extracted parts of a long document and a question, create a final answer. 
     If you don't know the answer, just say that you don't know. Don't try to make up an answer.
     ------
     {context}
     """),
    ("human", "{question}")
])

# Full map-reduce chain: map_chain fills {context} with the merged excerpts,
# while the original question passes through unchanged to fill {question}.
chain = {"context": map_chain, "question": RunnablePassthrough()} | prompt | llm

chain_result = chain.invoke("Describe Victory Mansions.")

# Ask the same model to translate the English answer into Korean (the appended
# instruction means "Please translate into Korean."). Uses the same `.invoke`
# API as the chains above and reads the text from the returned message.
ko_result = llm.invoke(chain_result.content + "\n\n한글로 번역해줘.").content

print(chain_result, ko_result)

content="Victory Mansions is an old, dilapidated building with constant maintenance issues. The flats were built around 1930 and are falling apart, with flaking plaster, burst pipes, leaky roofs, and an unreliable heating system. Repairs are delayed by remote committees, making it difficult to fix even minor issues. The Parsons' flat within Victory Mansions is described as larger than Winston's but has a dingy, battered appearance. The walls are adorned with games equipment, dirty dishes, Youth League and Spies banners, and a poster of Big Brother. The building has a common smell of boiled cabbage mixed with sweat. Despite its poor condition, Victory Mansions offers a view of all four Ministries, including the Ministry of Truth, although it is dwarfed by the surrounding architecture." 승리 아파트는 오래되고 낡은 건물로, 지속적인 유지보수 문제가 있다. 이 아파트는 1930년대에 지어졌으며 벽이 벗겨지고 파이프가 터지며 지붕이 새고 난방 시스템이 불안정하다. 수리는 원격 위원회에 의해 지연되어 작은 문제조차 해결하기 어렵다. 파슨스 가족의 아파트는 윈스턴의 것보다 크지만 험한 외관을 가지고 있다. 벽에는 게임 장비, 더러운 그릇, 청소년 연맹과