In [12]:
from langchain.chat_models import ChatOpenAI 
from langchain.document_loaders import UnstructuredFileLoader # 이거 하나로 모든 txt 관련 파일 로드
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
# from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS   # Chroma와 대응되는 기법중에 하나
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Chat model shared by the per-document (map) chain and the final answer chain.
llm = ChatOpenAI(
    temperature=0.1 # low temperature: creative answers are not wanted
)

# Local file store at ./.cache/, passed below as the byte store backing the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Newline-separated splitter built via from_tiktoken_encoder
# (chunk_size=600, chunk_overlap=100).
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Source document for the retrieval pipeline.
loader = UnstructuredFileLoader("./files/chapter_one.docx")

# Load the file and split it into chunks with the splitter above.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

# Wrap the OpenAI embeddings with a cache stored in cache_dir.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Cost is incurred at this step - be careful (every time this cell is run).
# NOTE(review): embeddings go through cached_embeddings, so presumably only
# texts missing from ./.cache/ reach the embedding API - confirm.
vectorstore = FAISS.from_documents(docs, cached_embeddings) 

# Retriever over the vector store; supplies the documents for the map step.
retriever = vectorstore.as_retriever()

# step1 : list of docs

# Prompt idea for step2: "Read this document and check whether it contains information relevant to answering the user's question."
# step2 : for doc in list of docs | prompt | llm

# step3 : for response in list of llm responses | put them all together

# step4 : final doc | prompt | llm

# If the retriever returns a thousand or more documents, the "stuff" approach reportedly cannot be used,
# because all of those documents cannot be placed into the stuff prompt.

# Map prompt: applied to one document portion at a time ({context}) together
# with the user's {question}. It asks the model to return any relevant text
# verbatim, or '' when nothing in that portion is relevant.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human","{question}"),
    ]
)

# Map prompt piped into the chat model; map_docs invokes this once per document.
map_doc_chain = map_doc_prompt | llm

# Map step (steps 2-3): query the LLM once per document, then merge the extracts.
def map_docs(inputs):
    """Run ``map_doc_chain`` on every document and join the responses.

    Args:
        inputs: Mapping with a "documents" entry (an iterable of objects
            exposing ``page_content``) and a "question" entry.

    Returns:
        The ``content`` of each chain response, in document order,
        separated by blank lines. An empty iterable yields "".
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    extracts = []
    for document in retrieved:
        # One chain call per document; the same question is sent each time.
        payload = {"context": document.page_content, "question": query}
        response = map_doc_chain.invoke(payload)
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Feed the chain input to the retriever (as "documents") and pass it through
# unchanged (as "question"), then hand both to map_docs.
map_chain = { "documents": retriever, "question":RunnablePassthrough() } | RunnableLambda(map_docs)

# Final prompt: {context} receives the extracted parts of the document and
# {question} the user's question. The model is told to admit when it does not
# know rather than make up an answer.
final_prompt = ChatPromptTemplate.from_messages([
    ("system",
    """
    Given the following extracted parts of a long document and question, create a final answer.
    If you don't know the answer, just say that you don't know. Don't try to make up an answer.
    ------
    {context}
    """,
    ),
    ("human", "{question}"),
])

# step4: the output of map_chain fills {context}, the raw input passes through
# as {question}, and the filled final prompt is sent to the chat model.
chain = {"context": map_chain, "question":RunnablePassthrough()} | final_prompt | llm

# Alternative example query:
#chain.invoke("Describe Victory Mansions")
chain.invoke("Where does Winston go to work?")




AIMessage(content='Winston goes to work at the Ministry of Truth.')