In [2]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chat_models.openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Import pysqlite3 purely for its side effect of landing in sys.modules;
# the module object is fetched back out of sys.modules a few lines below.
__import__("pysqlite3")
import sys

# Chat model instance; it is piped into both the per-document prompt and the
# final-answer prompt later in this script.
llm = ChatOpenAI(temperature=0.1)

# File-backed store rooted at ./.cache/; passed to CacheBackedEmbeddings below.
cache_dir = LocalFileStore("./.cache/")

# Re-register the pysqlite3 module object under the name "sqlite3" (and drop
# the "pysqlite3" entry), so any *later* `import sqlite3` resolves to pysqlite3.
# NOTE(review): this swap runs after the langchain imports at the top of the
# cell, so a module that already imported sqlite3 keeps its original reference,
# and nothing visible in this cell uses sqlite3 directly -- confirm the shim is
# still needed here (it is commonly added for Chroma, not FAISS).
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

# Build the retrieval index: split the source file, embed the chunks (with an
# on-disk cache), and expose the FAISS index as a retriever.

# Newline-separated splitter whose lengths are measured with a tiktoken
# encoder (chunk_size / chunk_overlap are token counts per the LangChain
# docs), with 100 units of overlap between neighbouring 600-unit chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)

# Source document, addressed relative to the current working directory.
loader = UnstructuredFileLoader("./examples/chapter_one.md")

# Load the file and split it with the splitter configured above.
docs = loader.load_and_split(text_splitter=splitter)

# Underlying embedding model (default OpenAIEmbeddings configuration).
embeddings = OpenAIEmbeddings()

# Wrap the embedder so computed vectors are persisted in cache_dir.
# NOTE(review): no namespace is passed; LangChain's docs suggest one to keep
# caches from different embedding models apart -- confirm this is acceptable.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Embed every chunk and build the FAISS index from them.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Retriever with default search settings; used as the "documents" branch of
# map_chain further down.
retriever = vectorstore.as_retriever()

# "Map" step prompt: applied to ONE retrieved chunk at a time (see map_docs).
# Template variables: {context} = the chunk text, {question} = the user query.
#
# English gloss of the Korean system message (the literal itself is runtime
# text and must not be edited for translation):
#   "You are an expert at finding the parts of a document that relate to a
#    question. Look at the portion of a long document you are given and
#    repeat the parts relevant to the question verbatim, without altering
#    them. Search only the given document. If nothing is relevant, say so;
#    do not make anything up."
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            당신은 문서에서 질문과 관련된 부분을 잘 찾는 전문가입니다.
            당신에게 주어질 긴 문서의 일부를 보고 질문과 관련 있는 부분을 변경하지 말고 그대로 말해주세요.
            주어진 문서만 보고 찾아주세요. 관련된 내용이 없으면 없다고 말하세요, 이야기를 지어내지 마십시오.
            ----------문서----------
            {context}
            """,
        ),
        # The user's question is forwarded unchanged as the human turn.
        ("human", "{question}"),
    ]
)

# Prompt piped into the chat model; invoked once per chunk inside map_docs.
map_doc_chain = map_doc_prompt | llm


def map_docs(inputs):
    """Run the per-document extraction chain over every retrieved document.

    Args:
        inputs: Mapping with two keys:
            "documents" -- iterable of objects exposing ``page_content``.
            "question"  -- the question forwarded to each extraction call.

    Returns:
        The ``content`` of each ``map_doc_chain`` response, in document
        order, joined with a blank line between entries.
    """
    retrieved = inputs["documents"]
    query = inputs["question"]

    excerpts = []
    for document in retrieved:
        # One model call per document, issued sequentially in retrieval order.
        reply = map_doc_chain.invoke(
            {"context": document.page_content, "question": query}
        )
        excerpts.append(reply.content)

    return "\n\n".join(excerpts)


# "Map" stage of the pipeline. The input (the question string) is fed to both
# entries of the dict: the retriever turns it into documents, while
# RunnablePassthrough forwards it unchanged. The resulting
# {"documents": ..., "question": ...} mapping is then handed to map_docs,
# which returns the joined per-document excerpts as one string.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# "Reduce" step prompt: produces the final answer from the excerpts gathered
# by map_chain. Template variables: {context} = joined excerpts,
# {question} = the user query.
#
# English gloss of the Korean system message (the literal itself is runtime
# text and must not be edited for translation):
#   "You are an assistant skilled at professionally organizing documents.
#    Using the excerpts from the given long documents, produce a final answer
#    that fits the question. Answer only from the given context. If you do
#    not know, say you do not know; do not make anything up."
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            당신은 전문적인 문서 정리를 할 수 있는 비서입니다.
            주어진 긴 문서들의 발췌문들을 이용해서 주어지는 질문에 알맞는 최종 답변을 생성하세요.
            주어진 context로만 답변하세요. 당신이 모른다면 모른다고 말하세요, 이야기를 지어내지 마세요.
            ---------Context---------
            {context}
            """,
        ),
        # The user's question is forwarded unchanged as the human turn.
        ("human", "{question}"),
    ]
)

# Full map-reduce chain: the question string goes to map_chain (which yields
# the joined excerpts used as {context}) and is also passed through unchanged
# as {question}; the filled final_prompt is then sent to the chat model.
chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

# Example query; as the last expression of the cell its result is displayed.
# Running it performs retrieval plus one model call per retrieved document
# and one final model call.
chain.invoke("Who is Winston?")

Winston is a character in the text. He is described as a smallish, frail figure with fair hair, a sanguine face, and rough skin. Winston is thirty-nine years old and lives in Victory Mansions. He is experiencing a mixture of emotions when faced with the face of Goldstein, setting his features into an expression of quiet optimism when facing the telescreen. Winston sacrificed his lunch in the canteen and went into the kitchen to find only a hunk of dark-colored bread for tomorrow's breakfast. He then poured himself a drink of Victory Gin, which he found to be unpleasant but ultimately made him feel better.
