# In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import (
    TextLoader,
    Docx2txtLoader,
    PyPDFLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Chat model that produces the final answer; a low temperature is requested
# so that responses vary less between runs.
llm = ChatOpenAI(temperature=0.1)

# On-disk byte store backing the embedding cache, so vectors computed once
# can be reused on later runs.
cache_dir = LocalFileStore("./.cache/")

# Splitter that measures chunk size with a tiktoken encoder.
# `separators` must be a list of strings; a bare "\n" only works by accident
# because iterating a one-character string yields that character.
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    separators=["\n"],
    chunk_size=600,
    chunk_overlap=100,
)

txt_loader = TextLoader("./files/document.txt")  # loading yields a list of documents
# Alternative loaders for other source formats:
# pdf_loader = PyPDFLoader("./files/FAQ.pdf")  # yields a list; page info is kept in each item's metadata
# docx_loader = Docx2txtLoader("./files/FAQ.docx")

# Load the file and split it into chunks with the splitter configured above.
docs = txt_loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
# Wrap the embedder with the file-backed cache. The namespace keys cache
# entries by embedding model so vectors produced by different models never
# collide inside the shared cache directory.
# NOTE: adding the namespace changes cache keys, so entries written before
# this change are recomputed once.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Build the FAISS index from the chunks (embedding through the cache) and
# expose it as a retriever for use in the chain.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()

# System instruction: restrict the model to the supplied context and tell it
# to admit when it does not know. `{context}` is a template placeholder.
system_template = "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"

# Conversation layout: the system instruction followed by the user's question.
prompt_messages = [
    ("system", system_template),
    ("human", "{question}"),
]

prompt = ChatPromptTemplate.from_messages(prompt_messages)


# Inputs for the prompt: "context" is produced by the retriever, while
# "question" passes the invoked value through unchanged.
chain_inputs = {
    "context": retriever,
    "question": RunnablePassthrough(),
}

# Pipeline: gather prompt inputs -> fill the prompt -> call the chat model.
chain = chain_inputs | prompt | llm


# Sample questions asked against the indexed document. They are left as bare
# expressions so that a notebook displays the result of the last call.
chain.invoke("Aaronson 은 유죄인가요?")

chain.invoke("Julia 는 누구인가요?")
chain.invoke("그가 테이블에 어떤 메시지를 썼나요?")

# Output of the last call above:
# AIMessage(content='그가 쓴 메시지는 다음과 같습니다:\n- FREEDOM IS SLAVERY\n- TWO AND TWO MAKE FIVE\n- GOD IS POWER')