In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
llm = ChatOpenAI(
    temperature=0.1,
) 

cache_dir = LocalFileStore("./.cache/")



splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=500,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader('./files/00713_small.txt', encoding='utf-8')

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,cache_dir
)


vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriver = vectorstore.as_retriever()   

### list of docs

### for doc in list of docs | prompt | llm

### for respone in list of lims response | put them all together

map_doc_prompt = ChatPromptTemplate.from_messages([
    (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. 
            Return any relevant text verbatim. 
            If there is no relevant text, return : ''
            -------
            {context}
            """,
    ),
    ("human", "{question}"),
])

map_doc_chain = map_doc_prompt | llm




### final doc | prompt | llm
def map_docs(inputs):
    documents = inputs["documents"]
    question = inputs["question"]
    return "\n\n".join(
        map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        ).content
        for doc in documents
    )

map_chain = {
    "documents": retriver, 
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

final_prompt= ChatPromptTemplate.from_messages([
    (
        "system", 
        """
            당신은 훌륭한 조력자 입니다. 
            Answer questions using only the following context. 
            If you don't know the answer just say you don't know, don't make it up an answer.
            -----
            {context}
        """,
    ),
    ("human", "{question}"),
])



######
chain = (
    {
        "context":map_chain,
        "question":RunnablePassthrough(),
        "extra":RunnablePassthrough()
    } 
    | final_prompt
    | llm
)




#chain.run("맹견의 범위는 어떻게 되나요?")
#chain.run("반려동물의 범위 어떻게 되나요?")
chain.invoke("맹견의 사육을 허가 받으려면 어떻게 해야 하나요?")

Created a chunk of size 807, which is longer than the specified 500
Created a chunk of size 1116, which is longer than the specified 500
Created a chunk of size 3012, which is longer than the specified 500
Created a chunk of size 725, which is longer than the specified 500
Created a chunk of size 932, which is longer than the specified 500
Created a chunk of size 1841, which is longer than the specified 500


Created a chunk of size 1770, which is longer than the specified 500


AIMessage(content='해당 지역의 시ㆍ도지사 또는 시장ㆍ군수ㆍ구청장에게 맹견의 사육을 신고하고 허가를 받아야 합니다.')