In [None]:
"""
document 를 이용한 검색.

off-the-shelf chain 들을 이용해서 작업.

** chain_type
* stuff : 채워넣다. 검색된 문서를 모두 하나의 프롬프트에 채워 넣어 llm 에게 한 번에 전달하는 방법.
* refine : 정제, 가다듬기. 각각의 문서를 주면서 답변을 개선해나가도록 하는 방법.
* map_reduce : 각각의 문서별로 요약, 각각의 요약본을 llm 에게 전달하여 최종 답변을 생성.
* map_rerank : 각 문서별로 답변을 생성하고, 답변마다 점수를 주고, 최고점을 받은 답변을 반환.
"""

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
# from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
# from langchain.llms import OpenLLM
from langchain.chains import RetrievalQA

# Chat model shared by the question-answering cells below.
llm = ChatOpenAI()

# On-disk byte store for computed embeddings; add ./.cache/ to .gitignore.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines, with chunk size and overlap counted via the tiktoken encoder.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()
# Namespace the cache by embedding model: without it, cache keys depend only on
# the text, so switching models later would silently reuse vectors produced by
# the old model. Entries written before the namespace was introduced are simply
# not reused (they get re-embedded once).
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# FAISS is the vector store in use; an earlier Chroma variant was disabled.
vectorstore = FAISS.from_documents(docs, cache_embeddings)

# Combine-documents strategy read by the RetrievalQA cells below
# (the notes at the top list "stuff", "refine", "map_reduce", "map_rerank").
chain_type = "map_rerank"

In [None]:

# Build a retrieval QA chain: the vector store supplies the relevant chunks and
# `chain_type` selects how those chunks are combined into an answer.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type=chain_type,
    llm=llm,
)

# Last expression of the cell, so the answer is shown as the cell output.
chain.run("Where does Winston live?")

In [None]:

# Rebuild the retrieval QA chain so this cell can be run on its own after the
# setup cell; the configuration comes from the shared llm / vectorstore / chain_type.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type=chain_type,
    llm=llm,
)

# Last expression of the cell, so the answer is shown as the cell output.
chain.run("Describe Victory Mansions.")

In [None]:
# Send an English passage straight to the chat model. The final Korean line is
# part of the prompt itself: it asks the model to translate the passage into Korean.
llm.predict(
    text="""Victory Mansions is a run-down apartment building where Winston Smith, the protagonist of George Orwell\'s "1984," lives. The building is described as dilapidated, with faulty plumbing, shabby furniture, and overcrowded conditions. The apartments are small and uncomfortable, lacking basic amenities. The residents of Victory Mansions are subjected to strict surveillance and control by the Party, adding to the oppressive and bleak atmosphere of the place.
한글로 해석해줘.
"""
)