In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

llm = ChatOpenAI()

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("../files/chapter_one.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)
# retriever은 vectorstoe에서 가져와도 되지만 invoke등을 통해 가져올 수 도 있음
chain.run("Where does Winston live?")
chain.run("Describe Victory Mansions")

'Victory Mansions is a building where Winston Smith lives in George Orwell\'s novel "1984." It is described as a run-down, dilapidated apartment building located in Airstrip One (formerly known as London) in the province of Oceania. The hallway of Victory Mansions smells of boiled cabbage and old rag mats. The building has a faulty lift, and the electricity is cut off during daylight hours as part of an economy drive. The flat itself is on the seventh floor, with a poster of Big Brother watching over the residents. Winston\'s flat in Victory Mansions is small, and he tries to avoid being watched by the telescreen, a surveillance device that is a constant presence.'