In [1]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Retrieval-augmented question answering over ./rag_files/document.txt.
# The conversation history is injected into the answering prompt so that
# follow-up questions can refer back to earlier turns.

# Initialize the chat language model with the library defaults.
llm = ChatOpenAI()

# Local file store used as the on-disk cache for computed embeddings.
cache_dir = LocalFileStore("./.cache/")

# Configure the text splitter: split on newlines, with chunk size and
# overlap measured through the tiktoken encoder.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Load the document and split it into chunks.
loader = UnstructuredFileLoader("./rag_files/document.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Embedding model used to vectorize the chunks.
embeddings = OpenAIEmbeddings()

# Wrap the embedding model so computed embeddings are stored in cache_dir
# and can be reused instead of being recomputed.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, cache_dir
)

# Build the Chroma vector store from the document chunks.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

# Expose the vector store as a retriever.
retriever = vectorstore.as_retriever()

# Conversation memory.
# - memory_key must match the {history} variable in the prompt below.
# - input_key tells the memory which input to record as the human turn; the
#   inner chain also receives the retrieved documents, so it cannot be
#   guessed automatically.
memory = ConversationBufferMemory(
    memory_key="history",
    input_key="question",
)

# Answering prompt. Unlike the default "stuff" prompt, it has a {history}
# slot, so the model actually sees the earlier turns of the conversation.
qa_prompt = PromptTemplate.from_template(
    "Use the following pieces of context and the conversation so far to "
    "answer the question at the end. If you don't know the answer, just say "
    "that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Conversation so far:\n"
    "{history}\n"
    "\n"
    "Question: {question}\n"
    "Helpful Answer:"
)

# Build the RAG pipeline with a RetrievalQA chain.
# The memory is attached to the inner document-combining chain through
# chain_type_kwargs. Passing memory= to RetrievalQA itself only records the
# turns: that chain forwards just the query and the retrieved documents, so
# the stored history would never reach the prompt.
# NOTE: retrieval is still driven by the raw question text only; the history
# influences answer generation, not which chunks are retrieved.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "prompt": qa_prompt,
        "memory": memory,
    },
)

In [2]:
# Ask the chain a question about the loaded document.
chain.run("Is Aaronson guilty?")

'Yes, according to the text, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'

In [3]:
# Follow-up question: "he" is not named anywhere in the query text itself.
chain.run("What message did he write in the table?")

'He wrote "2+2=5" in the dust on the table.'

In [5]:
# Ask about a person by name.
chain.run("Who is Julia?")

'Julia is a character in George Orwell\'s novel "1984." She is Winston Smith\'s lover and a fellow rebel against the oppressive Party in Oceania. Julia is portrayed as a passionate, independent woman who becomes involved in a romantic relationship with Winston despite the strict rules and surveillance of the Party.'