* Stuff Documents 체인을 사용하여 완전한 RAG 파이프라인을 구현하세요.  
* 체인을 수동으로 구현해야 합니다. 
* 체인에 ConversationBufferMemory를 부여합니다. 
* 이 문서를 사용하여 RAG를 수행하세요: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223 
* 체인에 다음 질문을 합니다: 
    * Aaronson 은 유죄인가요? 
    * 그가 테이블에 어떤 메시지를 썼나요? 
    * Julia 는 누구인가요? 


In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
import os

# Step 1: Load the document
file_path = "document.txt"  # Ensure this file exists locally in the same folder
loader = TextLoader(file_path)
documents = loader.load()

# Step 2: Split the document into fragments
text_splitter = CharacterTextSplitter(
    chunk_size=500,  # Adjust the chunk size based on your requirements
    chunk_overlap=50
)
split_docs = text_splitter.split_documents(documents)

# Step 3: Generate embeddings and use cache (OpenAIEmbeddings)
embeddings = OpenAIEmbeddings()  # Assumes OpenAI API key is set in the environment

# Step 4: Create the FAISS vector store
vector_store = FAISS.from_documents(split_docs, embeddings)

# Step 5: Set up memory for conversational context
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Step 6: Create the conversational retrieval chain
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)  # Replace with your preferred LLM
retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    memory=memory
)

# Step 7: Ask questions to the chain
questions = [
    "Is Aaronson guilty?",
    "What message did he write on the table?",
    "Who is Julia?"
]

# Interact with the chain
for question in questions:
    response = retrieval_chain.run(question)
    print(f"Question: {question}")
    print(f"Answer: {response}\n")


Created a chunk of size 717, which is longer than the specified 500
Created a chunk of size 608, which is longer than the specified 500
Created a chunk of size 642, which is longer than the specified 500
Created a chunk of size 1444, which is longer than the specified 500
Created a chunk of size 1251, which is longer than the specified 500
Created a chunk of size 1012, which is longer than the specified 500
Created a chunk of size 584, which is longer than the specified 500
Created a chunk of size 2313, which is longer than the specified 500
Created a chunk of size 1458, which is longer than the specified 500
Created a chunk of size 572, which is longer than the specified 500
Created a chunk of size 529, which is longer than the specified 500
Created a chunk of size 1673, which is longer than the specified 500
Created a chunk of size 742, which is longer than the specified 500
Created a chunk of size 669, which is longer than the specified 500
Created a chunk of size 906, which is long

InvalidRequestError: This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?

In [3]:
from langchain.chat_models import ChatOpenAI  # OpenAI의 챗 모델을 사용하기 위해 임포트
from langchain.document_loaders import TextLoader  # 텍스트 문서를 로드하는 모듈
from langchain.text_splitter import CharacterTextSplitter  # 텍스트를 특정 기준으로 분리하기 위한 모듈
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings  # 임베딩과 캐시를 위한 모듈
from langchain.vectorstores import FAISS  # 벡터 저장소인 FAISS를 사용하기 위한 모듈
from langchain.storage import LocalFileStore  # 로컬 파일 시스템에 데이터를 저장하기 위한 모듈
from langchain.schema.runnable import RunnablePassthrough  # 데이터 흐름 제어를 위한 패스스루 모듈
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder  # 대화형 프롬프트 템플릿 생성 모듈
from langchain.memory import ConversationBufferMemory  # 대화 내용을 저장하고 관리하기 위한 메모리 모듈

# 언어 모델 초기화, temperature는 응답의 창의성을 조절
llm = ChatOpenAI(
    temperature=0.1,
)

# 캐시 데이터를 저장할 로컬 디렉터리 경로 설정
cache_dir = LocalFileStore("./.cache/")

# 텍스트 분리 설정
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",  # 텍스트를 줄 단위로 분리
    chunk_size=600,  # 각 조각의 최대 크기
    chunk_overlap=100,  # 조각 간의 겹치는 크기
)

# 텍스트 파일 로드 및 분리
loader = TextLoader("./document.txt")  # 텍스트 파일을 로드
docs = loader.load_and_split(text_splitter=splitter)  # 텍스트를 splitter 설정에 따라 분리

# OpenAI 임베딩 생성
embeddings = OpenAIEmbeddings()

# 캐시 지원 임베딩 생성, 이전 임베딩 결과를 재사용하도록 설정
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# 문서를 벡터 저장소(FAISS)로 변환
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# 대화 메모리 초기화, 이전 대화 내용을 저장 및 관리
memory = ConversationBufferMemory(
    llm=llm,
    max_token_limit=20,  # 저장할 대화 기록의 최대 토큰 수
    return_messages=True,  # 메모리에서 메시지 형태로 데이터를 반환
)

# 메모리에서 대화 기록 로드
def load_memory(_):
    return memory.load_memory_variables({})["history"]

# 벡터 저장소에서 검색 기능 제공
retriever = vectorstore.as_retriever()

# 대화형 프롬프트 템플릿 생성
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",  # 시스템의 기본 안내 메시지
        ),
        MessagesPlaceholder(variable_name="history"),  # 대화 기록 삽입
        ("human", "{question}"),  # 사용자의 질문 삽입
    ]
)

# 체인 구성: 데이터를 처리하는 여러 단계 연결
chain = (
    {
        "context": retriever,  # 벡터 저장소에서 컨텍스트 검색
        "question": RunnablePassthrough(),  # 질문을 그대로 전달
        "history": load_memory,  # 대화 기록 로드
    }
    | prompt  # 프롬프트 생성
    | llm  # 생성된 프롬프트를 모델에 전달하여 응답 생성
)

# 체인 실행 함수 정의
def invoke_chain(question):
    result = chain.invoke(question)  # 체인을 실행하여 결과 생성
    print(result)  # 결과 출력
    memory.save_context({"input": question}, {"output": result.content})  # 입력과 출력 저장


In [6]:
# Example usage
if __name__ == "__main__":
    # List of questions to ask the chain
    questions = [
        "Is Aaronson guilty?",
        "What message did he write on the table?",
        "Who is Julia?"
    ]
    
    # Loop through each question and invoke the chain
    for question in questions:
        print(f"Question: {question}")
        invoke_chain(question)  # Call the function to process the question
        print("\n")  # Add some spacing for clarity


Question: Is Aaronson guilty?
content='According to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'


Question: What message did he write on the table?


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in project proj_g9s6HNOKQv9gm52qQPXNPVG2 organization org-ceC9tz2b3da3lsyCowAVlRUu on tokens per min (TPM): Limit 5000, Used 2531, Requested 2799. Please try again in 3.96s. Visit https://platform.openai.com/account/rate-limits to learn more..


content='He wrote "FREEDOM IS SLAVERY" and "TWO AND TWO MAKE FIVE" on the table.'


Question: Who is Julia?


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in project proj_g9s6HNOKQv9gm52qQPXNPVG2 organization org-ceC9tz2b3da3lsyCowAVlRUu on tokens per min (TPM): Limit 5000, Used 4747, Requested 2786. Please try again in 30.396s. Visit https://platform.openai.com/account/rate-limits to learn more..


RateLimitError: Rate limit reached for gpt-3.5-turbo in project proj_g9s6HNOKQv9gm52qQPXNPVG2 organization org-ceC9tz2b3da3lsyCowAVlRUu on tokens per min (TPM): Limit 5000, Used 4399, Requested 2786. Please try again in 26.22s. Visit https://platform.openai.com/account/rate-limits to learn more.