In [3]:
import requests
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader, PyPDFLoader


# Load environment variables from a local .env file
# (presumably this supplies OPENAI_API_KEY for the OpenAI clients below -- confirm).
load_dotenv()

# 1. Load the source document from a local text file.
# Alternative source (currently disabled): fetch the text from a Gist over HTTP.
# gist_url = "https://gist.githubusercontent.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223/raw"
# response = requests.get(gist_url)
# document_text = response.text

# Decode explicitly as UTF-8 so loading does not depend on the platform's
# default locale encoding.
loader = TextLoader("./documents.txt", encoding="utf-8")
documents = loader.load()

# 2. Extract the raw text from each loaded Document object.
text_data = [doc.page_content for doc in documents]

# 3. Split the text into overlapping chunks (500 characters, 50 overlap).
# The original metadata is passed along so every chunk keeps the metadata of
# the document it was cut from instead of ending up with none.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = text_splitter.create_documents(
    text_data,
    metadatas=[doc.metadata for doc in documents],
)

# 4. Embedding model used to vectorize the chunks.
embeddings = OpenAIEmbeddings()

# 5. Build the FAISS vector store from the chunks.
vector_store = FAISS.from_documents(documents, embeddings)

# Conversation memory. It exposes past turns under the "history" key and is
# told that "question" is the user input to record (the chain receives two
# inputs, "context" and "question", so the input key must be named explicitly).
memory = ConversationBufferMemory(memory_key="history", input_key="question")

# Hand-written "stuff documents" prompt: the retrieved chunks are pasted into
# {context}. The {history} placeholder is required for the memory above to have
# any effect -- without it the stored conversation is never shown to the model
# and follow-up questions lose their context.
prompt_template = """You are an AI that answers questions based on documents
Please refer to the document below to respond to user inquiries.

documents:
{context}

conversation so far:
{history}

question: {question}
answer:"""

prompt = PromptTemplate(
    input_variables=["history", "context", "question"],
    template=prompt_template,
)

# OpenAI completion model used to generate the answers.
llm = OpenAI(temperature=0.5)

def ask_question(question):
    """Answer *question* using the most similar chunks in the vector store.

    Looks up the three chunks closest to the question in the module-level
    ``vector_store``, joins their text with blank lines to form the prompt
    context, and runs an ``LLMChain`` built from the module-level ``llm``,
    ``prompt`` and ``memory`` objects.

    Args:
        question: The user's question as a string.

    Returns:
        The value returned by ``LLMChain.run`` for this question.
    """
    # Retrieve the top-3 matching chunks for the query.
    hits = vector_store.similarity_search(question, k=3)

    # A fresh chain is created per call; it shares the module-level memory.
    qa_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

    chain_inputs = {
        "context": "\n\n".join(hit.page_content for hit in hits),
        "question": question,
    }
    return qa_chain.run(chain_inputs)

# Run the sample questions in order, printing each question before it is
# answered so the prompt is visible while the model call is in flight.
questions = [
    "Is Aaronson guilty?",
    "What message did he write in the table?",
    "Who is Julia?",
]

for q in questions:
    print(f"🧐 질문: {q}")
    # The trailing newline inside the f-string leaves a blank line between Q&A pairs.
    print(f"🤖 답변: {ask_question(q)}\n")

🧐 질문: Is Aaronson guilty?
🤖 답변:  Yes, Aaronson is guilty of the crimes he was charged with.

🧐 질문: What message did he write in the table?
🤖 답변:  FREEDOM IS SLAVERY and TWO AND TWO MAKE FIVE

🧐 질문: Who is Julia?
🤖 답변:  Julia is a person mentioned in the documents. She is someone who is loved by the speaker and is in need of help.

