In [1]:
%pip install -qU langchain langchain_community openai faiss-cpu wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25ldone
[?25h  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11680 sha256=128ee0a6df2d63045abe102a76abac5461f4c78b71fa06721e40638776ed4fef
  Stored in directory: /Users/jikime/Library/Caches/pip/wheels/63/47/7c/a9688349aa74d228ce0a9023229c6c0ac52ca2a40fe87679b8
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0
Note: you may need to restart the kernel to use updated packages.


In [25]:
from dotenv import load_dotenv

load_dotenv()

True

In [13]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


# Wikipedia 검색기 초기화
retriever = WikipediaRetriever()

# 텍스트 분할기 초기화
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# OpenAI 임베딩 초기화
embeddings = OpenAIEmbeddings()

# LLM 초기화
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# RAG 프롬프트 템플릿
template = """다음 정보를 사용하여 질문에 답변해주세요:

{context}

질문: {question}
답변:"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)


In [14]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

def search_and_answer(query):
    # Wikipedia에서 관련 문서 검색
    docs = retriever.invoke(query)
    print(docs)
    return
    # 문서 분할
    splits = text_splitter.split_documents(docs)
    
    # 벡터 데이터베이스 생성
    vectorstore = FAISS.from_texts(splits, embeddings)
    
    # LCEL 체인 구성
    rag_chain = (
        {"context": vectorstore.as_retriever() | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    
    # 질문에 대한 답변 생성
    return rag_chain.invoke(query)


In [15]:
query = "What is the capital of France?"
answer = search_and_answer(query)
print(answer)

[Document(metadata={'title': 'Closed-ended question', 'summary': 'A closed-ended question refers to any question for which a researcher provides research participants with options from which to choose a response. Closed-ended questions are sometimes phrased as a statement which requires a response.\nA closed-ended question contrasts with an open-ended question, which cannot easily be answered with specific information.', 'source': 'https://en.wikipedia.org/wiki/Closed-ended_question'}, page_content='A closed-ended question refers to any question for which a researcher provides research participants with options from which to choose a response. Closed-ended questions are sometimes phrased as a statement which requires a response.\nA closed-ended question contrasts with an open-ended question, which cannot easily be answered with specific information.\n\n\n== Examples ==\nExamples of closed-ended questions which may elicit a "yes" or "no" response include:\n\nWere you born in 2010?\nIs L