In [18]:
from dotenv import load_dotenv
import os

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Read the API key and the LangChain tracing settings from the environment.
# Any variable that is not set comes back as None.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = os.environ.get("LANGCHAIN_ENDPOINT")
# NOTE(review): LangSmith documents LANGCHAIN_PROJECT for the project name —
# confirm that LANGCHAIN_PROJECT_ID is the variable actually intended here.
LANGCHAIN_PROJECT_ID = os.environ.get("LANGCHAIN_PROJECT_ID")

BM25Retriever와 FAISS 검색기 결합

In [19]:
!pip install -qU langchain_openai langchain-community rank_bm25 faiss-cpu

In [20]:
# Sample corpus: six short sentences that all mention "apple",
# either as a fruit, as a company, or as part of a product name.
doc_list: list[str] = [
    "I like apples",
    "I like apple company",
    "I like apple's iphone",
    "Apple is my favorite company",
    "I like apple's ipad",
    "I like apple's macbook",
]

In [21]:
from langchain.retrievers import EnsembleRetriever
# BM25Retriever and FAISS live in langchain_community; importing them through
# the top-level `langchain` package is a deprecated re-export path.
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Keyword-based retriever (BM25) built directly from the sample sentences.
bm25_retriever = BM25Retriever.from_texts(
    doc_list,
)
bm25_retriever.k = 1  # Return only the single best BM25 match.

# Embedding-based retriever: embed the same sentences with OpenAI embeddings
# and index them in a FAISS vector store.
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    doc_list,
    embedding,
)
# Return only the single nearest neighbour, mirroring the BM25 setting above.
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 1})

# Combine both retrievers into one ensemble. The weights are listed in the
# same order as `retrievers`: 0.7 for BM25 and 0.3 for FAISS.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.7, 0.3],
)

In [22]:
# The search query shared by the retriever comparison below.
query: str = "my favorite fruit is apple"

In [23]:
# Run the same query through each retriever, in this order:
#   1. ensemble retriever (combined ranking)
#   2. BM25 retriever     (keyword-based search)
#   3. FAISS retriever    (embedding-based search)
ensemble_result, bm25_result, faiss_result = (
    retriever.invoke(query)
    for retriever in (ensemble_retriever, bm25_retriever, faiss_retriever)
)

In [24]:
# Print the retrieved documents, one labelled section per retriever.
# Each document's text is followed by a blank line.
for label, results in (
    ("[Ensemble Retriever]", ensemble_result),
    ("[BM25 Retriever]", bm25_result),
    ("[FAISS Retriever]", faiss_result),
):
    print(label)
    for doc in results:
        print(f"Content: {doc.page_content}")
        print()

[Ensemble Retriever]
Content: Apple is my favorite company

Content: I like apples

[BM25 Retriever]
Content: Apple is my favorite company

[FAISS Retriever]
Content: I like apples



In [26]:
from langchain_core.runnables import ConfigurableField

# Describe the `weights` attribute as a field that can be overridden at call
# time through the "configurable" section of the config passed to invoke().
ensemble_weights_field = ConfigurableField(
    # Unique identifier; this is the key used inside config["configurable"].
    id="ensemble_weights",
    # Human-readable name of the field.
    name="Ensemble Weights",
    # Description of the field.
    description="Ensemble Weights",
)

# Rebuild the ensemble from the BM25 and FAISS retrievers without fixed
# weights, and expose `weights` as the configurable field defined above.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
).configurable_fields(weights=ensemble_weights_field)

In [34]:
# Override the ensemble weights for this call only. The two values are given
# in the order the retrievers were passed to the ensemble (BM25, then FAISS),
# so this setting weights BM25 at 0.9 and FAISS at 0.1.
config_1 = {
    "configurable": {
        "ensemble_weights": [0.9, 0.1],
    },
}

docs_1 = ensemble_retriever.invoke(
    "my favorite fruit is apple",
    config=config_1,
)
docs_1

[Document(metadata={}, page_content='Apple is my favorite company'),
 Document(id='8e71a981-0475-4b8a-b961-7541ce0483e3', metadata={}, page_content='I like apples')]

In [32]:
# Override the ensemble weights for this call only. The two values are given
# in the order the retrievers were passed to the ensemble (BM25, then FAISS),
# so this setting weights BM25 at 0.3 and FAISS at 0.7.
config_2 = {
    "configurable": {
        "ensemble_weights": [0.3, 0.7],
    },
}

docs_2 = ensemble_retriever.invoke(
    "my favorite fruit is apple",
    config=config_2,
)
docs_2

[Document(id='8e71a981-0475-4b8a-b961-7541ce0483e3', metadata={}, page_content='I like apples'),
 Document(metadata={}, page_content='Apple is my favorite company')]