In [1]:
from dotenv import load_dotenv

# API 키 정보 로드
load_dotenv()

True

In [2]:
from langchain_teddynote import logging

# 프로젝트 이름을 입력합니다.
logging.langsmith("CH11-Retriever")

LangSmith 추적을 시작합니다.
[프로젝트명]
CH11-Retriever


In [3]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# 샘플 문서 리스트
doc_list = [
    "I like apples",
    "I like apple company",
    "I like apple's iphone",
    "Apple is my favorite company",
    "I like apple's ipad",
    "I like apple's macbook",
]

# bm25 retriever와 faiss retriever를 초기화
bm25_retriever = BM25Retriever.from_texts(doc_list)
bm25_retriever.k = 1 # BM25Retriever의 검색 결과 개수를 1로 설정

embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    doc_list,
    embedding
)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 1})

# 앙상블 retreiver를 초기화
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.7, 0.3]
)

In [4]:
# 검색 결과 문서를 가져옴
query = "my favorite fruit is apple"
ensemble_result = ensemble_retriever.invoke(query)
bm25_result = bm25_retriever.invoke(query)
faiss_result = faiss_retriever.invoke(query)

# 가져온 문서를 출력
print("[Ensemble Retriever]")
for doc in ensemble_result:
    print(f"Content: {doc.page_content}")
    print()

print("[BM25 Retriever]")
for doc in bm25_result:
    print(f"Content: {doc.page_content}")
    print()

print("[FAISS Retriever]")
for doc in faiss_result:
    print(f"Content: {doc.page_content}")
    print()

[Ensemble Retriever]
Content: Apple is my favorite company

Content: I like apples

[BM25 Retriever]
Content: Apple is my favorite company

[FAISS Retriever]
Content: I like apples



In [5]:
# 검색 결과 문서를 가져옵니다.
query = "Apple company makes my favorite iphone"
ensemble_result = ensemble_retriever.invoke(query)
bm25_result = bm25_retriever.invoke(query)
faiss_result = faiss_retriever.invoke(query)

# 가져온 문서를 출력합니다.
print("[Ensemble Retriever]")
for doc in ensemble_result:
    print(f"Content: {doc.page_content}")
    print()

print("[BM25 Retriever]")
for doc in bm25_result:
    print(f"Content: {doc.page_content}")
    print()

print("[FAISS Retriever]")
for doc in faiss_result:
    print(f"Content: {doc.page_content}")
    print()

[Ensemble Retriever]
Content: Apple is my favorite company

Content: I like apple's iphone

[BM25 Retriever]
Content: Apple is my favorite company

[FAISS Retriever]
Content: I like apple's iphone



# 런타임 Config 변경

In [6]:
from langchain_core.runnables import ConfigurableField

ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
).configurable_fields(
    weights=ConfigurableField(
        #검색 매개변수의 고유 식별자를 설정
        id="ensemble_wights",
        # 검색 매개변수의 이름을 설정
        name="Ensemble Weights",
        # 검색 매개변수에 대한 설명을 작성
        description="Ensemble Weights"
    )
)

In [7]:
config = {"configurable": {"ensemble_weights": [1, 0]}}

# config 매개변수를 사용하여 검색 설정을 지정
docs = ensemble_retriever.invoke("my favorite fruit is apple", config=config)
docs

[Document(metadata={}, page_content='Apple is my favorite company'),
 Document(id='f0b078c7-8f55-49e7-8b6d-1d01090bf52c', metadata={}, page_content='I like apples')]

In [8]:
config = {"configurable": {"ensemble_weights": [0, 1]}}

# config 매개변수를 사용하여 검색 설정을 지정
docs = ensemble_retriever.invoke("my favorite fruit is apple", config=config)
docs

[Document(metadata={}, page_content='Apple is my favorite company'),
 Document(id='f0b078c7-8f55-49e7-8b6d-1d01090bf52c', metadata={}, page_content='I like apples')]