## BM25_Retriever


In [1]:
from dotenv import load_dotenv
from langchain_teddynote.community.pinecone import init_pinecone_index
from langchain_upstage.embeddings import UpstageEmbeddings
from langchain_teddynote.community.pinecone import PineconeKiwiHybridRetriever
from langchain_teddynote.korean import stopwords
import os

load_dotenv()


pinecone_params = init_pinecone_index(
    index_name="globalmacro-chatbot",
    namespace="financical-data-00",
    api_key=os.environ["PINECONE_API_KEY"],
    sparse_encoder_path="../data/sparse_encoder_01.pkl",
    stopwords=stopwords(),
    tokenizer="kiwi",
    embeddings=UpstageEmbeddings(model="solar-embedding-1-large-query"),
    top_k=10,
    alpha=0.4,  # alpha=0.75로 설정한 경우, (0.75: Dense Embedding, 0.25: Sparse Embedding)
)


pinecone_retriever = PineconeKiwiHybridRetriever(**pinecone_params)

[init_pinecone_index]
{'dimension': 4096,
 'index_fullness': 0.0,
 'namespaces': {'financical-data-00': {'vector_count': 2012}},
 'total_vector_count': 2012}


# Reranker


## Cohere


In [None]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

compressor = CohereRerank(model="rerank-multilingual-v3.0")

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=pinecone_retriever
)

compressed_docs = compression_retriever.invoke("90년대 사건들을 나열해줘")
compressed_docs

## Jina


In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import JinaRerank

compressor = JinaRerank(model="jina-reranker-v2-base-multilingual", top_n=5)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=pinecone_retriever
)

compressed_docs = compression_retriever.invoke("90년대 사건들을 나열해줘")

compressed_docs

## bge-reranker-v2-m3


In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")

compressor = CrossEncoderReranker(model=model, top_n=5)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=pinecone_retriever
)

compressed_docs = compression_retriever.invoke("90년대 사건들을 나열해줘")

compressed_docs

## Ko-reranker


In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

model = HuggingFaceCrossEncoder(model_name="Dongjin-kr/ko-reranker")

compressor = CrossEncoderReranker(model=model, top_n=5)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=pinecone_retriever
)

compressed_docs = compression_retriever.invoke("90년대 사건들을 나열해줘")

compressed_docs

# Reranker TestSet


In [2]:
from datasets import Dataset
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import PromptTemplate
import pandas as pd
import sys
import os
import json

current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)
from DataProcessing.utils import load_yaml
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.document_compressors import JinaRerank
from langchain_cohere import CohereRerank


reranker_list = ["cohere", "jina", "bge", "ko-reranker"]

for reranker in reranker_list:
    path = "./data/custom_testdataset.xlsx"
    # testset = pd.read_csv(path)
    testset = pd.read_excel(path)

    questions = testset["question"].to_list()
    ground_truth = testset["ground_truth"].to_list()

    data = {"question": [], "answer": [], "contexts": [], "ground_truth": ground_truth}

    prompt_template = load_yaml("../prompts/Retriever._prompt.yaml")["prompt"]
    prompt = PromptTemplate.from_template(prompt_template)
    # llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.5)
    llm = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0.5)

    if reranker == "cohere":
        compressor = CohereRerank(model="rerank-multilingual-v3.0")
    elif reranker == "jina":
        compressor = JinaRerank(model="jina-reranker-v2-base-multilingual", top_n=5)
    elif reranker == "bge":
        model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
        compressor = CrossEncoderReranker(model=model, top_n=5)
    elif reranker == "ko-reranker":
        model = HuggingFaceCrossEncoder(model_name="Dongjin-kr/ko-reranker")
        compressor = CrossEncoderReranker(model=model, top_n=5)

    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=pinecone_retriever
    )

    rag_chain = (
        {"context": compression_retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    for query in questions:
        data["question"].append(query)
        data["answer"].append(rag_chain.invoke(query))
        data["contexts"].append(
            [doc.page_content for doc in compression_retriever.invoke(query)]
        )

    path = f"./data/customtestset_sonnet_bm25_{reranker}.json"
    with open(path, "w") as file:
        json.dump(data, file)

# Evaluation


In [None]:
import pandas as pd
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)
from langchain_upstage.embeddings import UpstageEmbeddings
from langchain_openai import ChatOpenAI
import os
import json
from datasets import Dataset
from dotenv import load_dotenv


def run_evaluate(json_path: str, testcount: int = 0):
    load_dotenv()
    with open(json_path, "r", encoding="utf-8") as file:
        json_data = json.load(file)

    filename = os.path.basename(json_path).split(".")[0]

    dataset = Dataset.from_dict(json_data)

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1)
    embeddings = UpstageEmbeddings(model="solar-embedding-1-large-passage")

    result = evaluate(
        dataset=dataset,
        metrics=[
            context_precision,
            context_recall,
            faithfulness,
            answer_relevancy,
        ],
        llm=llm,
        embeddings=embeddings,
    )

    df = result.to_pandas()
    df.to_csv(f"./data/results/result_{filename}_{testcount}.csv")


base_path = "./data"
json_list = [
    x for x in os.listdir(base_path) if x.startswith("customtestset_sonnet_bm25")
]
print(json_list)
for path in json_list:
    file_path = os.path.join(base_path, path)
    run_evaluate(file_path, 0)
    print("Done")

In [4]:
import pandas as pd
import os

base_path = "./data/results"
result_list = os.listdir(base_path)


def print_describe(base_path, filename):
    path = os.path.join(base_path, filename)
    df = pd.read_csv(path, index_col=0)
    df = df[["context_precision", "context_recall", "faithfulness", "answer_relevancy"]]
    return df.agg(["mean", "std"])


result_df_mean = pd.DataFrame()
result_df_std = pd.DataFrame()

for file in result_list:
    if file.startswith("result_customtestset_sonnet_bm25"):
        result = print_describe(base_path, file)
        file_name = file.replace("result_customtestset_", "").replace("_0.csv", "")

        result_mean = result.loc["mean"]
        result_mean["파일명"] = file_name
        result_df_mean = pd.concat([result_df_mean, result_mean.to_frame().T])

        result_std = result.loc["std"]
        result_std["파일명"] = file_name
        result_df_std = pd.concat([result_df_std, result_std.to_frame().T])

In [5]:
result_df_mean

Unnamed: 0,context_precision,context_recall,faithfulness,answer_relevancy,파일명
mean,0.966393,0.549444,0.695256,0.036348,sonnet_bm25
mean,1.0,0.253333,0.780321,0.055719,sonnet_bm25_bge
mean,1.0,0.452857,0.667213,0.049887,sonnet_bm25_cohere
mean,0.98875,0.240714,0.628002,0.032591,sonnet_bm25_jina
mean,0.98875,0.52,0.629102,0.025016,sonnet_bm25_ko-reranker


In [6]:
result_df_std

Unnamed: 0,context_precision,context_recall,faithfulness,answer_relevancy,파일명
std,0.074134,0.449237,0.38319,0.089534,sonnet_bm25
std,0.0,0.327806,0.345748,0.121834,sonnet_bm25_bge
std,0.0,0.429842,0.434855,0.096625,sonnet_bm25_cohere
std,0.035576,0.38841,0.338389,0.084572,sonnet_bm25_jina
std,0.035576,0.473286,0.347224,0.06592,sonnet_bm25_ko-reranker
