In [1]:
!pip install faiss-cpu sentence-transformers pandas


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import faiss
import pickle
from sentence_transformers import SentenceTransformer, util

In [None]:
def load_faiss_db(folder_path):
    index = faiss.read_index(f"{folder_path}/faiss.index")
    with open(f"{folder_path}/texts.pkl", "rb") as f:
        texts = pickle.load(f)
    with open(f"{folder_path}/metadatas.pkl", "rb") as f:
        metadatas = pickle.load(f)
    return index, texts, metadatas

def evaluate_model(model, index, texts, metadatas, query_gt_pairs, k=3):
    results = []

    for item in query_gt_pairs:
        query = item["query"]
        ground_truth = item["ground_truth"]

        query_vec = model.encode([query], normalize_embeddings=True).astype("float32")
        D, I = index.search(query_vec, k)

        top_titles = [metadatas[idx]["title"] for idx in I[0]]
        found = any(ground_truth.strip() in title for title in top_titles)
        top1_sim = float(D[0][0])

        results.append({
            "query": query,
            "ground_truth": ground_truth,
            "top1_title": top_titles[0],
            "top1_similarity": round(top1_sim, 4),
            "found_in_top3": found
        })
    return pd.DataFrame(results)


In [None]:
# ✅ 4. 모델 & 데이터 불러오기
# 평가 쿼리셋
query_df = pd.read_csv("query_gt_pairs.csv")
query_gt_pairs = query_df.to_dict(orient="records")

# BGE
bge_model = SentenceTransformer("dragonkue/BGE-m3-ko", token="")
bge_index, bge_texts, bge_metadatas = load_faiss_db("bge_faiss_db")

# KURE
kure_model = SentenceTransformer("nlpai-lab/KURE-v1")
kure_index, kure_texts, kure_metadatas = load_faiss_db("kure_faiss_db")


In [None]:
# ✅ 5. 평가 실행
bge_results = evaluate_model(bge_model, bge_index, bge_texts, bge_metadatas, query_gt_pairs)
kure_results = evaluate_model(kure_model, kure_index, kure_texts, kure_metadatas, query_gt_pairs)


In [None]:
# ✅ 6. 결과 비교 출력
bge_results["model"] = "BGE"
kure_results["model"] = "KURE"
final_df = pd.concat([bge_results, kure_results]).reset_index(drop=True)

# 정량 평가
summary = final_df.groupby("model")[["top1_similarity", "found_in_top3"]].mean()
print("📊 성능 비교 요약:")
print(summary)

# 개별 결과 출력
final_df[["model", "query", "top1_title", "top1_similarity", "found_in_top3"]]


📊 성능 비교 요약:
       top1_similarity  found_in_top3
model                                
BGE           0.699789       0.555556
KURE          0.746000       0.555556


Unnamed: 0,model,query,top1_title,top1_similarity,found_in_top3
0,BGE,강아지가 털이 빠지고 자꾸 긁어요. 왜 그런가요?,"자꾸만 몸 긁는 강아지, 혹시 병일까원인은 이것",0.7006,False
1,BGE,강아지가 토하고 설사를 해요. 식중독일까요?,강아지 구토 설사 원인 식중독 증상 치료,0.6868,True
2,BGE,강아지 귀가 부어있고 아파해요. 무슨 병일까요?,강아지 귀질환 이개혈종 귀가 붓고 통증 있는 귓병 치료,0.681,True
3,BGE,강아지 잇몸색이 이상해요. 건강에 문제가 있을까요?,강아지 잇몸색 구강관리 건강 상태 체크,0.7256,True
4,BGE,강아지가 구토를 자주 해요. 이유가 뭘까요?,강아지 구토 이유 | 증상 및 대처 방법,0.7203,False
5,BGE,강아지가 피부를 핥고 상처가 생겼어요. 치료 방법이 궁금해요.,"강아지 가려움, 강아지가 자꾸 긁는 이유 5가지 및 해결책",0.6815,False
6,BGE,강아지가 밥을 안 먹고 구토를 해요. 병원에 가야 할까요?,우리집 강아지가 구토를 했어요,0.7143,False
7,BGE,강아지가 귀를 자꾸 긁고 붓기가 있어요.,강아지 귀질환 이개혈종 귀가 붓고 통증 있는 귓병 치료,0.6782,True
8,BGE,강아지 잇몸이 누렇게 변했어요. 괜찮은가요?,"강아지 입술, 검은색에서 다른 색으로 변한다면?",0.7098,True
9,KURE,강아지가 털이 빠지고 자꾸 긁어요. 왜 그런가요?,"자꾸만 몸 긁는 강아지, 혹시 병일까원인은 이것",0.7462,False


In [4]:
!pip install -U langchain_openai
!pip install -U langchain_community

Collecting langchain_openai
  Downloading langchain_openai-0.3.17-py3-none-any.whl.metadata (2.3 kB)
Downloading langchain_openai-0.3.17-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.9/62.9 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_openai
Successfully installed langchain_openai-0.3.17


In [6]:
import openai
from langchain_openai import OpenAIEmbeddings
import os

In [None]:
# ✅ OpenAI 임베딩용 모듈 불러오기
os.environ["OPENAI_API_KEY"] = ""

# 2. 임베딩 모델 생성
openai_embedder = OpenAIEmbeddings(model="text-embedding-3-small")

# ✅ OpenAI 벡터 DB 로드
openai_index = faiss.read_index("openai_faiss_db/index.faiss")
with open("openai_faiss_db/index.pkl", "rb") as f:
    openai_texts = pickle.load(f)

# ✅ 평가 함수 (OpenAI용)
def evaluate_openai(embedding_model, index, texts, query_gt_pairs, k=3):
    results = []
    for item in query_gt_pairs:
        query = item["query"]
        ground_truth = item["ground_truth"]

        try:
            query_vec = embedding_model.embed_query(query)
        except Exception as e:
            print(f"❌ 임베딩 실패: {query} -> {e}")
            continue

        query_vec = np.array([query_vec]).astype("float32")
        D, I = index.search(query_vec, k)
        # =========임수여니 코드 ========
        # top_texts = [texts[idx] for idx in I[0]]
        # ==========다인 수정 코드 ==========
        top_texts = [texts[idx] for idx in I[0] if idx != -1 and idx < len(texts)]
        found = any(ground_truth.strip() in t for t in top_texts)
        top1_sim = float(D[0][0])

# ====== 임수여니 코드 ===========
        # results.append({
        #     "query": query,
        #     "ground_truth": ground_truth,
        #     "top1_excerpt": top_texts[0][:100],
        #     "top1_similarity": round(top1_sim, 4),
        #     "found_in_top3": found
        # })

# ========= 다인 수정 코드 ===============
        results.append({
            "query": query,
            "ground_truth": ground_truth,
            "top1_excerpt": top_texts[0][:100] if top_texts else "(검색 결과 없음)",
            "top1_similarity": round(top1_sim, 4) if top1_sim is not None else None,
            "found_in_top3": found
        })

    return pd.DataFrame(results)


In [14]:
# ✅ OpenAI 모델 준비
openai_embedder = OpenAIEmbeddings(model="text-embedding-3-small")

# ✅ 평가 실행
openai_results = evaluate_openai(openai_embedder, openai_index, openai_texts, query_gt_pairs)
openai_results["model"] = "OpenAI"


In [15]:
print("🔍 벡터 개수:", openai_index.ntotal)
print("🧾 문서 개수:", len(openai_texts))


🔍 벡터 개수: 6500
🧾 문서 개수: 2


In [None]:
# 기존 BGE & KURE 결과도 통일된 형태로 맞추기
bge_results["model"] = "BGE"
kure_results["model"] = "KURE"

combined_df = pd.concat([bge_results, kure_results, openai_results]).reset_index(drop=True)

# 평균 성능 비교
summary = combined_df.groupby("model")[["top1_similarity", "found_in_top3"]].mean()
print("📊 세 모델 성능 요약 비교:")
print(summary)

# 개별 결과 확인
combined_df[["model", "query", "top1_similarity", "found_in_top3"]]
