In [4]:
from sentence_transformers import SentenceTransformer
import pandas as pd

# Model identifier passed to SentenceTransformer (a Korean SBERT checkpoint).
MODEL_NAME = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"
# CSV of menu entries; only its 'title' and 'url' columns are read below.
MENU_CSV_PATH = "../data/school_info/dmu_menu_urls.csv"

# Load the model (Korean SBERT).
model = SentenceTransformer(MODEL_NAME)

# Load the CSV.
df = pd.read_csv(MENU_CSV_PATH)

# pandas reads empty cells as NaN (a float), which is neither valid text for
# the encoder nor a usable link. Skip such rows here; filtering both columns
# in one step keeps `titles` and `urls` index-aligned. `df` itself is left
# untouched so the raw table is still available.
menu_rows = df.dropna(subset=["title", "url"])
titles = menu_rows["title"].tolist()
urls = menu_rows["url"].tolist()

# Embed the titles once up front so each query only needs one encode call.
title_vectors = model.encode(titles, convert_to_tensor=True)

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
import torch

def find_best_url(query, top_k=1):
    """Return the menu entries whose titles are most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the pre-computed ``title_vectors``.

    Args:
        query: Text to match against the menu titles.
        top_k: Maximum number of matches to return. Values of zero or below
            yield an empty list.

    Returns:
        A list of ``(title, url, score)`` tuples ordered from most to least
        similar. It holds at most ``top_k`` items (fewer when there are not
        that many titles).
    """
    # Without this guard, top_k=0 would slice with [-0:], which is [0:] and
    # selects *every* entry; negative values would drop the best matches.
    if top_k <= 0:
        return []

    query_vec = model.encode([query], convert_to_tensor=True)
    # cosine_similarity returns a (1, n_titles) matrix; take its single row.
    scores = cosine_similarity(query_vec.cpu().numpy(), title_vectors.cpu().numpy())[0]
    # argsort is ascending, so the last top_k indices are the best ones;
    # reverse them to get best-first order.
    top_idx = scores.argsort()[-top_k:][::-1]
    return [(titles[i], urls[i], scores[i]) for i in top_idx]

In [6]:
# Example lookup: ask for a department site and show the single best match.
query = "경영학부 사이트 알려줘"
matches = find_best_url(query)
result = matches[0]

# Each match is a (title, url, score) tuple; only the first two are displayed.
menu_title, menu_url = result[:2]
print(f"📌 관련 메뉴: {menu_title}")
print(f"👉 링크: {menu_url}")

📌 관련 메뉴: 대학생활/경영학부
👉 링크: https://www.dongyang.ac.kr/dmu/4873/subview.do
