In [None]:
from langchain.callbacks.manager import CallbackManager
from langchain_community.vectorstores import ElasticsearchStore
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import multiprocessing
import os

In [None]:
# Locate the GGUF model file under the parent of the current working directory.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))
MODEL_PATH = os.path.join(
    project_root,
    "ai_models",
    "hyperclova",
    "hyperclova-seed-text-1.5b-q4-k-m.gguf",
)

In [None]:
# Load the base system prompt (path is relative to the current working directory).
with open("./prompts/system/base_system_prompt.txt", "r", encoding="utf-8") as f:
    base_system_prompt = f.read()

# Load the QA task prompt (path is relative to the current working directory).
with open("./prompts/tasks/qa_prompt.txt", "r", encoding="utf-8") as f:
    qa_prompt = f.read()

In [None]:
# Embedding model loaded from a local directory; runs on CPU and
# requests normalized embedding vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="../ai_models/base_models/BGE-m3-ko",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)

In [None]:
# Vector store over the "documents" index of the local Elasticsearch
# instance, using the embedding model configured above.
vectorstore = ElasticsearchStore(
    embedding=embeddings,
    index_name="documents",
    es_url="http://localhost:9200",
)

In [None]:
# Retriever view of the vector store, configured with k=3 search results.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

In [None]:
# Attach a stdout streaming callback handler to the model.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Local llama.cpp model loaded from MODEL_PATH.
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.7,
    max_tokens=512,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=2048,  # context length
    # Leave one core free, but never request fewer than one thread
    # (cpu_count() - 1 would be 0 on a single-core machine).
    n_threads=max(1, multiprocessing.cpu_count() - 1),
)

In [None]:
# Final template layout: system prompt, a line holding the {context}
# placeholder, a blank line, then the QA task prompt.
template = base_system_prompt + "\n{context}\n\n" + qa_prompt
prompt_template = PromptTemplate.from_template(
    template=template,
    template_format="f-string",
)

# Build the retrieval QA chain from the LLM, retriever and prompt defined above.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,  # any LangChain-compatible LLM (llama-cpp, OpenAI, ...)
    chain_type="stuff",  # one of "stuff", "map_reduce", "refine"
    retriever=retriever,  # LangChain-compatible retriever
    chain_type_kwargs=dict(prompt=prompt_template),  # pass the PromptTemplate
)


In [None]:
# Run the chain on a sample question and display only the "result" entry.
# The query previously contained a duplicated syllable ("피지컬컬"); the
# intended term is "피지컬" (physical).
question = "피지컬 AI 시장 동향 알려줘"
rag_chain.invoke(question).get("result")