In [5]:
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

# Load the topic-labelled corpus. The code below reads the "text" and "topic"
# columns of this CSV.
df = pd.read_csv("./data/data_topic.csv", encoding="utf8")
df.head(10)

texts = df["text"].tolist()
topics = df["topic"].tolist()

# Wrap every row in a Document and keep its topic as metadata, so the topic
# is stored next to the embedded text in the vector store.
docs = []
for text, topic in zip(texts, topics):
    doc = Document(page_content=text, metadata={"topic": topic})
    docs.append(doc)

docs[0:3]

# Embed all documents and write them to a Chroma store persisted on disk.
# NOTE(review): re-running this cell against an existing "./chromaDB_topic"
# directory presumably adds the same documents again -- confirm, and clear
# the directory first if duplicates are not wanted.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    persist_directory="./chromaDB_topic",
)


In [None]:
import uuid
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langgraph.graph import StateGraph, END
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForQuestionAnswering, pipeline
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableConfig
from langchain_core.chat_history import BaseChatMessageHistory


# Embedding model used to query the persisted Chroma store. It must be the
# same model the store was built with, otherwise query vectors and stored
# vectors are not comparable. (The variable name is misspelled but is kept
# because ask_question refers to it.)
embdedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Question-answering model and tokenizer loaded from the same checkpoint.
# NOTE(review): this model id could not be verified from this file -- confirm
# that it resolves on the Hugging Face Hub.
model_id = "monologg/koelecsmall-v2-finetuned-korquad"
qa_tokenizer = AutoTokenizer.from_pretrained(model_id)
qa_model = AutoModelForQuestionAnswering.from_pretrained(model_id)

# NOTE(review): max_length / do_sample / temperature / truncation look like
# text-generation settings; they presumably have no effect on a
# "question-answering" pipeline -- confirm and drop them if so.
text_gen = pipeline(
    "question-answering",
    model=qa_model,
    tokenizer=qa_tokenizer,
    device=-1,  # -1 selects the CPU
    max_length=512,
    do_sample=False,
    temperature=0.1,
    truncation=True,
)

# NOTE(review): HuggingFacePipeline is normally used with generation-style
# pipelines; verify that wrapping a question-answering pipeline is supported
# before invoking `llm`.
llm = HuggingFacePipeline(pipeline=text_gen)


# Maps session id -> chat history; filled lazily by get_chat_history.
# (The name is misspelled but is kept because get_chat_history refers to it.)
chats_by_ssetion_id = {}

def get_chat_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history stored for *session_id*.

    A new, empty InMemoryChatMessageHistory is created and registered in
    ``chats_by_ssetion_id`` the first time a session id is seen.
    """
    try:
        return chats_by_ssetion_id[session_id]
    except KeyError:
        history = InMemoryChatMessageHistory()
        chats_by_ssetion_id[session_id] = history
        return history

def ask_question(state, config: RunnableConfig):
    """Ask the user for a question and topic, retrieve context, and answer.

    The question and topic are read interactively with ``input()``. Up to
    three documents whose ``topic`` metadata equals the entered topic are
    retrieved from the persisted Chroma store, joined with any previous
    conversation of the session, and passed as context to the
    question-answering pipeline ``text_gen``.

    Args:
        state: Not used by this function.
        config: Must contain ``config["configurable"]["session_id"]``, which
            selects the chat history to read from and append to.

    Returns:
        A dict with the keys ``"question"`` (the entered question),
        ``"answer"`` (the extracted answer, or a fallback message) and
        ``"context"`` (list of the retrieved passages).

    Raises:
        KeyError: If ``config`` does not carry a session id.
    """
    session_id = config["configurable"]["session_id"]
    chat_history = get_chat_history(session_id)

    question = input("질문을 입력하세요: ").strip()
    topic = input("관련 토픽을 입력해주세요 (공정 유형, 유지보수, 이상대응, 전산관리, 안전규칙):").strip()

    # The QA pipeline rejects an empty question, so bail out before doing any
    # retrieval work. Nothing is written to the chat history in this case.
    if not question:
        return {
            "question": question,
            "answer": "질문이 입력되지 않았습니다.",
            "context": [],
        }

    retriever = Chroma(
        embedding_function=embdedding_model,
        persist_directory="./chromaDB_topic",
    ).as_retriever(search_kwargs={"k": 3, "filter": {"topic": topic}})

    best_contexts = [doc.page_content for doc in retriever.invoke(question)[:3]]
    combined_context = "\n".join(best_contexts)

    # Earlier turns of this session are prepended so the model can also pick
    # an answer span from the previous conversation.
    history_text = get_buffer_string(chat_history.messages)
    full_context = f"{history_text}\n{combined_context}" if history_text else combined_context

    # No retrieved passage and no history (e.g. a topic that matches no
    # document): the QA pipeline rejects an empty context, so return a
    # fallback answer. It is deliberately not stored in the history, because
    # the history is fed back into later contexts.
    if not full_context.strip():
        return {
            "question": question,
            "answer": "관련 문서를 찾지 못했습니다.",
            "context": best_contexts,
        }

    result = text_gen(question=question, context=full_context)
    answer = result["answer"]

    chat_history.add_user_message(question)
    chat_history.add_ai_message(answer)

    return {
        "question": question,
        "answer": answer,
        "context": best_contexts,
    }