In [None]:
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFacePipeline
from typing import List, Literal, Optional, TypedDict
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langgraph.graph import StateGraph, END


def load_word_documents(filepath: str):
    """Load a Word document through ``UnstructuredWordDocumentLoader``.

    Args:
        filepath: Path of the Word file handed to the loader.

    Returns:
        Whatever the loader's ``load()`` call returns.
    """
    return UnstructuredWordDocumentLoader(filepath).load()

def save_documents_to_chromadb(
    docs: list,
    persist_directory: str,
    embedding_model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
) -> Chroma:
    """Embed ``docs`` and store them in a Chroma vector store.

    Args:
        docs: Documents forwarded to ``Chroma.from_documents``.
        persist_directory: Directory passed to Chroma as ``persist_directory``.
        embedding_model_name: Model name used to build the
            ``HuggingFaceEmbeddings`` instance.

    Returns:
        The ``Chroma`` store returned by ``Chroma.from_documents``.
    """
    embedder = HuggingFaceEmbeddings(model_name=embedding_model_name)
    store = Chroma.from_documents(
        documents=docs,
        embedding=embedder,
        persist_directory=persist_directory,
    )
    print(f"Documents have been saved to ChromaDB at {persist_directory}.")
    return store

# Example usage: load the onboarding guide and persist its embeddings.
docs = load_word_documents(filepath="NewBie_개발환경가이드.docx")
vector_store = save_documents_to_chromadb(
    docs=docs,
    persist_directory="./chromaDB_Chatbot",
)


# ---------- Graph state definition ----------
class GraphState(TypedDict, total=False):
    """State dictionary passed between the graph nodes.

    Declared as a ``TypedDict`` class (not a function) so it can be used both
    as a type annotation and as the state schema given to ``StateGraph``.
    ``total=False`` makes every key optional, so a run can start from a state
    that only holds ``query`` and let later nodes add the other keys.
    """

    query: str                  # question text read by the nodes
    route: Literal["rag"]       # routing decision; only "rag" is used for now
    retrieved_docs: List[str]   # page contents of the retrieved documents
    answer: str                 # generated answer text

# ---------- Shared LLM construction helpers ----------
def build_zephyr_llm():
    """Build a LangChain LLM wrapper around ``HuggingFaceH4/zephyr-7b-beta``.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline
        configured with ``max_new_tokens=512``.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    # Tokenizer first, then model: same load order as before.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
    )
    return HuggingFacePipeline(pipeline=generator)

def build_llama3_llm():
    """Build a LangChain LLM wrapper around ``meta-llama/Meta-Llama-3-8B-Instruct``.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline
        configured with ``max_new_tokens=1024``.
    """
    checkpoint = "meta-llama/Meta-Llama-3-8B-Instruct"
    # Tokenizer first, then model: same load order as before.
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=1024,
    )
    return HuggingFacePipeline(pipeline=generator)

def build_retriever(chroma_persist_dir: str):
    """Open the ``onboarding_docs`` Chroma collection and return a retriever.

    Args:
        chroma_persist_dir: Directory passed to Chroma as ``persist_directory``.

    Returns:
        The result of ``as_retriever`` on the store, configured with
        ``search_kwargs={"k": 5}``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    store = Chroma(
        collection_name="onboarding_docs",
        persist_directory=chroma_persist_dir,
        embedding_function=embedder,
    )
    return store.as_retriever(search_kwargs={"k": 5})



def supervisor_agent(state: GraphState) -> GraphState:
    """Pick the route for the incoming request.

    Every request is currently routed to ``"rag"``. The routing decision could
    later be delegated to an LLM that classifies the query.

    Args:
        state: Current graph state; it must contain ``"query"``.

    Returns:
        A new dict holding every key of ``state`` plus ``"route": "rag"``.

    Raises:
        KeyError: If ``state`` has no ``"query"`` key.
    """
    # The query does not influence routing yet; the lookup is kept so a state
    # without a query still fails here with KeyError.
    _ = state["query"]

    routed = dict(state)
    routed["route"] = "rag"  # single path for now
    return routed

def supervisor_node(state: GraphState):
    """Graph node wrapper: hand ``state`` to ``supervisor_agent`` and return its result."""
    routed_state = supervisor_agent(state)
    return routed_state

# Query-rewriting LLM, built once at module level and shared by every call.
rewrite_llm = build_zephyr_llm()


def rag_orchestrator_agent(state: GraphState) -> GraphState:
    """Rewrite the user's question into a short search query.

    The question is embedded in a rewriting prompt, sent to ``rewrite_llm``,
    and the stripped model output replaces ``"query"`` in the returned state.

    Args:
        state: Current graph state; it must contain ``"query"``.

    Returns:
        A new dict with every key of ``state`` and ``"query"`` set to the
        rewritten query, or to the original query if the model output is
        empty after stripping.

    Raises:
        KeyError: If ``state`` has no ``"query"`` key.
    """
    query = state["query"]

    prompt = f"""
    너는 검색용 쿼리 리라이팅 에이전트야.
    아래 한국어/영어 혼합 질문을 RAG 검색에 적합한
    짧은 한 문장의 쿼리로 바꿔줘.

    질문: {query}
    리라이팅된 검색 쿼리:
    """
    # Use the Runnable ``invoke`` entry point; calling the LLM object directly
    # (``llm(prompt)``) is deprecated in langchain-core.
    rewritten = rewrite_llm.invoke(prompt)

    # An empty rewrite would leave the next node with nothing to search for,
    # so keep the original question in that case.
    rewritten_query = rewritten.strip() or query

    return {
        **state,
        "query": rewritten_query,  # pass the cleaned-up query to the next node
    }

def rag_orchestrator_node(state: GraphState):
    """Graph node wrapper: hand ``state`` to ``rag_orchestrator_agent`` and return its result."""
    rewritten_state = rag_orchestrator_agent(state)
    return rewritten_state

# Retrieve from the store that the ingestion step above actually filled.
# The documents are written to "./chromaDB_Chatbot" without a collection name,
# whereas build_retriever() opens the "onboarding_docs" collection; pointing it
# at "./chroma_onboarding" searched a store this script never writes to.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})


def retrieval_agent(state: GraphState):
    """Fetch the documents matching the query and store their text in the state.

    Args:
        state: Current graph state; it must contain ``"query"``.

    Returns:
        A new dict with every key of ``state`` plus ``"retrieved_docs"``, a
        list with the ``page_content`` of each retrieved document.

    Raises:
        KeyError: If ``state`` has no ``"query"`` key.
    """
    query = state["query"]

    # ``invoke`` replaces the deprecated ``get_relevant_documents`` method.
    matches = retriever.invoke(query)
    contents: List[str] = [match.page_content for match in matches]

    return {
        **state,
        "retrieved_docs": contents,
    }

def retrieval_node(state: GraphState):
    """Graph node wrapper: hand ``state`` to ``retrieval_agent`` and return its result."""
    retrieved_state = retrieval_agent(state)
    return retrieved_state


# Answer-generation LLM, built once at module level and shared by every call.
answer_llm = build_llama3_llm()


def answer_generation_agent(state: GraphState) -> GraphState:
    """Generate the final answer from the query and the retrieved documents.

    Args:
        state: Current graph state; it must contain ``"query"`` and may
            contain ``"retrieved_docs"`` (treated as empty when absent).

    Returns:
        A new dict with every key of ``state`` plus ``"answer"``, the stripped
        output of ``answer_llm``.

    Raises:
        KeyError: If ``state`` has no ``"query"`` key.
    """
    query = state["query"]
    docs = state.get("retrieved_docs", [])

    # Use at most the first five documents so the prompt does not grow too long.
    context = "\n\n---\n\n".join(docs[:5])

    prompt = f"""
    너는 개발 환경 온보딩/트러블슈팅 도우미야.
    아래는 Git, VSCode, Linux 등의 설정/문제 해결 관련 사내 문서 일부야.

    [컨텍스트]
    {context}

    [사용자 질문]
    {query}

    위 컨텍스트를 우선적으로 사용해서,
    한국어로 친절하고 구체적으로 답변해줘.
    모르면 모른다고 말하고, 추측은 최소화해.
    """

    # Use the Runnable ``invoke`` entry point; calling the LLM object directly
    # (``llm(prompt)``) is deprecated in langchain-core.
    answer = answer_llm.invoke(prompt)

    return {
        **state,
        "answer": answer.strip(),
    }
def answer_generation_node(state: GraphState) -> GraphState:
    """Graph node wrapper: hand ``state`` to ``answer_generation_agent`` and return its result."""
    answered_state = answer_generation_agent(state)
    return answered_state


def build_graph():
    """Assemble and compile the workflow graph.

    Active flow: supervisor -> retrieval -> answer_generation -> END.
    The rag_orchestrator (query rewriting) step is currently disabled.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    workflow = StateGraph(GraphState)

    workflow.add_node("supervisor", supervisor_node)
    # Disabled for now; when re-enabled, route supervisor -> rag_orchestrator
    # -> retrieval instead of supervisor -> retrieval.
    #workflow.add_node("rag_orchestrator", rag_orchestrator_node)
    workflow.add_node("retrieval", retrieval_node)
    workflow.add_node("answer_generation", answer_generation_node)

    # Entry node
    workflow.set_entry_point("supervisor")

    # Edge endpoints must be the names the nodes were registered under
    # ("retrieval"), not the Python function names ("retrieval_node").
    workflow.add_edge("supervisor", "retrieval")
    #workflow.add_edge("rag_orchestrator", "retrieval")
    workflow.add_edge("retrieval", "answer_generation")
    workflow.add_edge("answer_generation", END)

    return workflow.compile()



# Compile the workflow and push one sample question through it.
graph = build_graph()
init_state: GraphState = {
    "query": "VSCode에서 Python venv가 인식 안 될 때 설정 방법 알려줘",
}
result = graph.invoke(init_state)
print(result["answer"])

Documents have been saved to ChromaDB at ./chromaDB_Chatbot.
