# In [None]:


import json
import os
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.document import Document
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.runnables import RunnableConfig

# The API key is read from the environment so that no secret is stored in the
# source file. Export OPENAI_API_KEY before running; the value is None when
# the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_BASE = "https://api.opentyphoon.ai/v1"
MODEL_NAME = "typhoon-v2-70b-instruct"
EMBEDDING_MODEL_NAME = "BAAI/bge-m3"
# Built with os.path.join: the previous literal contained the invalid escape
# sequence "\M" and only worked with the Windows path separator.
JSON_PATH = os.path.join("Jsonfile", "M.JSON")
# NOTE(review): not referenced anywhere in the visible part of this script.
SIMILARITY_THRESHOLD = 0.7

def format_value(value):
    """Render a JSON value as human-readable text for a document body.

    Lists become one "- item" line per element and dicts become one
    "  key: value" line per entry; in both cases the elements are rendered
    recursively. ``None`` and empty containers yield the Thai placeholder
    "ไม่มีข้อมูล" ("no data"). Any other value is converted with ``str`` and
    literal backslash-n sequences are turned into real newlines.

    Args:
        value: A decoded JSON value (list, dict, scalar or ``None``).

    Returns:
        The formatted text.
    """
    placeholder = "ไม่มีข้อมูล"
    if value is None:
        return placeholder
    if isinstance(value, list):
        if not value:
            return placeholder
        # Recurse into the items so nested containers, None and escaped
        # newlines inside a list are rendered the same way as dict values.
        return "\n".join(f"- {format_value(item)}" for item in value)
    if isinstance(value, dict):
        if not value:
            return placeholder
        return "\n".join(f"  {k}: {format_value(v)}" for k, v in value.items())
    return str(value).replace("\\n", "\n")

def parse_json_to_docs(data, parent_key="", docs=None):
    """Flatten nested JSON data into a list of ``Document`` objects.

    A dict produces one document from its scalar entries (if it has any);
    nested dicts and lists are visited recursively and append their own
    documents first. A list reached through a key produces a single document
    containing the formatted list. Every document stores the dotted key path
    in ``metadata["source"]``.

    Args:
        data: The JSON node to process (dict, list or scalar).
        parent_key: Dotted key path of ``data``; recursive calls pass it with
            a trailing ".". Empty for the root node.
        docs: List that receives the documents. A new list is created when
            omitted.

    Returns:
        The list of documents (the same object as ``docs`` when provided).
    """
    if docs is None:
        docs = []

    topic_keys = ("หัวข้อ", "หัวข้อย่อย")
    source = parent_key.strip(".")

    if isinstance(data, dict):
        # Fall back to the dotted path without its trailing "." so the topic
        # line matches the "source" metadata and the list branch below.
        current_topic = data.get("หัวข้อ", data.get("หัวข้อย่อย", source))
        content_parts = []

        for key, value in data.items():
            if key in topic_keys:
                # Topic entries only label the document; they are not content.
                continue
            if isinstance(value, (dict, list)):
                parse_json_to_docs(value, f"{parent_key}{key}.", docs)
            else:
                readable_key = key.replace("_", " ").replace("เป้า ", "เป้าหมาย ")
                content_parts.append(f"{readable_key}: {format_value(value)}")

        if content_parts:
            page_content = f"หัวข้อ: {current_topic}\n" + "\n".join(content_parts)
            docs.append(
                Document(page_content=page_content.strip(), metadata={"source": source})
            )
    elif isinstance(data, list) and parent_key:
        # A list without a key path (i.e. a top-level list) is ignored.
        page_content = f"หัวข้อ: {source}\n{format_value(data)}"
        docs.append(
            Document(page_content=page_content.strip(), metadata={"source": source})
        )
    return docs

def extract_questions_from_docs(docs):
    """Collect the distinct question-like lines found in the documents.

    A line qualifies when it contains "?" and its stripped text is longer
    than 10 characters.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The unique stripped lines, in the order they first appear.
    """
    # A dict keeps insertion order, so de-duplicating through its keys gives
    # the same result on every run (a set would give an arbitrary order).
    unique_questions = {}
    for doc in docs:
        for line in doc.page_content.split("\n"):
            text = line.strip()
            if "?" in text and len(text) > 10:
                unique_questions[text] = None
    return list(unique_questions)

# Load the policy JSON and flatten the combined credit-policy section into
# documents.
with open(JSON_PATH, "r", encoding="utf-8") as f:
    policy_data = json.load(f)

documents = parse_json_to_docs(policy_data["นโยบายสินเชื่อ_รวม"])
if not documents:
    # Building a FAISS index from an empty list fails with an obscure error
    # inside the library; stop here with a message that names the cause.
    raise ValueError(f"No documents could be built from {JSON_PATH!r}")

model_kwargs = {'device': 'cpu'}
encode_kwargs = {}

# Embedding model used both for indexing the documents and for the queries.
embeddings = HuggingFaceBgeEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    query_instruction="Represent this query for retrieving relevant documents: "
)

# Chat model served through the OpenAI-compatible endpoint in OPENAI_API_BASE.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    openai_api_base=OPENAI_API_BASE,
    model_name=MODEL_NAME,
    temperature=0.5,
    max_tokens=8192,
)

# In-memory vector index; the retriever returns the 5 closest documents.
vectorstore = FAISS.from_documents(documents, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Inputs and output accumulator for the batch run: every question-like line
# found in the documents, and the list that collects the answers.
candidate_questions = extract_questions_from_docs(documents)
qa_results = []

# Thai prompt: tells the model it is a credit-policy assistant that must
# answer only from the supplied context, then provides the {context} and
# {input} slots and asks for the answer in Thai.
prompt_template = """
คุณคือผู้ช่วย AI ที่เชี่ยวชาญด้านนโยบายสินเชื่อ กรุณาตอบคำถามต่อไปนี้โดยใช้ข้อมูลที่ให้มาเท่านั้น:

ข้อมูลที่เกี่ยวข้อง (Context):
{context}

คำถาม:
{input}

คำตอบ (เป็นภาษาไทย):
"""

# Retrieval chain: the retriever fetches the documents, the document chain
# places them in the prompt and calls the LLM.
prompt = ChatPromptTemplate.from_template(prompt_template)
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Batch run: answer every candidate question and save the results, with the
# sources used for each answer, to qa_results.json.
for question in candidate_questions:
    # Runnable.invoke replaces the deprecated get_relevant_documents().
    retrieved_docs = retriever.invoke(question)
    if not retrieved_docs:
        # Nothing relevant was retrieved; skip the LLM call.
        continue

    response = retrieval_chain.invoke({"input": question}, config=RunnableConfig(tags=["batch_run"]))
    answer = response.get("answer") or ""
    if not answer.strip():
        continue

    # De-duplicate the sources while keeping retrieval order, so both the
    # printed list and the saved JSON are the same on every run.
    sources = list(dict.fromkeys(
        doc.metadata.get("source", "ไม่ทราบแหล่งที่มา")
        for doc in response.get("context", [])
    ))
    for source in sources:
        print(f"- {source}")

    qa_results.append({
        "question": question,
        "answer": answer,
        "sources": sources
    })

with open("qa_results.json", "w", encoding="utf-8") as f:
    json.dump(qa_results, f, ensure_ascii=False, indent=2)

# Interactive chat loop: read a question, answer it through the retrieval
# chain and list the sources. Type "exit", "quit" or "ออก" to leave.
try:
    while True:
        # Strip first so blank input and padded exit commands are recognised.
        user_query = input("คำถามของคุณ: ").strip()
        if not user_query:
            continue
        if user_query.lower() in ("exit", "quit", "ออก"):
            print("กำลังออกจาก Chatbot...")
            break

        print("กำลังค้นหาคำตอบ...")
        response = retrieval_chain.invoke({"input": user_query})

        print("\nคำตอบ:")
        print(response["answer"])

        print("\nแหล่งข้อมูลที่เกี่ยวข้อง:")
        # Print each source once, in retrieval order.
        seen_sources = set()
        for doc in response.get("context", []):
            source = doc.metadata.get("source", "ไม่ทราบแหล่งที่มา")
            if source not in seen_sources:
                print(f"- {source}")
                seen_sources.add(source)
        print("--------------------------------------------------")
except (KeyboardInterrupt, EOFError):
    # Ctrl+C or end of input ends the session cleanly instead of leaving a
    # traceback.
    print("\nกำลังออกจาก Chatbot...")


# KeyboardInterrupt: