In [1]:
import gradio as gr
import pandas as pd
import re
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

# 1. Load the FAQ CSV
df = pd.read_csv("techreader_data/content_based_questions_with_answers.csv")

# 2. Build the FAQ index.
# Each question text is what gets embedded; the complete CSV row travels
# along as metadata so the answer and headers can be read back from a hit.
embeddings = OpenAIEmbeddings()
faq_docs = [
    Document(metadata=record.to_dict(), page_content=record["Question"])
    for _row_label, record in df.iterrows()
]
faq_db = FAISS.from_documents(documents=faq_docs, embedding=embeddings)
faq_retriever = faq_db.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

# 3. '답변:' 접두사 제거 함수
def clean_answer(text: str) -> str:
    """Return *text* without a leading "답변:" label.

    Only a literal "답변" label that is directly followed by a colon (ASCII
    ":" or full-width "：") at the very start of the text is removed; colons
    that appear later in the answer are left untouched.  Missing values
    (``None`` or the float NaN that pandas yields for empty CSV cells)
    become an empty string instead of raising.

    Args:
        text: Raw answer cell; normally a string.

    Returns:
        The whitespace-trimmed answer without the label.
    """
    # NaN is the only float that is unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return re.sub(r"^\s*답변\s*[:：]\s*", "", str(text)).strip()

# 4. 질문 후처리 함수 (**텍스트** 제거, > 제거)
def clean_question(q: str) -> str:
    """Strip Markdown emphasis and block-quote markers from a question.

    Args:
        q: Raw question cell.  Non-string values (``None``, or the NaN that
            pandas yields for an empty CSV cell) are treated as missing.

    Returns:
        The cleaned, whitespace-trimmed question, or "" when *q* is not a
        string.
    """
    if not isinstance(q, str):
        # re.sub() would raise TypeError on NaN/None.
        return ""
    # 1) **text** -> text
    q = re.sub(r"\*\*(.*?)\*\*", r"\1", q)
    # 2) any unpaired "**" (plus the whitespace after it) is dropped
    q = re.sub(r"\*\*\s*", "", q)
    # 3) a leading "> " block-quote marker is dropped
    q = re.sub(r"^\s*>\s*", "", q)
    return q.strip()



# 5. 카드 스타일 FAQ 템플릿
def format_faq_card(q, a, h1="", h2=""):
    """Render one FAQ entry as a collapsible HTML card.

    The question becomes the clickable ``<summary>`` row and the answer is
    the body revealed when the ``<details>`` element is opened.

    Args:
        q: Question text, inserted into the markup as-is.
        a: Answer text, inserted into the markup as-is.
        h1: Accepted for call compatibility; not rendered by this template.
        h2: Accepted for call compatibility; not rendered by this template.

    Returns:
        The HTML fragment for the card.
    """
    card_template = """
    <div style="margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;">
      <!-- 질문 영역 -->
      <details>
        <summary style="padding:12px; background:#f5f5f5; cursor:pointer; display:flex; align-items:center;">
          <div style="background:#1976d2; color:white; font-weight:bold;
                      border-radius:50%; width:32px; height:32px;
                      display:flex; align-items:center; justify-content:center;
                      margin-right:10px; font-size:16px; line-height:32px;">Q</div>
          <span style="font-weight:bold; font-size:16px;">{question}</span>
        </summary>
        
        <!-- 답변 영역 -->
        <div style="padding:15px; background:white; font-size:15px; line-height:1.6;">
          {answer}
        </div>
      </details>
    </div>
    """
    return card_template.format(question=q, answer=a)

# 6. FAQ 검색
def search_faq(user_query):
    """Return HTML cards for the FAQ entries most similar to *user_query*.

    Args:
        user_query: Free-text question typed by the user.

    Returns:
        The concatenated card markup for every document returned by
        ``faq_retriever``; "" when the query is blank.
    """
    if not user_query or not user_query.strip():
        # Nothing to embed, so do not call the retriever at all.
        return ""
    # Retriever.invoke() replaces the deprecated get_relevant_documents().
    results = faq_retriever.invoke(user_query)
    return "".join(
        format_faq_card(
            clean_question(doc.page_content),
            # .get(): a row without an "Answer" column must not raise KeyError.
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
        )
        for doc in results
    )

# 7. FAQ 전체 보기
def show_faq():
    """Render every FAQ row in ``df`` as HTML, grouped by header columns.

    Rows are grouped by "Header 1" and then by "Header 2"; each group gets
    a heading followed by one card per row.

    Returns:
        The concatenated HTML for all headings and cards.
    """
    html_blocks = []
    # dropna=False: by default groupby() silently discards rows whose key is
    # NaN, which would hide every FAQ that has an empty header cell.
    for h1, group in df.groupby("Header 1", dropna=False):
        h1_label = "기타" if pd.isna(h1) else h1
        html_blocks.append(f"<h2 style='color:#1976d2; margin-top:40px;'>📘 {h1_label}</h2>")
        for h2, rows in group.groupby("Header 2", dropna=False):
            # A missing sub-header gets no heading of its own.
            h2_label = "" if pd.isna(h2) or h2 == "nan" else h2
            if h2_label:
                html_blocks.append(f"<h3 style='color:#444; margin-top:20px;'>📌 {h2_label}</h3>")
            for _, row in rows.iterrows():
                q = clean_question(row["Question"])
                a = clean_answer(row["Answer"])
                html_blocks.append(format_faq_card(q, a, h1_label, h2_label))
    return "".join(html_blocks)

# 8. Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 📘 Tech Library FAQ 뷰어")

    # Search tab: pressing Enter in the textbox runs search_faq and shows
    # the returned HTML below it.
    with gr.Tab("FAQ 검색"):
        query = gr.Textbox(label="질문을 입력하세요")
        output = gr.HTML()
        query.submit(fn=search_faq, inputs=query, outputs=output)

    # Browse tab: the full listing is rendered once, while the UI is built.
    with gr.Tab("FAQ 전체 보기"):
        faq_output = gr.HTML(value=show_faq())

demo.launch()


* Running on local URL:  http://127.0.0.1:7881
* To create a public link, set `share=True` in `launch()`.




  results = faq_retriever.get_relevant_documents(user_query)


In [None]:
# TechReader_gayoon\techreader_data\header_based_FAQ2_with_paraphrases.csv 
# TechReader_gayoon\techreader_data\content_based_FAQ2_with_paraphrases.csv

In [3]:
import gradio as gr
import pandas as pd
import re
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

# -----------------------------
# 1. Load the CSVs (two FAQ files merged into one frame)
# -----------------------------
# The "Source" column records which file each row came from.
df_header = pd.read_csv(
    "techreader_data/header_based_FAQ2_with_paraphrases.csv"
).assign(Source="Header-based")
df_content = pd.read_csv(
    "techreader_data/content_based_FAQ2_with_paraphrases.csv"
).assign(Source="Content-based")
df = pd.concat([df_header, df_content], ignore_index=True)

# -----------------------------
# 2. Build the FAQ index
# -----------------------------
# Each question text is what gets embedded; the complete row travels along
# as metadata so the answer, headers and source can be read back from a hit.
embeddings = OpenAIEmbeddings()
faq_docs = [
    Document(metadata=record.to_dict(), page_content=record["Question"])
    for _row_label, record in df.iterrows()
]
faq_db = FAISS.from_documents(documents=faq_docs, embedding=embeddings)
faq_retriever = faq_db.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

# -----------------------------
# 3. 후처리 함수
# -----------------------------
def clean_answer(text: str) -> str:
    """Return *text* without a leading "답변:" label.

    Only a literal "답변" label that is directly followed by a colon (ASCII
    ":" or full-width "：") at the very start of the text is removed; colons
    that appear later in the answer are left untouched.  Missing values
    (``None`` or the float NaN that pandas yields for empty CSV cells)
    become an empty string rather than the text "None"/"nan".

    Args:
        text: Raw answer cell; normally a string.

    Returns:
        The whitespace-trimmed answer without the label.
    """
    # NaN is the only float that is unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return re.sub(r"^\s*답변\s*[:：]\s*", "", str(text)).strip()

def clean_question(q: str) -> str:
    """Strip Markdown emphasis and block-quote markers from a question.

    Returns "" when *q* is not a string.
    """
    if not isinstance(q, str):
        return ""
    # Applied in order: paired **text**, then any leftover "**" with its
    # trailing whitespace, then a leading "> " quote marker.
    substitutions = (
        (r"\*\*(.*?)\*\*", r"\1"),
        (r"\*\*\s*", ""),
        (r"^\s*>\s*", ""),
    )
    cleaned = q
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# -----------------------------
# 4. 카드 스타일 FAQ 템플릿
# -----------------------------
def format_faq_card(q, a, h1="", h2="", source=""):
    """Render one FAQ entry as a collapsible HTML card with a source line.

    The question becomes the clickable ``<summary>`` row; the answer and a
    small "출처" (source) line form the body of the ``<details>`` element.
    Header/source values that are missing (``None``, "", or NaN as pandas
    yields for empty CSV cells) are left out of the source line instead of
    being printed as "nan".

    Args:
        q: Question text, inserted into the markup as-is.
        a: Answer text, inserted into the markup as-is.
        h1: Top-level header of the entry.
        h2: Second-level header of the entry.
        source: Label of the data set the entry came from.

    Returns:
        The HTML fragment for the card.
    """
    def _label(value):
        # str(NaN) is "nan"; treat it like an empty cell.
        text = "" if value is None else str(value).strip()
        return "" if text.lower() == "nan" else text

    trail = " > ".join(part for part in (_label(h1), _label(h2)) if part)
    origin = _label(source)
    if origin:
        trail = f"{trail} ({origin})" if trail else f"({origin})"
    source_html = (
        f'<br><br><span style="color:#666; font-size:13px;">출처: {trail}</span>'
        if trail
        else ""
    )
    return f"""
    <div style="margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;">
      <details>
        <summary style="padding:12px; background:#f5f5f5; cursor:pointer; display:flex; align-items:center;">
          <div style="background:#1976d2; color:white; font-weight:bold;
                      border-radius:50%; width:32px; height:32px;
                      display:flex; align-items:center; justify-content:center;
                      margin-right:10px; font-size:16px; line-height:32px;">Q</div>
          <span style="font-weight:bold; font-size:16px;">{q}</span>
        </summary>
        <div style="padding:15px; background:white; font-size:15px; line-height:1.6;">
          {a}
          {source_html}
        </div>
      </details>
    </div>
    """

# -----------------------------
# 5. FAQ 검색
# -----------------------------
def search_faq(user_query):
    """Return HTML cards for the FAQ entries most similar to *user_query*.

    Args:
        user_query: Free-text question typed by the user.

    Returns:
        The concatenated card markup for every document returned by
        ``faq_retriever``; "" when the query is blank.
    """
    if not user_query or not user_query.strip():
        # Nothing to embed, so do not call the retriever at all.
        return ""
    # Retriever.invoke() replaces the deprecated get_relevant_documents().
    results = faq_retriever.invoke(user_query)
    return "".join(
        format_faq_card(
            clean_question(doc.page_content),
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
            doc.metadata.get("Source", ""),
        )
        for doc in results
    )

# -----------------------------
# 6. FAQ 전체 보기
# -----------------------------
def show_faq():
    """Render every FAQ row in ``df`` as HTML, grouped by header columns.

    Rows are grouped by "Header 1" and then by "Header 2"; each group gets
    a heading followed by one card per row.

    Returns:
        The concatenated HTML for all headings and cards.
    """
    html_blocks = []
    # dropna=False: by default groupby() silently discards rows whose key is
    # NaN, which would hide every FAQ that has an empty header cell.
    for h1, group in df.groupby("Header 1", dropna=False):
        h1_label = "기타" if pd.isna(h1) else h1
        html_blocks.append(f"<h2 style='color:#1976d2; margin-top:40px;'>📘 {h1_label}</h2>")
        for h2, rows in group.groupby("Header 2", dropna=False):
            # A missing sub-header gets no heading of its own.
            h2_label = "" if pd.isna(h2) or h2 == "nan" else h2
            if h2_label:
                html_blocks.append(f"<h3 style='color:#444; margin-top:20px;'>📌 {h2_label}</h3>")
            for _, row in rows.iterrows():
                q = clean_question(row["Question"])
                a = clean_answer(row["Answer"])
                source = row.get("Source", "")
                html_blocks.append(format_faq_card(q, a, h1_label, h2_label, source))
    return "".join(html_blocks)

# -----------------------------
# 7. Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 📘 Tech Library FAQ 뷰어")

    # Search tab: pressing Enter in the textbox runs search_faq and shows
    # the returned HTML below it.
    with gr.Tab("FAQ 검색"):
        query = gr.Textbox(label="질문을 입력하세요")
        output = gr.HTML()
        query.submit(fn=search_faq, inputs=query, outputs=output)

    # Browse tab: the full listing is rendered once, while the UI is built.
    with gr.Tab("FAQ 전체 보기"):
        faq_output = gr.HTML(value=show_faq())

demo.launch()


* Running on local URL:  http://127.0.0.1:7882
* To create a public link, set `share=True` in `launch()`.




# Gradio : FAQ + 일반 검색 통합 뷰어 

In [1]:
import gradio as gr
import pandas as pd
import re
import os
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

# -----------------------------
# Gemini setup (used to write the document-based answers)
# -----------------------------
# Raises KeyError when GOOGLE_API_KEY is not set in the environment.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel("gemini-2.5-pro")

# -----------------------------
# 1. Load the CSVs (FAQ: header-based + content-based)
# -----------------------------
# The "Source" column records which file each row came from.
df_header = pd.read_csv(
    "techreader_data/header_based_FAQ2_with_paraphrases.csv"
).assign(Source="Header-based")
df_content = pd.read_csv(
    "techreader_data/content_based_FAQ2_with_paraphrases.csv"
).assign(Source="Content-based")
df_faq = pd.concat([df_header, df_content], ignore_index=True)

# -----------------------------
# 2. Build the FAQ index
# -----------------------------
# Each question text is what gets embedded; the complete row travels along
# as metadata so the answer, headers and source can be read back from a hit.
embeddings = OpenAIEmbeddings()
faq_docs = [
    Document(metadata=record.to_dict(), page_content=record["Question"])
    for _row_label, record in df_faq.iterrows()
]
faq_db = FAISS.from_documents(documents=faq_docs, embedding=embeddings)
faq_retriever = faq_db.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

# -----------------------------
# 3. Build the chunk index (for searching the original document text)
# -----------------------------
import ast  # local to this step: parses the stringified metadata dicts

chunks_df = pd.read_csv("techreader_data/chunks_output.csv")

chunk_docs = [
    Document(
        page_content=row["Content"],
        # The "Metadata" cell is a stringified dict. literal_eval() accepts
        # Python literals only, whereas the previous eval() would execute
        # any expression that happened to be stored in the CSV.
        metadata={"Chunk No": row["Chunk No"], **ast.literal_eval(row["Metadata"])},
    )
    for _, row in chunks_df.iterrows()
]
chunk_db = FAISS.from_documents(chunk_docs, embeddings)
chunk_retriever = chunk_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# -----------------------------
# 4. 후처리 함수
# -----------------------------
def clean_answer(text: str) -> str:
    """Return *text* without a leading "답변:" label.

    Only a literal "답변" label that is directly followed by a colon (ASCII
    ":" or full-width "：") at the very start of the text is removed; colons
    that appear later in the answer are left untouched.  Missing values
    (``None`` or the float NaN that pandas yields for empty CSV cells)
    become an empty string rather than the text "None"/"nan".

    Args:
        text: Raw answer cell; normally a string.

    Returns:
        The whitespace-trimmed answer without the label.
    """
    # NaN is the only float that is unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return re.sub(r"^\s*답변\s*[:：]\s*", "", str(text)).strip()

def clean_question(q: str) -> str:
    """Strip Markdown emphasis and block-quote markers from a question.

    Returns "" when *q* is not a string.
    """
    if not isinstance(q, str):
        return ""
    # Applied in order: paired **text**, then any leftover "**" with its
    # trailing whitespace, then a leading "> " quote marker.
    substitutions = (
        (r"\*\*(.*?)\*\*", r"\1"),
        (r"\*\*\s*", ""),
        (r"^\s*>\s*", ""),
    )
    cleaned = q
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# -----------------------------
# 5. 카드 스타일 FAQ 템플릿
# -----------------------------
def format_faq_card(q, a, h1="", h2="", source=""):
    """Render one FAQ entry as a collapsible HTML card with a source line.

    The question becomes the clickable ``<summary>`` row; the answer and a
    small "출처" (source) line form the body of the ``<details>`` element.
    Header/source values that are missing (``None``, "", or NaN as pandas
    yields for empty CSV cells) are left out of the source line instead of
    being printed as "nan".

    Args:
        q: Question text, inserted into the markup as-is.
        a: Answer text, inserted into the markup as-is.
        h1: Top-level header of the entry.
        h2: Second-level header of the entry.
        source: Label of the data set the entry came from.

    Returns:
        The HTML fragment for the card.
    """
    def _label(value):
        # str(NaN) is "nan"; treat it like an empty cell.
        text = "" if value is None else str(value).strip()
        return "" if text.lower() == "nan" else text

    trail = " > ".join(part for part in (_label(h1), _label(h2)) if part)
    origin = _label(source)
    if origin:
        trail = f"{trail} ({origin})" if trail else f"({origin})"
    source_html = (
        f'<br><br><span style="color:#666; font-size:13px;">출처: {trail}</span>'
        if trail
        else ""
    )
    return f"""
    <div style="margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;">
      <details>
        <summary style="padding:12px; background:#f5f5f5; cursor:pointer; display:flex; align-items:center;">
          <div style="background:#1976d2; color:white; font-weight:bold;
                      border-radius:50%; width:32px; height:32px;
                      display:flex; align-items:center; justify-content:center;
                      margin-right:10px; font-size:16px; line-height:32px;">Q</div>
          <span style="font-weight:bold; font-size:16px;">{q}</span>
        </summary>
        <div style="padding:15px; background:white; font-size:15px; line-height:1.6;">
          {a}
          {source_html}
        </div>
      </details>
    </div>
    """

# -----------------------------
# 6. FAQ 검색
# -----------------------------
def search_faq(user_query):
    """Return HTML cards for the FAQ entries most similar to *user_query*.

    Args:
        user_query: Free-text question typed by the user.

    Returns:
        The concatenated card markup for every document returned by
        ``faq_retriever``; "" when the query is blank.
    """
    if not user_query or not user_query.strip():
        # Nothing to embed, so do not call the retriever at all.
        return ""
    # Retriever.invoke() replaces the deprecated get_relevant_documents().
    results = faq_retriever.invoke(user_query)
    return "".join(
        format_faq_card(
            clean_question(doc.page_content),
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
            doc.metadata.get("Source", ""),
        )
        for doc in results
    )

# -----------------------------
# 7. FAQ 전체 보기
# -----------------------------
def show_faq():
    """Render every FAQ row in ``df_faq`` as HTML, grouped by header columns.

    Rows are grouped by "Header 1" and then by "Header 2"; each group gets
    a heading followed by one card per row.

    Returns:
        The concatenated HTML for all headings and cards.
    """
    html_blocks = []
    # dropna=False: by default groupby() silently discards rows whose key is
    # NaN, which would hide every FAQ that has an empty header cell.
    for h1, group in df_faq.groupby("Header 1", dropna=False):
        h1_label = "기타" if pd.isna(h1) else h1
        html_blocks.append(f"<h2 style='color:#1976d2; margin-top:40px;'>📘 {h1_label}</h2>")
        for h2, rows in group.groupby("Header 2", dropna=False):
            # A missing sub-header gets no heading of its own.
            h2_label = "" if pd.isna(h2) or h2 == "nan" else h2
            if h2_label:
                html_blocks.append(f"<h3 style='color:#444; margin-top:20px;'>📌 {h2_label}</h3>")
            for _, row in rows.iterrows():
                q = clean_question(row["Question"])
                a = clean_answer(row["Answer"])
                source = row.get("Source", "")
                html_blocks.append(format_faq_card(q, a, h1_label, h2_label, source))
    return "".join(html_blocks)

# -----------------------------
# 8. 일반 검색 (Chunk 기반, LLM 다듬기)
# -----------------------------
def chunk_answer(query, retrieved_docs):
    """Answer *query* from the retrieved chunks via the LLM, as an HTML card.

    The chunks are listed in the prompt together with their chunk number and
    header metadata, the module-level ``model`` is asked to answer from them
    only, and the reply is wrapped in a card.

    Args:
        query: The user's question.
        retrieved_docs: Documents whose ``metadata`` may hold "Chunk No" and
            "Header 1/2/3" and whose ``page_content`` is the chunk text.

    Returns:
        HTML for the answer card.  When there are no documents, or the model
        call fails or yields no text, the card carries a bracketed notice
        instead of an answer.
    """
    import html  # local import: only used to escape error text below

    def render_card(body_html):
        # Card wrapper shared by the answer and the notices.
        return (
            "<div style='margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;'>"
            "<div style='padding:12px; background:#f5f5f5; font-weight:bold; font-size:16px;'>📑 문서 기반 응답</div>"
            f"<div style='padding:15px; background:white; font-size:15px; line-height:1.6;'>{body_html}</div>"
            "</div>"
        )

    if not retrieved_docs:
        # Without source text the prompt would invite an ungrounded answer,
        # so skip the model call entirely.
        return render_card("[⚠️ 답변 없음: 검색된 문서 조각이 없음]")

    docs_text = "\n\n".join(
        f"[Chunk {doc.metadata.get('Chunk No')}] "
        f"(Header1: {doc.metadata.get('Header 1','')}, "
        f"Header2: {doc.metadata.get('Header 2','')}, "
        f"Header3: {doc.metadata.get('Header 3','')})\n"
        f"{doc.page_content}"
        for doc in retrieved_docs
    )

    prompt = f"""
    너는 연구 보고서를 요약하는 LLM 어시스턴트다. 
    아래 문서 조각들만 참고해서 질문에 대한 답변을 작성하라. 

    [요구사항]
    - 문서 내용만 활용할 것 (새로운 사실 생성 금지).
    - 답변은 3~4문단, 600~800자 내외로 정리.
    - 결론 문단은 반드시 '따라서, ~이다.' 또는 '결론적으로, ~라고 할 수 있다.' 형태로 마무리.
    - 답변 후 반드시 "출처" 섹션을 추가하여 사용한 Header1/2/3와 Chunk No를 나열할 것.
    - 출력은 반드시 HTML 카드 형태로 (FAQ 카드 스타일) 반환.

    질문: {query}

    문서 조각:
    {docs_text}
    """

    try:
        response = model.generate_content(
            prompt,
            generation_config={"max_output_tokens": 9000, "temperature": 0.5}
        )

        if response.candidates and response.candidates[0].content.parts:
            answer = "".join(
                p.text for p in response.candidates[0].content.parts if hasattr(p, "text")
            ).strip()
        else:
            answer = "[⚠️ 답변 없음: 모델이 응답을 생성하지 않음]"

    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # shown in the card rather than raised.  The message is escaped
        # because it is inserted into HTML.
        answer = f"[⚠️ 오류 발생: {html.escape(str(e))}]"

    # Strip Markdown code-fence markers the model may wrap its HTML in.
    answer = answer.replace("```html", "").replace("```", "").strip()

    # Keep the model's line breaks visible inside the HTML component.
    answer_html = answer.replace("\n", "<br>")

    return render_card(answer_html)


# -----------------------------
# Hybrid Search 함수 정의 (UI보다 위에 있어야 함)
# -----------------------------
# Minimum relevance score (0..1, higher = more similar) a FAQ hit needs in
# order to be shown next to the document-based answer.
FAQ_THRESHOLD = 0.7


def hybrid_search(query, faq_retriever, chunk_retriever):
    """Combine the document-based answer with sufficiently relevant FAQ hits.

    Args:
        query: The user's question.
        faq_retriever: Retriever over the FAQ questions; when it exposes a
            ``vectorstore`` the hits are filtered by relevance score.
        chunk_retriever: Retriever over the document chunks.

    Returns:
        HTML: the chunk-based answer, followed by a rule and the FAQ cards
        when at least one FAQ hit reaches ``FAQ_THRESHOLD``.
    """
    try:
        # Relevance scores are normalised so that higher means more similar.
        # similarity_search_with_score() on FAISS returns a distance instead
        # (lower = closer), for which ">= threshold" keeps the worst hits.
        faq_results = faq_retriever.vectorstore.similarity_search_with_relevance_scores(query, k=3)
    except (AttributeError, NotImplementedError):
        # No score-capable vector store behind the retriever: take the plain
        # top 3 and treat them as fully relevant.
        faq_results = [(doc, 1.0) for doc in faq_retriever.invoke(query)[:3]]

    # Rendered with the same card helpers as the FAQ tab (the previously
    # called format_faq_answer() is not defined anywhere in this cell).
    faq_outputs = [
        format_faq_card(
            clean_question(doc.page_content),
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
            doc.metadata.get("Source", ""),
        )
        for doc, score in faq_results
        if score >= FAQ_THRESHOLD
    ]

    # The chunk-based answer is always the main result.
    chunk_results = chunk_retriever.invoke(query)
    chunk_output = chunk_answer(query, chunk_results) if chunk_results else "⚠️ Chunk 기반 결과 없음"

    if not faq_outputs:
        return chunk_output
    # The result is shown in an HTML component, so separate the two parts
    # with <hr>; a Markdown "---" would be displayed literally.
    faq_block = "".join(faq_outputs)
    return f"{chunk_output}<hr>{faq_block}"

# -----------------------------
# 9. Gradio UI 
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 📘 Tech Library 검색 뷰어")

    # General search: document-based answer plus matching FAQ cards.
    with gr.Tab("일반 검색 (문서 기반)"):
        query2 = gr.Textbox(label="일반 질문을 입력하세요")
        output2 = gr.HTML()
        query2.submit(
            fn=lambda q: hybrid_search(q, faq_retriever, chunk_retriever),
            inputs=query2,
            outputs=output2,
        )

    # FAQ search: cards for the closest FAQ questions only.
    with gr.Tab("FAQ 검색"):
        query = gr.Textbox(label="FAQ 질문을 입력하세요")
        output = gr.HTML()
        query.submit(fn=search_faq, inputs=query, outputs=output)

    # Browse tab: the full listing is rendered once, while the UI is built.
    with gr.Tab("FAQ 전체 보기"):
        faq_output = gr.HTML(value=show_faq())

demo.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




  chunk_results = chunk_retriever.get_relevant_documents(query)


In [3]:
import gradio as gr
import pandas as pd
import re
import os
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document

# -----------------------------
# Gemini setup (used to write the document-based answers)
# -----------------------------
# Raises KeyError when GOOGLE_API_KEY is not set in the environment.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel("gemini-2.5-pro")

# -----------------------------
# 1. Load the CSVs (FAQ: header-based + content-based)
# -----------------------------
# The "Source" column records which file each row came from.
df_header = pd.read_csv(
    "techreader_data/header_based_FAQ2_with_paraphrases.csv"
).assign(Source="Header-based")
df_content = pd.read_csv(
    "techreader_data/content_based_FAQ2_with_paraphrases.csv"
).assign(Source="Content-based")
df_faq = pd.concat([df_header, df_content], ignore_index=True)

# -----------------------------
# 2. Build the FAQ index
# -----------------------------
# Each question text is what gets embedded; the complete row travels along
# as metadata so the answer, headers and source can be read back from a hit.
embeddings = OpenAIEmbeddings()
faq_docs = [
    Document(metadata=record.to_dict(), page_content=record["Question"])
    for _row_label, record in df_faq.iterrows()
]
faq_db = FAISS.from_documents(documents=faq_docs, embedding=embeddings)
faq_retriever = faq_db.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

# -----------------------------
# 3. Build the chunk index (for searching the original document text)
# -----------------------------
import ast  # local to this step: parses the stringified metadata dicts

chunks_df = pd.read_csv("techreader_data/chunks_output.csv")

chunk_docs = [
    Document(
        page_content=row["Content"],
        # The "Metadata" cell is a stringified dict. literal_eval() accepts
        # Python literals only, whereas the previous eval() would execute
        # any expression that happened to be stored in the CSV.
        metadata={"Chunk No": row["Chunk No"], **ast.literal_eval(row["Metadata"])},
    )
    for _, row in chunks_df.iterrows()
]
chunk_db = FAISS.from_documents(chunk_docs, embeddings)
chunk_retriever = chunk_db.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# -----------------------------
# 4. 후처리 함수
# -----------------------------
def clean_answer(text: str) -> str:
    """Return *text* without a leading "답변:" label.

    Only a literal "답변" label that is directly followed by a colon (ASCII
    ":" or full-width "：") at the very start of the text is removed; colons
    that appear later in the answer are left untouched.  Missing values
    (``None`` or the float NaN that pandas yields for empty CSV cells)
    become an empty string rather than the text "None"/"nan".

    Args:
        text: Raw answer cell; normally a string.

    Returns:
        The whitespace-trimmed answer without the label.
    """
    # NaN is the only float that is unequal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    return re.sub(r"^\s*답변\s*[:：]\s*", "", str(text)).strip()

def clean_question(q: str) -> str:
    """Strip Markdown emphasis and block-quote markers from a question.

    Returns "" when *q* is not a string.
    """
    if not isinstance(q, str):
        return ""
    # Applied in order: paired **text**, then any leftover "**" with its
    # trailing whitespace, then a leading "> " quote marker.
    substitutions = (
        (r"\*\*(.*?)\*\*", r"\1"),
        (r"\*\*\s*", ""),
        (r"^\s*>\s*", ""),
    )
    cleaned = q
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# -----------------------------
# 5. 카드 스타일 FAQ 템플릿
# -----------------------------
def format_faq_card(q, a, h1="", h2="", source=""):
    """Render one FAQ entry as a collapsible HTML card with a source line.

    The question becomes the clickable ``<summary>`` row; the answer and a
    small "출처" (source) line form the body of the ``<details>`` element.
    Header/source values that are missing (``None``, "", or NaN as pandas
    yields for empty CSV cells) are left out of the source line instead of
    being printed as "nan".

    Args:
        q: Question text, inserted into the markup as-is.
        a: Answer text, inserted into the markup as-is.
        h1: Top-level header of the entry.
        h2: Second-level header of the entry.
        source: Label of the data set the entry came from.

    Returns:
        The HTML fragment for the card.
    """
    def _label(value):
        # str(NaN) is "nan"; treat it like an empty cell.
        text = "" if value is None else str(value).strip()
        return "" if text.lower() == "nan" else text

    trail = " > ".join(part for part in (_label(h1), _label(h2)) if part)
    origin = _label(source)
    if origin:
        trail = f"{trail} ({origin})" if trail else f"({origin})"
    source_html = (
        f'<br><br><span style="color:#666; font-size:13px;">출처: {trail}</span>'
        if trail
        else ""
    )
    return f"""
    <div style="margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;">
      <details>
        <summary style="padding:12px; background:#f5f5f5; cursor:pointer; display:flex; align-items:center;">
          <div style="background:#1976d2; color:white; font-weight:bold;
                      border-radius:50%; width:32px; height:32px;
                      display:flex; align-items:center; justify-content:center;
                      margin-right:10px; font-size:16px; line-height:32px;">Q</div>
          <span style="font-weight:bold; font-size:16px;">{q}</span>
        </summary>
        <div style="padding:15px; background:white; font-size:15px; line-height:1.6;">
          {a}
          {source_html}
        </div>
      </details>
    </div>
    """

# -----------------------------
# 6. FAQ 검색
# -----------------------------
def search_faq(user_query):
    """Return HTML cards for the FAQ entries most similar to *user_query*.

    Args:
        user_query: Free-text question typed by the user.

    Returns:
        The concatenated card markup for every document returned by
        ``faq_retriever``; "" when the query is blank.
    """
    if not user_query or not user_query.strip():
        # Nothing to embed, so do not call the retriever at all.
        return ""
    # Retriever.invoke() replaces the deprecated get_relevant_documents().
    results = faq_retriever.invoke(user_query)
    return "".join(
        format_faq_card(
            clean_question(doc.page_content),
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
            doc.metadata.get("Source", ""),
        )
        for doc in results
    )

# -----------------------------
# 7. FAQ 전체 보기
# -----------------------------
def show_faq():
    """Render every FAQ row in ``df_faq`` as HTML, grouped by header columns.

    Rows are grouped by "Header 1" and then by "Header 2"; each group gets
    a heading followed by one card per row.

    Returns:
        The concatenated HTML for all headings and cards.
    """
    html_blocks = []
    # dropna=False: by default groupby() silently discards rows whose key is
    # NaN, which would hide every FAQ that has an empty header cell.
    for h1, group in df_faq.groupby("Header 1", dropna=False):
        h1_label = "기타" if pd.isna(h1) else h1
        html_blocks.append(f"<h2 style='color:#1976d2; margin-top:40px;'>📘 {h1_label}</h2>")
        for h2, rows in group.groupby("Header 2", dropna=False):
            # A missing sub-header gets no heading of its own.
            h2_label = "" if pd.isna(h2) or h2 == "nan" else h2
            if h2_label:
                html_blocks.append(f"<h3 style='color:#444; margin-top:20px;'>📌 {h2_label}</h3>")
            for _, row in rows.iterrows():
                q = clean_question(row["Question"])
                a = clean_answer(row["Answer"])
                source = row.get("Source", "")
                html_blocks.append(format_faq_card(q, a, h1_label, h2_label, source))
    return "".join(html_blocks)

# -----------------------------
# 8. 일반 검색 (Chunk 기반, LLM 다듬기)
# -----------------------------
def chunk_answer(query, retrieved_docs):
    """Answer *query* from the retrieved chunks via the LLM, as an HTML card.

    The chunks are listed in the prompt together with their chunk number and
    header metadata, the module-level ``model`` is asked to answer from them
    only, and the reply is wrapped in a card.

    Args:
        query: The user's question.
        retrieved_docs: Documents whose ``metadata`` may hold "Chunk No" and
            "Header 1/2/3" and whose ``page_content`` is the chunk text.

    Returns:
        HTML for the answer card.  When there are no documents, or the model
        call fails or yields no text, the card carries a bracketed notice
        instead of an answer.
    """
    import html  # local import: only used to escape error text below

    def render_card(body_html):
        # Card wrapper shared by the answer and the notices.
        return (
            "<div style='margin:15px 10px; border:1px solid #ddd; border-radius:10px; overflow:hidden;'>"
            "<div style='padding:12px; background:#f5f5f5; font-weight:bold; font-size:16px;'>📑 문서 기반 응답</div>"
            f"<div style='padding:15px; background:white; font-size:15px; line-height:1.6;'>{body_html}</div>"
            "</div>"
        )

    if not retrieved_docs:
        # Without source text the prompt would invite an ungrounded answer,
        # so skip the model call entirely.
        return render_card("[⚠️ 답변 없음: 검색된 문서 조각이 없음]")

    docs_text = "\n\n".join(
        f"[Chunk {doc.metadata.get('Chunk No')}] "
        f"(Header1: {doc.metadata.get('Header 1','')}, "
        f"Header2: {doc.metadata.get('Header 2','')}, "
        f"Header3: {doc.metadata.get('Header 3','')})\n"
        f"{doc.page_content}"
        for doc in retrieved_docs
    )

    prompt = f"""
    너는 연구 보고서를 요약하는 LLM 어시스턴트다. 
    아래 문서 조각들만 참고해서 질문에 대한 답변을 작성하라. 

    [요구사항]
    - 문서 내용만 활용할 것 (새로운 사실 생성 금지).
    - 답변은 3~4문단, 600~800자 내외로 정리.
    - 결론 문단은 반드시 '따라서, ~이다.' 또는 '결론적으로, ~라고 할 수 있다.' 형태로 마무리.
    - 답변 후 반드시 "출처" 섹션을 추가하여 사용한 Header1/2/3와 Chunk No를 나열할 것.
    - 출력은 반드시 HTML 카드 형태로 (FAQ 카드 스타일) 반환.

    질문: {query}

    문서 조각:
    {docs_text}
    """

    try:
        response = model.generate_content(
            prompt,
            generation_config={"max_output_tokens": 9000, "temperature": 0.5}
        )

        if response.candidates and response.candidates[0].content.parts:
            answer = "".join(
                p.text for p in response.candidates[0].content.parts if hasattr(p, "text")
            ).strip()
        else:
            answer = "[⚠️ 답변 없음: 모델이 응답을 생성하지 않음]"

    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # shown in the card rather than raised.  The message is escaped
        # because it is inserted into HTML.
        answer = f"[⚠️ 오류 발생: {html.escape(str(e))}]"

    # Strip Markdown code-fence markers the model may wrap its HTML in.
    answer = answer.replace("```html", "").replace("```", "").strip()

    # Keep the model's line breaks visible inside the HTML component.
    answer_html = answer.replace("\n", "<br>")

    return render_card(answer_html)


# -----------------------------
# Hybrid Search 함수 정의 (UI보다 위에 있어야 함)
# -----------------------------
# Minimum relevance score (0..1, higher = more similar) a FAQ hit needs in
# order to be shown next to the document-based answer.
FAQ_THRESHOLD = 0.7


def hybrid_search(query, faq_retriever, chunk_retriever):
    """Combine the document-based answer with sufficiently relevant FAQ hits.

    Args:
        query: The user's question.
        faq_retriever: Retriever over the FAQ questions; when it exposes a
            ``vectorstore`` the hits are filtered by relevance score.
        chunk_retriever: Retriever over the document chunks.

    Returns:
        HTML: the chunk-based answer, followed by a rule and the FAQ cards
        when at least one FAQ hit reaches ``FAQ_THRESHOLD``.
    """
    try:
        # Relevance scores are normalised so that higher means more similar.
        # similarity_search_with_score() on FAISS returns a distance instead
        # (lower = closer), for which ">= threshold" keeps the worst hits.
        faq_results = faq_retriever.vectorstore.similarity_search_with_relevance_scores(query, k=3)
    except (AttributeError, NotImplementedError):
        # No score-capable vector store behind the retriever: take the plain
        # top 3 and treat them as fully relevant.
        faq_results = [(doc, 1.0) for doc in faq_retriever.invoke(query)[:3]]

    # Rendered with the same card helpers as the FAQ tab (the previously
    # called format_faq_answer() is not defined anywhere in this cell).
    faq_outputs = [
        format_faq_card(
            clean_question(doc.page_content),
            clean_answer(doc.metadata.get("Answer", "")),
            doc.metadata.get("Header 1", ""),
            doc.metadata.get("Header 2", ""),
            doc.metadata.get("Source", ""),
        )
        for doc, score in faq_results
        if score >= FAQ_THRESHOLD
    ]

    # The chunk-based answer is always the main result.
    chunk_results = chunk_retriever.invoke(query)
    chunk_output = chunk_answer(query, chunk_results) if chunk_results else "⚠️ Chunk 기반 결과 없음"

    if not faq_outputs:
        return chunk_output
    # The result is shown in an HTML component, so separate the two parts
    # with <hr>; a Markdown "---" would be displayed literally.
    faq_block = "".join(faq_outputs)
    return f"{chunk_output}<hr>{faq_block}"
    
# ✅ 지역 검색기 함수 생성기
def local_search_factory(rows):
    """Create a search callback restricted to one group of chunk rows.

    Args:
        rows: DataFrame slice with "Content", "Chunk No" and "Metadata"
            columns ("Metadata" being a stringified dict).

    Returns:
        A function ``local_search(query)`` that retrieves from these rows
        only and returns the HTML produced by ``chunk_answer``.
    """
    import ast  # local import: parses the stringified metadata dicts

    cached_retriever = None

    def local_search(query):
        nonlocal cached_retriever
        if cached_retriever is None:
            # Build the index on first use and keep it: embedding the same
            # rows again for every query only repeats identical work.
            docs = [
                Document(
                    page_content=row["Content"],
                    # "Chunk No" is included so chunk_answer can cite it, as
                    # in the global chunk index.  literal_eval() accepts
                    # Python literals only, unlike the previous eval().
                    metadata={"Chunk No": row["Chunk No"], **ast.literal_eval(row["Metadata"])},
                )
                for _, row in rows.iterrows()
            ]
            local_db = FAISS.from_documents(docs, embeddings)
            cached_retriever = local_db.as_retriever(
                search_type="similarity", search_kwargs={"k": 3}
            )
        # Retriever.invoke() replaces the deprecated get_relevant_documents().
        results = cached_retriever.invoke(query)
        return chunk_answer(query, results)

    return local_search
# -----------------------------
# 9. Gradio UI 
# -----------------------------
import ast  # local to this step: parses the stringified metadata dicts

with gr.Blocks() as demo:
    gr.Markdown("## 📘 Tech Library 검색 뷰어")

    with gr.Tab("일반 검색 (문서 기반)"):
        # Search across the whole document: chunk answer plus FAQ cards.
        gr.Markdown("### 🔎 전체 문서 검색")
        global_query = gr.Textbox(label="전체 문서에서 질문하기")
        global_output = gr.HTML()
        global_query.submit(lambda q: hybrid_search(q, faq_retriever, chunk_retriever),
                            global_query, global_output)

        # One collapsible search box per distinct metadata value, i.e. per
        # section of the table of contents.
        gr.Markdown("### 📘 목차별 검색")
        grouped = chunks_df.groupby("Metadata")

        for meta, rows in grouped:
            # Parse the group key once.  literal_eval() accepts Python
            # literals only, unlike the previous eval() calls.
            section_meta = ast.literal_eval(meta)
            header1 = section_meta.get("Header 1", "Unknown")
            header2 = section_meta.get("Header 2", "")

            with gr.Accordion(f"{header1} > {header2}", open=False):
                local_query = gr.Textbox(label=f"{header1} - {header2} 내 검색")
                local_output = gr.HTML()
                # The factory captures this iteration's rows in a closure,
                # so every accordion searches its own section.
                search_fn = local_search_factory(rows)
                local_query.submit(search_fn, local_query, local_output)


    with gr.Tab("FAQ 검색"):
        query = gr.Textbox(label="FAQ 질문을 입력하세요")
        output = gr.HTML()
        query.submit(search_faq, query, output)

    # Browse tab: the full listing is rendered once, while the UI is built.
    with gr.Tab("FAQ 전체 보기"):
        faq_output = gr.HTML(show_faq())

demo.launch()


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


