In [None]:

import subprocess
import sys
import streamlit as st

@st.cache_resource
def install_packages(package):
    """Install a single package with pip under the current interpreter.

    The command is run as ``<sys.executable> -m pip install <package>`` so the
    package lands in the environment of the Python process running this app.
    The function is wrapped in ``st.cache_resource``.

    Args:
        package: Requirement string handed to ``pip install`` unchanged.

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a
            non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

# Packages handed to install_packages() at startup, one pip invocation per entry.
packages = [
    "certifi",
    "charset-normalizer",
    "faiss-cpu",
    "idna",
    "numpy",
    "packaging",
    "python-dotenv",
    "requests",
    "urllib3",
    "pyarrow",
    "PyPDF2",
    "python-docx",
    "spacy",
]

# The whole setup is skipped once "setup_complete" is present in st.session_state;
# that flag is only written after every package installed without error.
if "setup_complete" not in st.session_state:
    # The bar is created inside a placeholder so it can be removed when setup ends.
    progress_placeholder = st.empty()
    progress_bar = progress_placeholder.progress(0, text="Setting up the application...")
    total = len(packages)
    for i, package in enumerate(packages, start=1):
        # Before installing step i the bar shows the share of steps already done: (i - 1) / total.
        progress_bar.progress(int((i - 1) / total * 100), text=f"Setting up step {i} of {total}...")
        try:
            install_packages(package)
        except subprocess.CalledProcessError:
            # pip exited non-zero: clear the bar, report the failure and stop.
            # The break bypasses the for-else below, so "setup_complete" stays unset.
            progress_placeholder.empty()
            st.error("Something went wrong while setting things up. Please try again or contact support.")
            break
        progress_bar.progress(int(i / total * 100), text=f"Finished step {i} of {total}")
    else:
        # Runs only when the loop finished without hitting break, i.e. all installs succeeded.
        progress_placeholder.empty()
        st.session_state.setup_complete = True
        st.toast("You're all set! Choose 'Upload document' to load an RFP or 'Ask a question' to chat. Use the sidebar to switch interface modes and provide any required API keys.")


In [None]:
import streamlit as st
import os
import tempfile
import json
import re
import io
import contextlib
from pathlib import Path
from typing import List, Optional, Callable

from cli_app import (
    load_input_text,
    extract_questions,
    build_docx,
)
from qa_core import answer_question
from answer_composer import CompletionsClient, get_openai_completion
from input_file_reader.interpreter_sheet import collect_non_empty_cells
from rfp_xlsx_slot_finder import ask_sheet_schema
from rfp_xlsx_apply_answers import write_excel_answers
from rfp_docx_slot_finder import extract_slots_from_docx
from rfp_docx_apply_answers import apply_answers_to_docx
import my_module
from my_module import _classify_intent, _detect_followup, gen_answer
# Model identifier -> description shown next to the model in the selection widgets.
MODEL_DESCRIPTIONS = {
    "gpt-4.1-nano-2025-04-14_research": "Lighter, faster model",
    "o3-2025-04-16_research": "Slower, reasoning model",
}

# Model identifier -> abbreviated label used when the interface mode is "User".
MODEL_SHORT_NAMES = {
    "gpt-4.1-nano-2025-04-14_research": "4.1",
    "o3-2025-04-16_research": "o3",
}

# Selectable model identifiers, in the insertion order of MODEL_DESCRIPTIONS.
MODEL_OPTIONS = [*MODEL_DESCRIPTIONS]


def load_fund_tags(path=None) -> List[str]:
    """Return the sorted, de-duplicated tags found in an embedding-data JSON file.

    The file is expected to hold a JSON list of objects; tags are read from
    each object's ``metadata.tags`` list.  Loading is best-effort: a missing,
    unreadable or malformed file yields an empty list, and entries that lack
    usable ``metadata``/``tags`` are skipped instead of raising.

    Args:
        path: Optional location of the JSON file (``str`` or ``Path``; ``~`` is
            expanded).  Defaults to the embedding data under
            ``~/derivs-tool/rfp-ai-tool/structured_extraction``.

    Returns:
        Sorted list of unique tags, or ``[]`` when nothing could be loaded.
    """
    if path is None:
        path = '~/derivs-tool/rfp-ai-tool/structured_extraction/embedding_data.json'
    source = Path(path).expanduser()
    try:
        with source.open('r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, list):
        return []
    tags = set()
    for item in data:
        if not isinstance(item, dict):
            continue
        # "metadata" and "tags" may be absent or explicitly null.
        metadata = item.get('metadata') or {}
        if isinstance(metadata, dict):
            tags.update(metadata.get('tags') or [])
    return sorted(tags)


class OpenAIClient:
    """Small adapter that binds a model name to ``get_openai_completion``.

    It exposes a ``get_completion(prompt, json_output=False)`` method so it can
    be passed wherever this file passes an ``llm`` object.
    """

    def __init__(self, model: str):
        """Store the model identifier used for every later completion."""
        self.model = model

    def get_completion(self, prompt: str, json_output: bool = False):
        """Forward ``prompt`` to ``get_openai_completion`` with the stored model.

        Args:
            prompt: Prompt text passed through unchanged.
            json_output: Passed through as the ``json_output`` keyword.

        Returns:
            Whatever ``get_openai_completion`` returns.
        """
        model_name = self.model
        completion = get_openai_completion(prompt, model_name, json_output=json_output)
        return completion


def save_uploaded_file(uploaded_file) -> str:
    """Copy an uploaded file to a named temporary file and return its path.

    Args:
        uploaded_file: Object with a ``name`` attribute (used only for its
            extension) and a ``read()`` method returning the file's bytes.

    Returns:
        Path of the temporary file.  The file has the same suffix as
        ``uploaded_file.name`` and is not deleted automatically.
    """
    suffix = Path(uploaded_file.name).suffix
    # delete=False keeps the file on disk after the handle is closed.  Closing
    # the handle (via the with-block) flushes the data, releases the
    # descriptor, and lets other code reopen the path on platforms that forbid
    # opening a file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
    return tmp.name


def build_generator(
    search_mode: str,
    fund: Optional[str],
    k: int,
    length: Optional[str],
    approx_words: Optional[int],
    min_confidence: float,
    include_citations: bool,
    llm,
    extra_docs: Optional[List[str]] = None,
):
    """Create a ``gen(question, progress=None)`` callable with fixed settings.

    Every argument is captured by the returned closure and forwarded to
    ``answer_question`` on each call, so callers only supply the question and
    an optional progress callback.

    Returns:
        A function whose result depends on ``include_citations``:

        * falsy  -> the answer string with ``[n]`` markers removed;
        * truthy -> ``{"text": answer, "citations": {label: {"text": snippet,
          "source_file": source}}}`` built from the comment tuples returned by
          ``answer_question``.
    """

    def gen(question: str, progress: Optional[Callable[[str], None]] = None):
        answer_text, comments = answer_question(
            question,
            search_mode,
            fund,
            k,
            length,
            approx_words,
            min_confidence,
            llm,
            extra_docs=extra_docs,
            progress=progress,
        )
        if include_citations:
            cited = {}
            # Each comment is a 5-tuple; score and date are not surfaced here.
            for label, source, snippet, _score, _date in comments:
                cited[label] = {"text": snippet, "source_file": source}
            return {"text": answer_text, "citations": cited}
        # Citations disabled: drop the bracketed numeric markers from the text.
        return re.sub(r"\[\d+\]", "", answer_text)

    return gen


def main():
    """Render the RFP Responder Streamlit page and run the selected workflow.

    Widgets are created top to bottom: page CSS, interface/input mode, backend
    framework and credentials, the optional file uploader, per-mode settings
    and the "More options" expander.  Credentials typed into the page are
    stored in ``os.environ`` under the same keys that are checked with
    ``os.getenv``.

    Two workflows follow, selected by the "How would you like to proceed?"
    radio:

    * "Ask a question" -- a chat UI.  Past messages are replayed from
      ``st.session_state.chat_messages`` and the citations of assistant
      messages are listed in the sidebar.  A new prompt is classified with
      ``_classify_intent`` and answered by ``gen_answer`` (intent
      ``"follow_up"``) or by the generator from ``build_generator``.
    * "Upload document" -- after "Run" is clicked with a file and a fund
      selected, the file is processed according to its extension:
      ``.xlsx``/``.xls`` workbooks, ``.docx`` slot filling (unless "Treat DOCX
      as text" is checked), or question extraction for everything else.  Each
      branch ends with a download button for the generated file.

    Returns:
        None.  All output is rendered through Streamlit.
    """
    st.title("RFP Responder")
    st.markdown(
        """
        <style>
        div.block-container{
            max-width: 750px;
            padding-top: 2rem;
        }
        div[data-testid="stChatMessage"]{
            border-radius: 0.5rem;
            padding: 1rem;
            margin-bottom: 1rem;
        }
        div[data-testid="stChatMessage-user"]{
            background-color: #DCF8C6;
        }
        div[data-testid="stChatMessage-assistant"]{
            background-color: #FFFFFF;
        }
        div[data-testid="stChatInput"] textarea{
            border-radius: 0.5rem;
            padding: 0.75rem;
        }
        @keyframes shimmer{
            0%{background-position:-1000px 0;}
            100%{background-position:1000px 0;}
        }
        .shimmer{
            background:linear-gradient(90deg,#d0d0d0 0%,#b0b0b0 50%,#d0d0d0 100%);
            background-size:1000px 100%;
            animation:shimmer 2s infinite linear;
            -webkit-background-clip:text;
            -webkit-text-fill-color:transparent;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
    view_mode = st.sidebar.radio("Interface mode", ["User", "Developer"], index=0)
    input_mode = st.radio("How would you like to proceed?", ["Upload document", "Ask a question"], index=1, horizontal=True)
    # Default model; may be overridden by the selectboxes further down.
    llm_model = MODEL_OPTIONS[0]

    # --- Backend framework: environment variable wins over the widget. ---
    framework_env = os.getenv("ANSWER_FRAMEWORK")
    if framework_env:
        if view_mode == "Developer":
            st.info(f"Using framework from ANSWER_FRAMEWORK: {framework_env}")
        framework = framework_env
    else:
        framework = st.selectbox("Framework", ["aladdin", "openai"], index=0, help="Choose backend for language model.")

    # --- Credentials: prompt only for values missing from the environment. ---
    if framework == "aladdin":
        for key, label in [
            ("aladdin_studio_api_key", "Aladdin Studio API key"),
            ("defaultWebServer", "Default Web Server"),
            ("aladdin_user", "Aladdin user"),
            ("aladdin_passwd", "Aladdin password"),
        ]:
            if os.getenv(key):
                if view_mode == "Developer":
                    st.info(f"{key} loaded from environment")
            else:
                # Mask the input for password and API-key fields.
                val = st.text_input(label, type="password" if "passwd" in key or "api_key" in key else "default")
                if val:
                    os.environ[key] = val
    else:
        if os.getenv("OPENAI_API_KEY"):
            if view_mode == "Developer":
                st.info("OPENAI_API_KEY loaded from environment")
        else:
            api_key = st.text_input("OpenAI API key", type="password", help="API key for OpenAI.")
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key

    if input_mode == "Upload document":
        uploaded = st.file_uploader(
            "Upload document",
            type=["pdf", "docx", "txt", "xlsx"],
            help="Upload the RFP or question file.",
        )
    else:
        uploaded = None

    # --- Settings: Developer mode exposes the knobs, User mode fixes them. ---
    if view_mode == "Developer":
        st.info("Search mode fixed to 'both'")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Filter answers for a specific fund or strategy.",
        )
        llm_model = st.selectbox(
            "LLM model",
            MODEL_OPTIONS,
            index=0,
            format_func=lambda m: f"{m} - {MODEL_DESCRIPTIONS[m]}",
            help="Model name for generating answers.",
        )
        k_max_hits = st.number_input("Hits per question", value=20, help="Maximum documents retrieved per question.")
        min_confidence = st.number_input("Min confidence", value=0.0, help="Minimum score for retrieved documents.")
        docx_as_text = st.checkbox("Treat DOCX as text", value=False)
        docx_write_mode = st.selectbox("DOCX write mode", ["fill", "replace", "append"], index=0)
        extra_uploads = st.file_uploader(
            "Additional documents", type=["pdf", "docx", "txt"], accept_multiple_files=True
        )
    else:
        st.markdown("### Settings")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Select fund or strategy context for better answers.",
        )
        k_max_hits = 20
        min_confidence = 0.0
        docx_as_text = False
        docx_write_mode = "fill"
        extra_uploads = None

    with st.expander("More options"):
        if view_mode == "User":
            llm_model = st.selectbox(
                "Model",
                MODEL_OPTIONS,
                index=MODEL_OPTIONS.index(llm_model),
                format_func=lambda m: f"{MODEL_SHORT_NAMES[m]} - {MODEL_DESCRIPTIONS[m]}",
                help="Choose which model generates answers.",
            )
        length_opt = st.selectbox("Answer length", ["auto", "short", "medium", "long"], index=3)
        approx_words = st.text_input("Approx words", value="", help="Approximate words per answer (optional).")
        # RFP_INCLUDE_COMMENTS, when set, overrides the checkbox; "0" disables citations.
        include_env = os.getenv("RFP_INCLUDE_COMMENTS")
        if include_env is not None:
            include_citations = include_env != "0"
            st.info(f"Using include citations from RFP_INCLUDE_COMMENTS: {include_citations}")
        else:
            include_citations = st.checkbox("Include citations", value=True)
        show_live = st.checkbox("Show questions and answers during processing", value=True)

    # Parse the free-text word count once.  A non-numeric entry is reported and
    # ignored instead of raising ValueError in the middle of a run.
    approx_words_count: Optional[int] = None
    if approx_words.strip():
        try:
            approx_words_count = int(approx_words)
        except ValueError:
            st.warning("Approx words must be a whole number; ignoring the value.")

    if input_mode == "Ask a question":
        # ------------------------------ Chat mode ------------------------------
        llm = CompletionsClient(model=llm_model) if framework == "aladdin" else OpenAIClient(model=llm_model)
        gen = build_generator(
            search_mode,
            fund,
            int(k_max_hits),
            length_opt,
            approx_words_count,
            float(min_confidence),
            include_citations,
            llm,
        )
        # Hand the same client to my_module (gen_answer is imported from there).
        my_module._llm_client = llm
        if "chat_messages" not in st.session_state:
            st.session_state.chat_messages = []
        if "question_history" not in st.session_state:
            st.session_state.question_history = []
        sidebar = st.sidebar.container()
        sidebar.markdown("### References")
        answer_idx = 0
        # Replay the stored conversation; assistant citations go to the sidebar.
        for msg in st.session_state.chat_messages:
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
                if msg.get("model"):
                    name = MODEL_SHORT_NAMES.get(msg["model"], msg["model"]) if view_mode == "User" else msg["model"]
                    st.caption(f"Model: {name}")
                if view_mode == "Developer" and msg.get("debug"):
                    st.expander("Debug info").markdown(f"```\n{msg['debug']}\n```")
                if msg["role"] == "assistant":
                    answer_idx += 1
                    sidebar.markdown(f"**Answer {answer_idx}**")
                    for lbl, cite in (msg.get("citations") or {}).items():
                        with sidebar.expander(str(lbl)):
                            # Only the field label is bold; a trailing "**" would
                            # be rendered as literal asterisks.
                            st.markdown(f"**Document:** {cite.get('source_file', 'Unknown')}")
                            section = cite.get('section')
                            if section:
                                st.markdown(f"**Section:** {section}")
                            st.markdown(cite.get('text', ''))
        if prompt := st.chat_input("Ask a question"):
            st.chat_message("user").markdown(prompt)
            st.session_state.chat_messages.append({"role": "user", "content": prompt})
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                def update_status(msg: str):
                    # Progress text is shown with the "shimmer" CSS class defined above.
                    message_placeholder.markdown(f'<span class="shimmer">{msg}</span>', unsafe_allow_html=True)
                update_status("Thinking...")
                history = st.session_state.get("question_history", [])
                intent = _classify_intent(prompt, history)
                follow = _detect_followup(prompt, history) if intent == "follow_up" else []
                # In Developer mode, capture anything printed while answering.
                buf = io.StringIO() if view_mode == "Developer" else None
                if buf:
                    with contextlib.redirect_stdout(buf):
                        ans = gen_answer(prompt, progress=update_status) if intent == "follow_up" else gen(prompt, progress=update_status)
                    debug_text = f"Intent: {intent}\nFollow-up indices: {follow}\n" + buf.getvalue()
                else:
                    ans = gen_answer(prompt, progress=update_status) if intent == "follow_up" else gen(prompt, progress=update_status)
                    debug_text = ""
                # The answer is either a plain string or {"text": ..., "citations": ...}.
                text = ans.get("text", "") if isinstance(ans, dict) else ans
                citations = ans.get("citations", {}) if isinstance(ans, dict) else {}
                message_placeholder.markdown(text)
                label = MODEL_SHORT_NAMES.get(llm_model, llm_model) if view_mode == "User" else llm_model
                st.caption(f"Model: {label}")
                if view_mode == "Developer":
                    st.expander("Debug info").markdown(f"```\n{debug_text}\n```")
                if intent != "follow_up":
                    my_module.QUESTION_HISTORY.append(prompt)
                    my_module.QA_HISTORY.append({"question": prompt, "answer": text, "citations": []})
            msg = {"role": "assistant", "content": text, "citations": citations, "model": llm_model}
            if view_mode == "Developer":
                msg["debug"] = debug_text
            st.session_state.chat_messages.append(msg)
            history = st.session_state.get("question_history", [])
            history.append(prompt)
            st.session_state.question_history = history
            # Rerun so the new exchange is drawn by the replay loop above.
            st.rerun()
    else:
        # ---------------------------- Document mode ----------------------------
        run_clicked = st.button("Run")
        if run_clicked and uploaded is not None and fund:
            phase_placeholder = st.empty()
            sub_placeholder = st.empty()
            dev_placeholder = st.empty()
            dev_logs = []
            state = {"step": 0, "phase": None}
            suffix = Path(uploaded.name).suffix.lower()
            # base_steps counts the "Saving uploaded file" step; branch_steps the
            # log_step calls of the branch taken below.
            base_steps = 1
            if suffix in (".xlsx", ".xls"):
                branch_steps = 4
            elif suffix == ".docx" and not docx_as_text:
                branch_steps = 3
            else:
                branch_steps = 3
            # The extra +1 keeps the bar short of full until the final "Done" update.
            total_steps = base_steps + branch_steps + 1
            step_bar = st.progress(0)
            def log_step(dev_msg, user_msg=None):
                # A new user-facing phase replaces the headline and clears the sub-line.
                if user_msg and user_msg != state["phase"]:
                    state["phase"] = user_msg
                    phase_placeholder.markdown(f"**{state['phase']}**")
                    sub_placeholder.empty()
                sub_placeholder.markdown(dev_msg)
                if view_mode == "Developer":
                    dev_logs.append(f"{state['phase']}: {dev_msg}")
                    dev_placeholder.markdown("\n".join(f"{i+1}. {m}" for i, m in enumerate(dev_logs)))
                state["step"] += 1
                step_bar.progress(state["step"] / total_steps, text=state["phase"])
            log_step("Saving uploaded file", "Preparing document...")
            input_path = save_uploaded_file(uploaded)
            extra_docs = [save_uploaded_file(f) for f in extra_uploads] if extra_uploads else None
            llm = CompletionsClient(model=llm_model) if framework == "aladdin" else OpenAIClient(model=llm_model)
            if suffix in (".xlsx", ".xls"):
                # ---- Workbook: answer every schema entry, write them back. ----
                log_step("Collecting non-empty cells", "Reading workbook...")
                cells = collect_non_empty_cells(input_path)
                log_step("Inferring sheet schema", "Analyzing workbook structure...")
                schema = ask_sheet_schema(input_path)
                log_step("Building answer generator", "Preparing answer generator...")
                gen = build_generator(
                    search_mode,
                    fund,
                    int(k_max_hits),
                    length_opt,
                    approx_words_count,
                    float(min_confidence),
                    include_citations,
                    llm,
                    extra_docs,
                )
                log_step("Starting question-answering", "Generating answers...")
                answers = []
                total_qs = len(schema)
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, entry in enumerate(schema, 1):
                    question = (entry.get("question_text") or "").strip()
                    if show_live and question:
                        qa_box.markdown(f"**Q{i}:** {question}")
                    ans = gen(question)
                    answers.append(ans)
                    progress.progress(i / total_qs, text=f"{i}/{total_qs}")
                    if show_live:
                        text = ans.get('text', '') if isinstance(ans, dict) else ans
                        qa_box.markdown(f"**A{i}:** {text}")
                out_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
                # Only the reserved path is needed; close the handle so the
                # writer below can open the file itself.
                out_tmp.close()
                write_excel_answers(
                    schema,
                    answers,
                    input_path,
                    out_tmp.name,
                    include_comments=include_citations,
                )
                with open(out_tmp.name, "rb") as f:
                    st.download_button(
                        "Download answered workbook",
                        f,
                        file_name=Path(uploaded.name).stem + "_answered.xlsx",
                    )
                if include_citations:
                    # Offer a "<output>_comments.docx" file if one exists next to the output.
                    base, _ = os.path.splitext(out_tmp.name)
                    comments_path = base + "_comments.docx"
                    if os.path.exists(comments_path):
                        with open(comments_path, "rb") as f:
                            st.download_button(
                                "Download comments DOCX",
                                f,
                                file_name=Path(uploaded.name).stem + "_comments.docx",
                            )
            elif suffix == ".docx" and not docx_as_text:
                # ---- DOCX: extract slots, answer each, apply to the document. ----
                log_step("Extracting slots from DOCX", "Analyzing document...")
                slots_payload = extract_slots_from_docx(input_path)
                # The with-block closes (and thereby flushes) the JSON file before
                # apply_answers_to_docx reads it by path; delete=False keeps it on disk.
                with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False, suffix=".json") as slots_tmp:
                    json.dump(slots_payload, slots_tmp)
                log_step("Building answer generator", "Preparing answer generator...")
                gen = build_generator(
                    search_mode,
                    fund,
                    int(k_max_hits),
                    length_opt,
                    approx_words_count,
                    float(min_confidence),
                    include_citations,
                    llm,
                    extra_docs,
                )
                log_step("Starting question-answering", "Generating answers...")
                answers_dict = {}
                slot_list = slots_payload.get('slots', [])
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, slot in enumerate(slot_list, 1):
                    question = (slot.get("question_text") or "").strip()
                    if show_live and question:
                        qa_box.markdown(f"**Q{i}:** {question}")
                    ans = gen(question)
                    # Slots without an id get a positional fallback key.
                    answers_dict[slot.get('id', f'slot_{i}')] = ans
                    progress.progress(i / len(slot_list), text=f"{i}/{len(slot_list)}")
                    if show_live:
                        text = ans.get('text', '') if isinstance(ans, dict) else ans
                        qa_box.markdown(f"**A{i}:** {text}")
                with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False, suffix=".json") as answers_tmp:
                    json.dump({'by_id': answers_dict}, answers_tmp)
                out_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".docx")
                # Only the reserved path is needed; release the handle before writing.
                out_tmp.close()
                apply_answers_to_docx(
                    docx_path=input_path,
                    slots_json_path=slots_tmp.name,
                    answers_json_path=answers_tmp.name,
                    out_path=out_tmp.name,
                    mode=docx_write_mode,
                    generator=None,
                    gen_name="streamlit_app:rag_gen",
                )
                with open(out_tmp.name, "rb") as f:
                    st.download_button(
                        "Download answered DOCX",
                        f,
                        file_name=Path(uploaded.name).stem + "_answered.docx",
                    )
            else:
                # ---- Any other file: extract questions, build a Q/A report. ----
                log_step("Loading input text", "Reading document...")
                raw = load_input_text(input_path)
                log_step("Extracting questions", "Finding questions...")
                questions = extract_questions(raw, llm)
                log_step("Starting question-answering", "Generating answers...")
                answers = []
                comments = []
                total_qs = len(questions)
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, q in enumerate(questions, 1):
                    if show_live and q:
                        qa_box.markdown(f"**Q{i}:** {q}")
                    # answer_question is called directly (not via build_generator)
                    # because build_docx takes the raw comment tuples.  extra_docs
                    # is passed so "Additional documents" uploads are honoured here
                    # as in the other two branches.
                    ans, cmts = answer_question(
                        q,
                        search_mode,
                        fund,
                        int(k_max_hits),
                        length_opt,
                        approx_words_count,
                        float(min_confidence),
                        llm,
                        extra_docs=extra_docs,
                    )
                    if not include_citations:
                        ans = re.sub(r"\[\d+\]", "", ans)
                        cmts = []
                    answers.append(ans)
                    comments.append(cmts)
                    progress.progress(i / total_qs, text=f"{i}/{total_qs}")
                    if show_live:
                        qa_box.markdown(f"**A{i}:** {ans}")
                qa_doc = build_docx(
                    questions,
                    answers,
                    comments,
                    include_comments=include_citations,
                )
                # Close the handle after writing so the report is fully on disk
                # before it is reopened for the download button.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as out_tmp:
                    out_tmp.write(qa_doc)
                with open(out_tmp.name, "rb") as f:
                    st.download_button(
                        "Download Q/A report",
                        f,
                        file_name=Path(uploaded.name).stem + "_answered.docx",
                    )
            step_bar.progress(1.0, text="Done")
        elif run_clicked and not fund:
            st.warning("Please select a fund or strategy before running.")
        elif run_clicked:
            st.warning("Please upload a document before running.")
    
    
# Entry point: call main() when this module is executed directly rather than imported.
if __name__ == "__main__":
    main()