In [None]:
import subprocess
import sys
import streamlit as st

@st.cache_resource
def install_packages():
    """Install the app's runtime dependencies with pip, reporting progress in the UI.

    Each package is installed separately with ``<python> -m pip install <name>``
    using the interpreter that is running this script, and a Streamlit progress
    bar is advanced before and after every install.

    Returns:
        bool: ``True`` when every package installed successfully, ``False`` as
        soon as one pip invocation exits with a non-zero status.
    """
    packages = [
        "certifi",
        "charset-normalizer",
        "faiss-cpu",
        "idna",
        "numpy",
        "packaging",
        "python-dotenv",
        "requests",
        "urllib3",
        "pyarrow",
        "PyPDF2",
        "python-docx",
        "spacy",
    ]

    progress_bar = st.progress(0, text="Setting up the application...")
    total = len(packages)
    for i, package in enumerate(packages, start=1):
        progress_bar.progress(int((i - 1) / total * 100), text=f"Setting up step {i} of {total}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except subprocess.CalledProcessError:
            st.error("Something went wrong while setting things up. Please try again or contact support.")
            # Stop here and report failure: falling through would show
            # "Setup complete" / "You're all set!" right after the error.
            # NOTE(review): the decorator caches the returned value, so a failed
            # setup is presumably not retried until the cache is cleared — confirm.
            return False
        progress_bar.progress(int(i / total * 100), text=f"Finished step {i} of {total}")
    progress_bar.progress(100, text="Setup complete")
    st.success("You're all set!")
    return True

# Run the dependency setup as soon as this cell executes; the return value is ignored.
install_packages()


In [None]:
import streamlit as st
import os
import tempfile
import json
import re
import builtins
from datetime import datetime
from openpyxl import Workbook, load_workbook
from pathlib import Path
from typing import List, Optional

from cli_app import (
    load_input_text,
    extract_questions,
    build_docx,
)
from qa_core import answer_question
from answer_composer import CompletionsClient, get_openai_completion
from input_file_reader.interpreter_sheet import collect_non_empty_cells
from rfp_xlsx_slot_finder import ask_sheet_schema
from rfp_xlsx_apply_answers import write_excel_answers
from rfp_docx_slot_finder import extract_slots_from_docx
from rfp_docx_apply_answers import apply_answers_to_docx
# Model identifiers offered in the Developer-mode "LLM model" selector; the
# first entry is the one used in User mode.
MODEL_OPTIONS = [
    "gpt-4.1-nano-2025-04-14_research",
    "o3-2025-04-16_research",
]
# Workbook that save_feedback() appends feedback rows to (relative path).
FEEDBACK_FILE = "feedback.xlsx"


def load_fund_tags() -> List[str]:
    """Return the sorted, de-duplicated tags found in the embedding data file.

    The file is read from
    ``~/derivs-tool/rfp-ai-tool/structured_extraction/embedding_data.json`` and
    is expected to hold a JSON list of objects, each optionally carrying
    ``metadata.tags`` (a list of strings).

    Loading is best-effort: a missing/unreadable file, invalid JSON, or a
    top-level value that is not a list yields an empty list. Entries whose
    ``metadata`` or ``tags`` are missing, ``null`` or of the wrong type are
    skipped instead of raising.

    Returns:
        List[str]: Unique tags in ascending order (possibly empty).
    """
    path = Path('~/derivs-tool/rfp-ai-tool/structured_extraction/embedding_data.json').expanduser()
    try:
        with path.open('r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(data, list):
        return []

    tags = set()
    for item in data:
        if not isinstance(item, dict):
            continue
        metadata = item.get('metadata')
        if not isinstance(metadata, dict):
            # Covers both an absent key and an explicit JSON null.
            continue
        item_tags = metadata.get('tags')
        if isinstance(item_tags, (list, tuple)):
            # Keep only strings so the result is sortable and matches List[str].
            tags.update(t for t in item_tags if isinstance(t, str))
    return sorted(tags)


class OpenAIClient:
    """Small client object that forwards prompts to ``get_openai_completion``."""

    def __init__(self, model: str):
        # Model identifier passed along with every completion request.
        self.model = model

    def get_completion(self, prompt: str, json_output: bool = False):
        """Return the completion for ``prompt`` using this client's model.

        ``json_output`` is handed through unchanged as a keyword argument.
        """
        model_name = self.model
        completion = get_openai_completion(prompt, model_name, json_output=json_output)
        return completion


def save_uploaded_file(uploaded_file) -> str:
    """Copy an uploaded file to a named temporary file and return its path.

    Args:
        uploaded_file: Object exposing ``name`` (used only for its extension)
            and ``read()`` returning the file's bytes.

    Returns:
        str: Path of the temporary file. It is created with ``delete=False``,
        so it stays on disk and the caller is responsible for removing it.
    """
    suffix = Path(uploaded_file.name).suffix
    # The with-block closes the handle (flushing the data) so the descriptor is
    # not leaked and other code can reopen the path by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
    return tmp.name

def save_feedback(rating, reason, other, dev_logs, log_text):
    """Append one feedback row to the workbook at ``FEEDBACK_FILE``.

    The workbook is created with a header row when it does not exist yet;
    otherwise the row is appended to its active sheet.

    Args:
        rating: Value stored in the ``rating`` column.
        reason: Value stored in the ``reason`` column.
        other: Value stored in the ``other`` column.
        dev_logs: Iterable of log strings; stored newline-joined in one cell.
            ``None`` is treated as no logs.
        log_text: Value stored in the ``log_text`` column.
    """
    entry = [
        datetime.utcnow().isoformat(),
        rating,
        reason,
        other,
        # Newline-joined so all developer log lines fit into a single cell.
        "\n".join(dev_logs or []),
        log_text,
    ]
    if os.path.exists(FEEDBACK_FILE):
        wb = load_workbook(FEEDBACK_FILE)
        ws = wb.active
    else:
        wb = Workbook()
        ws = wb.active
        ws.append(["timestamp", "rating", "reason", "other", "dev_logs", "log_text"])
    ws.append(entry)
    wb.save(FEEDBACK_FILE)


def build_generator(
    search_mode: str,
    fund: Optional[str],
    k: int,
    length: Optional[str],
    approx_words: Optional[int],
    min_confidence: float,
    include_citations: bool,
    llm,
    extra_docs: Optional[List[str]] = None,
):
    """Create a one-argument callable that answers a question with fixed settings.

    Every argument except ``include_citations`` is forwarded to
    ``answer_question`` on each call of the returned function.

    Returns:
        A function ``gen(question)``. With ``include_citations`` false it
        returns the answer string with ``[n]`` markers removed. Otherwise it
        returns ``{"text": answer, "citations": {label: {"text": snippet,
        "source_file": source}}}``, built from the five-element comment tuples
        that ``answer_question`` returns alongside the answer.
    """
    citation_marker = r"\[\d+\]"

    def gen(question: str):
        answer_text, comments = answer_question(
            question,
            search_mode,
            fund,
            k,
            length,
            approx_words,
            min_confidence,
            llm,
            extra_docs=extra_docs,
        )
        if include_citations:
            by_label = {}
            # Score and date are part of each tuple but not kept in the payload.
            for label, source, snippet, _score, _date in comments:
                by_label[label] = {"text": snippet, "source_file": source}
            return {"text": answer_text, "citations": by_label}
        # Citations disabled: return plain text without the bracketed markers.
        return re.sub(citation_marker, "", answer_text)

    return gen


def main():
    """Render the RFP Responder page and run the answering pipeline on demand.

    The page collects backend credentials, an uploaded document and answer
    settings (a reduced set in "User" mode, the full set in "Developer" mode).
    When "Run" is pressed with a document and a fund selected, the upload is
    processed by one of three branches chosen from its extension:

    * ``.xlsx``/``.xls``: infer the sheet schema, answer each entry, write an
      answered workbook and a Q/A DOCX report.
    * ``.docx`` (unless "Treat DOCX as text" is ticked): extract slots, answer
      each one and write the answers back into a copy of the document.
    * anything else: load the text, extract questions, answer them and build a
      Q/A DOCX report.

    Results are kept in ``st.session_state["output_files"]`` so the download
    buttons and the feedback form survive Streamlit reruns.
    """
    st.title("RFP Responder")
    view_mode = st.sidebar.radio("Interface mode", ["User", "Developer"], index=0)

    # The backend can be pinned through the environment; otherwise the user picks it.
    framework_env = os.getenv("ANSWER_FRAMEWORK")
    if framework_env:
        if view_mode == "Developer":
            st.info(f"Using framework from ANSWER_FRAMEWORK: {framework_env}")
        framework = framework_env
    else:
        framework = st.selectbox("Framework", ["aladdin", "openai"], index=0, help="Choose backend for language model.")

    # Credentials missing from the environment are requested in the UI and then
    # written into os.environ for the rest of the process.
    if framework == "aladdin":
        for key, label in [
            ("aladdin_studio_api_key", "Aladdin Studio API key"),
            ("defaultWebServer", "Default Web Server"),
            ("aladdin_user", "Aladdin user"),
            ("aladdin_passwd", "Aladdin password"),
        ]:
            if os.getenv(key):
                if view_mode == "Developer":
                    st.info(f"{key} loaded from environment")
            else:
                val = st.text_input(label, type="password" if "passwd" in key or "api_key" in key else "default")
                if val:
                    os.environ[key] = val
    else:
        if os.getenv("OPENAI_API_KEY"):
            if view_mode == "Developer":
                st.info("OPENAI_API_KEY loaded from environment")
        else:
            api_key = st.text_input("OpenAI API key", type="password", help="API key for OpenAI.")
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key

    uploaded = st.file_uploader(
        "Upload document",
        type=["pdf", "docx", "txt", "xlsx"],
        help="Upload the RFP or question file.",
    )

    if view_mode == "Developer":
        st.info("Search mode fixed to 'both'")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Filter answers for a specific fund or strategy.",
        )
        llm_model = st.selectbox(
            "LLM model",
            MODEL_OPTIONS,
            index=0,
            help="Model name for generating answers.",
        )
        k_max_hits = st.number_input("Hits per question", value=20, help="Maximum documents retrieved per question.")
        length_opt = st.selectbox("Answer length", ["auto", "short", "medium", "long"], index=0)
        approx_words = st.text_input("Approx words", value="", help="Approximate words per answer (optional).")
        min_confidence = st.number_input("Min confidence", value=0.0, help="Minimum score for retrieved documents.")
        # RFP_INCLUDE_COMMENTS overrides the checkbox; any value other than "0" enables citations.
        include_env = os.getenv("RFP_INCLUDE_COMMENTS")
        if include_env is not None:
            include_citations = include_env != "0"
            st.info(f"Using include citations from RFP_INCLUDE_COMMENTS: {include_citations}")
        else:
            include_citations = st.checkbox("Include citations with comments", value=True)
        docx_as_text = st.checkbox("Treat DOCX as text", value=False)
        docx_write_mode = st.selectbox("DOCX write mode", ["fill", "replace", "append"], index=0)
        extra_uploads = st.file_uploader(
            "Additional documents", type=["pdf", "docx", "txt"], accept_multiple_files=True
        )
    else:
        st.markdown("### Settings")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Select fund or strategy context for better answers.",
        )
        llm_model = MODEL_OPTIONS[0]
        k_max_hits = 20
        length_opt = st.selectbox(
            "Answer length", ["auto", "short", "medium", "long"], index=0,
            help="Controls how verbose the answer is; 'auto' lets the model decide based on sources.",
        )
        approx_words = st.text_input("Approx words", value="", help="Optional target word count for answers.")
        min_confidence = 0.0
        include_citations = st.checkbox(
            "Include citations", value=True,
            help="Attach source references to answers.",
        )
        docx_as_text = False
        docx_write_mode = "fill"
        extra_uploads = None

    show_live = st.checkbox("Show questions and answers during processing", value=False)
    if "output_files" not in st.session_state:
        st.session_state["output_files"] = None
    run_clicked = st.button("Run")

    # "Approx words" is free text: parse it once up front so a non-numeric value
    # produces a warning instead of a ValueError in the middle of a run.
    approx_words_text = (approx_words or "").strip()
    approx_words_value = None
    approx_words_valid = True
    if approx_words_text:
        try:
            approx_words_value = int(approx_words_text)
        except ValueError:
            approx_words_valid = False

    if run_clicked and uploaded is not None and fund and approx_words_valid:
        phase_placeholder = st.empty()
        sub_placeholder = st.empty()
        dev_placeholder = st.empty()
        dev_logs = []
        log_messages = []
        current_phase = None
        suffix = Path(uploaded.name).suffix.lower()
        # One shared step (saving the upload), the branch's own steps, plus one
        # spare so the bar only reaches 100% at the explicit "Done" update.
        base_steps = 1
        branch_steps = 4 if suffix in (".xlsx", ".xls") else 3
        total_steps = base_steps + branch_steps + 1
        step_bar = st.progress(0)
        step_count = 0

        def log_step(dev_msg, user_msg=None):
            """Show a progress message and advance the overall step bar by one."""
            nonlocal step_count, current_phase
            if user_msg and user_msg != current_phase:
                current_phase = user_msg
                phase_placeholder.markdown(f"**{current_phase}**")
                sub_placeholder.empty()
            sub_placeholder.markdown(dev_msg)
            if view_mode == "Developer":
                dev_logs.append(f"{current_phase}: {dev_msg}")
                dev_placeholder.markdown("\n".join(f"{i+1}. {m}" for i, m in enumerate(dev_logs)))
            step_count += 1
            step_bar.progress(step_count / total_steps, text=current_phase)

        hits = int(k_max_hits)
        confidence = float(min_confidence)

        # Capture everything printed during the run so it can be attached to feedback.
        original_print = builtins.print

        def capture_print(*args, **kwargs):
            original_print(*args, **kwargs)
            log_messages.append(" ".join(str(a) for a in args))

        builtins.print = capture_print
        try:
            log_step("Saving uploaded file", "Preparing document...")
            input_path = save_uploaded_file(uploaded)
            extra_docs = [save_uploaded_file(f) for f in extra_uploads] if extra_uploads else None
            llm = CompletionsClient(model=llm_model) if framework == "aladdin" else OpenAIClient(model=llm_model)
            if suffix in (".xlsx", ".xls"):
                log_step("Collecting non-empty cells", "Reading workbook...")
                # NOTE(review): the returned cells are not used below; the call is
                # kept in case it has side effects — confirm whether it is needed.
                collect_non_empty_cells(input_path)
                log_step("Inferring sheet schema", "Analyzing workbook structure...")
                schema = ask_sheet_schema(input_path)
                log_step("Building answer generator", "Preparing answer generator...")
                gen = build_generator(
                    search_mode,
                    fund,
                    hits,
                    length_opt,
                    approx_words_value,
                    confidence,
                    include_citations,
                    llm,
                    extra_docs,
                )
                log_step("Starting question-answering", "Generating answers...")
                answers = []
                total_qs = len(schema)
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, entry in enumerate(schema, 1):
                    question = (entry.get("question_text") or "").strip()
                    if show_live and question:
                        qa_box.markdown(f"**Q{i}:** {question}")
                    ans = gen(question)
                    answers.append(ans)
                    progress.progress(i / total_qs, text=f"{i}/{total_qs}")
                    if show_live:
                        text = ans.get('text', '') if isinstance(ans, dict) else ans
                        qa_box.markdown(f"**A{i}:** {text}")
                # Reserve an output path and close the handle right away so the
                # writer can open the file by name.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as out_tmp:
                    out_path = out_tmp.name
                write_excel_answers(
                    schema,
                    answers,
                    input_path,
                    out_path,
                    include_comments=include_citations,
                )
                with open(out_path, "rb") as f:
                    excel_bytes = f.read()
                # Reshape the generator output into the parallel lists build_docx takes.
                questions = [(entry.get("question_text") or "").strip() for entry in schema]
                answer_texts = []
                comments_list = []
                for ans in answers:
                    if isinstance(ans, dict):
                        answer_texts.append(ans.get("text", ""))
                        cmts = []
                        for lbl, meta in sorted(ans.get("citations", {}).items(), key=lambda x: int(x[0])):
                            cmts.append((lbl, meta.get("source_file", ""), meta.get("text", ""), 0.0, ""))
                        comments_list.append(cmts)
                    else:
                        answer_texts.append(ans)
                        comments_list.append([])
                qa_doc_bytes = build_docx(questions, answer_texts, comments_list, include_comments=include_citations)
                st.session_state["output_files"] = {
                    "Download answered workbook": (excel_bytes, Path(uploaded.name).stem + "_answered.xlsx"),
                    "Download Q/A report": (qa_doc_bytes, Path(uploaded.name).stem + "_answered.docx"),
                }
            elif suffix == ".docx" and not docx_as_text:
                log_step("Extracting slots from DOCX", "Analyzing document...")
                slots_payload = extract_slots_from_docx(input_path)
                # json.dump writes str, so the temp file must be opened in text
                # mode (NamedTemporaryFile defaults to binary "w+b").
                with tempfile.NamedTemporaryFile(
                    "w", delete=False, suffix=".json", encoding="utf-8"
                ) as slots_tmp:
                    json.dump(slots_payload, slots_tmp)
                log_step("Building answer generator", "Preparing answer generator...")
                gen = build_generator(
                    search_mode,
                    fund,
                    hits,
                    length_opt,
                    approx_words_value,
                    confidence,
                    include_citations,
                    llm,
                    extra_docs,
                )
                log_step("Starting question-answering", "Generating answers...")
                answers_dict = {}
                slot_list = slots_payload.get('slots', [])
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, slot in enumerate(slot_list, 1):
                    question = (slot.get("question_text") or "").strip()
                    if show_live and question:
                        qa_box.markdown(f"**Q{i}:** {question}")
                    ans = gen(question)
                    answers_dict[slot.get('id', f'slot_{i}')] = ans
                    progress.progress(i / len(slot_list), text=f"{i}/{len(slot_list)}")
                    if show_live:
                        text = ans.get('text', '') if isinstance(ans, dict) else ans
                        qa_box.markdown(f"**A{i}:** {text}")
                with tempfile.NamedTemporaryFile(
                    "w", delete=False, suffix=".json", encoding="utf-8"
                ) as answers_tmp:
                    json.dump({'by_id': answers_dict}, answers_tmp)
                with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as out_tmp:
                    out_path = out_tmp.name
                apply_answers_to_docx(
                    docx_path=input_path,
                    slots_json_path=slots_tmp.name,
                    answers_json_path=answers_tmp.name,
                    out_path=out_path,
                    mode=docx_write_mode,
                    generator=None,
                    gen_name="streamlit_app:rag_gen",
                )
                with open(out_path, "rb") as f:
                    docx_bytes = f.read()
                st.session_state["output_files"] = {
                    "Download answered DOCX": (docx_bytes, Path(uploaded.name).stem + "_answered.docx"),
                }
            else:
                log_step("Loading input text", "Reading document...")
                raw = load_input_text(input_path)
                log_step("Extracting questions", "Finding questions...")
                questions = extract_questions(raw, llm)
                log_step("Starting question-answering", "Generating answers...")
                answers = []
                comments = []
                total_qs = len(questions)
                progress = st.progress(0)
                qa_box = st.container() if show_live else None
                for i, q in enumerate(questions, 1):
                    if show_live and q:
                        qa_box.markdown(f"**Q{i}:** {q}")
                    ans, cmts = answer_question(
                        q,
                        search_mode,
                        fund,
                        hits,
                        length_opt,
                        approx_words_value,
                        confidence,
                        llm,
                        # Forward the additional documents here too, as the
                        # workbook and DOCX branches do through build_generator.
                        extra_docs=extra_docs,
                    )
                    if not include_citations:
                        ans = re.sub(r"\[\d+\]", "", ans)
                        cmts = []
                    answers.append(ans)
                    comments.append(cmts)
                    progress.progress(i / total_qs, text=f"{i}/{total_qs}")
                    if show_live:
                        qa_box.markdown(f"**A{i}:** {ans}")
                qa_doc = build_docx(
                    questions,
                    answers,
                    comments,
                    include_comments=include_citations,
                )
                st.session_state["output_files"] = {
                    "Download Q/A report": (qa_doc, Path(uploaded.name).stem + "_answered.docx"),
                }
            step_bar.progress(1.0, text="Done")
        finally:
            # Always undo the patch, even when a step raises; otherwise print
            # stays wrapped and the next run wraps the wrapper again.
            builtins.print = original_print
        logs_text = "\n".join(log_messages)
        st.session_state["dev_logs"] = dev_logs
        st.session_state["logs_text"] = logs_text
    elif run_clicked and not fund:
        st.warning("Please select a fund or strategy before running.")
    elif run_clicked and uploaded is None:
        st.warning("Please upload a document before running.")
    elif run_clicked:
        st.warning("Approx words must be a whole number, or left blank.")

    # Downloads and feedback are rendered from session state so they persist
    # across the reruns triggered by widget interaction.
    if st.session_state.get("output_files"):
        for label, (data, fname) in st.session_state["output_files"].items():
            st.download_button(label, data, file_name=fname, key=label)
        feedback = st.radio(
            "Was the output helpful?", ["👍", "👎"], index=None, horizontal=True, key="feedback"
        )
        reason = ""
        other_feedback = ""
        if feedback == "👎":
            reason = st.selectbox(
                "Why was the result unsatisfactory?", ["Inaccurate info", "Typo", "Other"], key="feedback_reason"
            )
            if reason == "Other":
                other_feedback = st.text_input("Please describe the issue", key="feedback_other")
        if feedback and st.button("Submit feedback", key="feedback_submit"):
            save_feedback(
                feedback,
                reason,
                other_feedback,
                st.session_state.get("dev_logs", []),
                st.session_state.get("logs_text", ""),
            )
            st.success("Thanks for your feedback!")
        if st.button("Reset", key="reset"):
            # Drop the results and the feedback widgets' state, then redraw.
            for key in (
                "output_files",
                "dev_logs",
                "logs_text",
                "feedback",
                "feedback_reason",
                "feedback_other",
            ):
                st.session_state.pop(key, None)
            st.rerun()


# Start the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()
