In [None]:
import subprocess
import sys
import streamlit as st

@st.cache_resource
def install_packages():
    """Install the app's Python dependencies with pip, reporting progress in the UI.

    Each package is installed by its own ``pip install`` run under the current
    interpreter (``sys.executable``) so the progress bar can advance one step
    per package. Installation stops at the first package that fails.

    Returns:
        bool: True when every package installed, False when an install failed.
        The function is wrapped in ``st.cache_resource``, so the returned value
        is what Streamlit caches for later calls.
    """
    packages = [
        "certifi",
        "charset-normalizer",
        "faiss-cpu",
        "idna",
        "numpy",
        "packaging",
        "python-dotenv",
        "requests",
        "urllib3",
        "pyarrow",
        "PyPDF2",
        "python-docx",
        "spacy",
    ]

    progress_bar = st.progress(0, text="Setting up the application...")
    total = len(packages)
    for i, package in enumerate(packages, start=1):
        progress_bar.progress(int((i - 1) / total * 100), text=f"Setting up step {i} of {total}...")
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except subprocess.CalledProcessError:
            st.error("Something went wrong while setting things up. Please try again or contact support.")
            # Bail out here: falling through would announce "Setup complete"
            # and report success even though a dependency is missing.
            return False
        progress_bar.progress(int(i / total * 100), text=f"Finished step {i} of {total}")

    progress_bar.progress(100, text="Setup complete")
    st.success("You're all set!")
    return True

# Run the dependency setup as soon as this cell/module executes; the return
# value is intentionally ignored here.
install_packages()


In [None]:
import streamlit as st
import os
import tempfile
import json
import re
from pathlib import Path
from typing import List, Optional

from cli_app import (
    load_input_text,
    extract_questions,
    build_docx,
)
from qa_core import answer_question
from answer_composer import CompletionsClient, get_openai_completion
from input_file_reader.interpreter_sheet import collect_non_empty_cells
from rfp_xlsx_slot_finder import ask_sheet_schema
from rfp_xlsx_apply_answers import write_excel_answers
from rfp_docx_slot_finder import extract_slots_from_docx
from rfp_docx_apply_answers import apply_answers_to_docx

def load_fund_tags(path: Optional[Path] = None) -> List[str]:
    """Return the sorted, de-duplicated tags found in the embedding metadata file.

    The file is expected to hold a JSON list of objects, each optionally
    carrying ``{"metadata": {"tags": [...]}}``. Entries that do not match that
    shape are skipped rather than aborting the whole load.

    Args:
        path: Location of the JSON file (``~`` is expanded). Defaults to
            ``~/derivs-tool/rfp-ai-tool/structured_extraction/embedding_data.json``.

    Returns:
        Sorted list of unique string tags, or an empty list when the file is
        missing, unreadable, or not valid JSON.
    """
    if path is None:
        path = Path('~/derivs-tool/rfp-ai-tool/structured_extraction/embedding_data.json')
    source = Path(path).expanduser()

    try:
        with source.open('r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. The tag list only
        # feeds an optional filter, so degrade to "no tags" instead of failing.
        return []

    if not isinstance(data, list):
        return []

    tags = set()
    for item in data:
        if not isinstance(item, dict):
            continue
        metadata = item.get('metadata')
        if not isinstance(metadata, dict):
            # Also covers an explicit `"metadata": null`.
            continue
        item_tags = metadata.get('tags')
        if isinstance(item_tags, list):
            # Keep strings only so the result stays sortable.
            tags.update(t for t in item_tags if isinstance(t, str))
    return sorted(tags)


class OpenAIClient:
    """Small adapter that exposes ``get_completion`` on top of ``get_openai_completion``."""

    def __init__(self, model: str) -> None:
        # Model name forwarded with every completion request.
        self.model = model

    def get_completion(self, prompt: str, json_output: bool = False):
        """Return the completion for ``prompt`` from the configured model.

        ``json_output`` is passed through unchanged to ``get_openai_completion``.
        """
        completion = get_openai_completion(
            prompt,
            self.model,
            json_output=json_output,
        )
        return completion


def save_uploaded_file(uploaded_file) -> str:
    """Copy an uploaded file to a persistent temporary file and return its path.

    Args:
        uploaded_file: Object exposing ``.name`` (used only for its extension)
            and ``.read()`` returning the content as bytes.

    Returns:
        Path of the temporary file. It is created with ``delete=False``, so it
        stays on disk after this function returns; removing it is the
        caller's responsibility.
    """
    suffix = Path(uploaded_file.name).suffix
    # Close the handle before returning: leaving it open leaks a descriptor
    # and prevents other code from reopening the file on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
        return tmp.name


def build_generator(
    search_mode: str,
    fund: Optional[str],
    k: int,
    length: Optional[str],
    approx_words: Optional[int],
    min_confidence: float,
    include_citations: bool,
    llm,
    extra_docs: Optional[List[str]] = None,
):
    """Bind the retrieval/answer settings into a one-argument answer callable.

    The returned callable forwards the question plus the bound settings to
    ``answer_question``. With ``include_citations`` false it returns the answer
    string with ``[n]`` markers removed; otherwise it returns
    ``{"text": answer, "citations": {label: {"text", "source_file"}}}``.
    """

    def gen(question: str):
        answer, hits = answer_question(
            question,
            search_mode,
            fund,
            k,
            length,
            approx_words,
            min_confidence,
            llm,
            extra_docs=extra_docs,
        )

        if include_citations:
            # Each hit is a 5-tuple; only label, source and snippet are kept.
            citations = {}
            for label, source, snippet, _score, _date in hits:
                citations[label] = {"text": snippet, "source_file": source}
            return {"text": answer, "citations": citations}

        # No citations wanted: strip inline "[n]" reference markers.
        return re.sub(r"\[\d+\]", "", answer)

    return gen


def _parse_approx_words(raw: str) -> Optional[int]:
    """Convert the "Approx words" text field to an int; blank input means None.

    Raises:
        ValueError: if the text is non-blank but not an integer.
    """
    text = (raw or "").strip()
    return int(text) if text else None


def _new_temp_path(suffix: str) -> str:
    """Create an empty, persistent temp file and return its path with the handle closed."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        return handle.name


def main():
    """Render the Streamlit UI and, on "Run", answer the uploaded document.

    Flow:
      1. Pick the LLM framework (``ANSWER_FRAMEWORK`` env var or a select box)
         and collect any credentials missing from the environment.
      2. Collect the upload and the answer settings; "Developer" mode exposes
         more knobs than "User" mode.
      3. When "Run" is pressed with a file uploaded, dispatch on the file
         extension: spreadsheets are filled via ``write_excel_answers``, DOCX
         files via ``apply_answers_to_docx`` (unless "Treat DOCX as text" is
         set), and everything else is turned into a Q/A report with
         ``build_docx``. The result is offered through a download button.
    """
    st.title("RFP Responder")
    view_mode = st.sidebar.radio("Interface mode", ["User", "Developer"], index=0)

    framework_env = os.getenv("ANSWER_FRAMEWORK")
    if framework_env:
        if view_mode == "Developer":
            st.info(f"Using framework from ANSWER_FRAMEWORK: {framework_env}")
        framework = framework_env
    else:
        framework = st.selectbox("Framework", ["aladdin", "openai"], index=0, help="Choose backend for language model.")

    # Credentials: prefer the environment; otherwise prompt and export the
    # value into os.environ for this process.
    if framework == "aladdin":
        for key, label in [
            ("aladdin_studio_api_key", "Aladdin Studio API key"),
            ("defaultWebServer", "Default Web Server"),
            ("aladdin_user", "Aladdin user"),
            ("aladdin_passwd", "Aladdin password"),
        ]:
            if os.getenv(key):
                if view_mode == "Developer":
                    st.info(f"{key} loaded from environment")
            else:
                val = st.text_input(label, type="password" if "passwd" in key or "api_key" in key else "default")
                if val:
                    os.environ[key] = val
    else:
        if os.getenv("OPENAI_API_KEY"):
            if view_mode == "Developer":
                st.info("OPENAI_API_KEY loaded from environment")
        else:
            api_key = st.text_input("OpenAI API key", type="password", help="API key for OpenAI.")
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key

    uploaded = st.file_uploader(
        "Upload document",
        type=["pdf", "docx", "txt", "xlsx"],
        help="Upload the RFP or question file.",
    )

    if view_mode == "Developer":
        st.info("Search mode fixed to 'both'")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Filter answers for a specific fund or strategy.",
        )
        llm_model = st.text_input("LLM model", "gpt-4o-mini", help="Model name for generating answers.")
        k_max_hits = st.number_input("Hits per question", value=20, help="Maximum documents retrieved per question.")
        length_opt = st.selectbox("Answer length", ["one sentence", "short", "long", "default"], index=0)
        approx_words = st.text_input("Approx words", value="", help="Approximate words per answer (optional).")
        min_confidence = st.number_input("Min confidence", value=0.0, help="Minimum score for retrieved documents.")
        include_env = os.getenv("RFP_INCLUDE_COMMENTS")
        if include_env is not None:
            # Any value other than "0" enables citations.
            include_citations = include_env != "0"
            st.info(f"Using include citations from RFP_INCLUDE_COMMENTS: {include_citations}")
        else:
            include_citations = st.checkbox("Include citations with comments", value=True)
        docx_as_text = st.checkbox("Treat DOCX as text", value=False)
        docx_write_mode = st.selectbox("DOCX write mode", ["fill", "replace", "append"], index=0)
        extra_uploads = st.file_uploader(
            "Additional documents", type=["pdf", "docx", "txt"], accept_multiple_files=True
        )
    else:
        st.markdown("### Settings")
        search_mode = "both"
        fund = st.selectbox(
            "Fund", [""] + load_fund_tags(), index=0,
            help="Select fund or strategy context for better answers.",
        )
        llm_model = "gpt-4o-mini"
        k_max_hits = 20
        length_opt = st.selectbox(
            "Answer length", ["one sentence", "short", "long", "default"], index=0,
            help="Controls how verbose the answer is.",
        )
        approx_words = st.text_input("Approx words", value="", help="Optional target word count for answers.")
        min_confidence = 0.0
        include_citations = st.checkbox(
            "Include citations", value=True,
            help="Attach source references to answers.",
        )
        docx_as_text = False
        docx_write_mode = "fill"
        extra_uploads = None

    # The button is always rendered; work only starts once a file is present.
    if not (st.button("Run") and uploaded is not None):
        return

    # Validate free-text numeric input before doing any work so a typo shows
    # a message instead of a traceback.
    try:
        approx_word_count = _parse_approx_words(approx_words)
    except ValueError:
        st.error("Approx words must be a whole number.")
        return
    max_hits = int(k_max_hits)
    confidence = float(min_confidence)

    input_path = save_uploaded_file(uploaded)
    extra_docs = [save_uploaded_file(f) for f in extra_uploads] if extra_uploads else None
    llm = CompletionsClient(model=llm_model) if framework == "aladdin" else OpenAIClient(model=llm_model)
    suffix = Path(uploaded.name).suffix.lower()
    answered_stem = Path(uploaded.name).stem + "_answered"

    # Building the closure is cheap; it is shared by the XLSX and DOCX paths.
    gen = build_generator(
        search_mode,
        fund,
        max_hits,
        length_opt,
        approx_word_count,
        confidence,
        include_citations,
        llm,
        extra_docs,
    )

    if suffix in (".xlsx", ".xls"):
        # NOTE(review): the result of this call is unused; the call is kept in
        # case it validates the workbook -- confirm and drop if it is pure.
        collect_non_empty_cells(input_path)
        schema = ask_sheet_schema(input_path)
        answers = [gen((entry.get("question_text") or "").strip()) for entry in schema]
        out_path = _new_temp_path(".xlsx")
        write_excel_answers(
            schema,
            answers,
            input_path,
            out_path,
            include_comments=include_citations,
        )
        with open(out_path, "rb") as f:
            st.download_button(
                "Download answered workbook",
                f,
                file_name=answered_stem + ".xlsx",
            )
    elif suffix == ".docx" and not docx_as_text:
        slots = extract_slots_from_docx(input_path)
        # json.dump writes str, so the temp file must be opened in text mode
        # (NamedTemporaryFile defaults to binary 'w+b').
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".json", encoding="utf-8"
        ) as slots_tmp:
            json.dump(slots, slots_tmp)
            slots_path = slots_tmp.name
        out_path = _new_temp_path(".docx")
        apply_answers_to_docx(
            docx_path=input_path,
            slots_json_path=slots_path,
            answers_json_path="",
            out_path=out_path,
            mode=docx_write_mode,
            generator=gen,
            gen_name="streamlit_app:rag_gen",
        )
        with open(out_path, "rb") as f:
            st.download_button(
                "Download answered DOCX",
                f,
                file_name=answered_stem + ".docx",
            )
    else:
        raw = load_input_text(input_path)
        questions = extract_questions(raw, llm)
        answers = []
        comments = []
        for q in questions:
            ans, cmts = answer_question(
                q,
                search_mode,
                fund,
                max_hits,
                length_opt,
                approx_word_count,
                confidence,
                llm,
                # Keep parity with the XLSX/DOCX paths, which pass the
                # additional documents through build_generator.
                extra_docs=extra_docs,
            )
            if not include_citations:
                ans = re.sub(r"\[\d+\]", "", ans)
                cmts = []
            answers.append(ans)
            comments.append(cmts)
        qa_doc = build_docx(
            questions,
            answers,
            comments,
            include_comments=include_citations,
        )
        # Write and close before reopening for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".docx") as out_tmp:
            out_tmp.write(qa_doc)
            out_path = out_tmp.name
        with open(out_path, "rb") as f:
            st.download_button(
                "Download Q/A report",
                f,
                file_name=answered_stem + ".docx",
            )


# Entry point: build the UI only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
