## Install and Import Libraries

In [None]:
# Quietly (-q) install or upgrade (-U) the packages this notebook imports:
# the Gemini SDK, LangChain plus its community and Google GenAI integrations,
# Chroma, sentence-transformers, tiktoken, and pypdf.
!pip install -q -U google-generativeai langchain langchain-community langchain-google-genai chromadb sentence-transformers tiktoken pypdf

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

## Setup Google Genai LLM

In [None]:
# Colab-only import: `userdata` is used below to read notebook secrets by name.
from google.colab import userdata
# Export the Gemini key as an environment variable and also hand it to the
# google.generativeai SDK explicitly.
# NOTE(review): ChatGoogleGenerativeAI presumably picks the key up from the
# GOOGLE_API_KEY environment variable — confirm.
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
# Shared chat model used by the agent functions and the RAG chain below.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2)

In [None]:
# Export the Hugging Face key stored in the notebook secrets.
# NOTE(review): nothing in the visible notebook reads this variable — confirm
# it is still required.
os.environ["HUGGINGFACE_API_KEY"] = userdata.get('HUGGINGFACE_API_KEY')

## Download RAG documents

In [None]:
import os

os.makedirs("/content/papers", exist_ok=True)

pdf_files = {
    "kpi_rag_dataset.pdf":"1rgvkk0xC1yJ7sxsZkfxOkEcJpc6du_vP"
}

for filename, file_id in pdf_files.items():
  url = f"https://drive.google.com/uc?export=download&id={file_id}"
  output_path = f"/content/papers/{filename}"
  !wget -q --show-progress "{url}" -O "{output_path}"

print("Files downloaded successfully")

In [None]:
# Load the downloaded PDF with PyPDFLoader, then split the loaded pages into
# overlapping chunks (chunk_size=1000, chunk_overlap=150) for embedding.
pdf_path = "/content/papers/kpi_rag_dataset.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.split_documents(pages)

# Vector store: embed the chunks with a default-configured HuggingFaceEmbeddings
# model and index them in a Chroma store that persists to the "rag_db" folder.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# existing "rag_db" collection again — confirm, and clear the folder if so.
embedding_model = HuggingFaceEmbeddings()
vectorstore = Chroma.from_documents(chunks, embedding_model, persist_directory="rag_db")
vectorstore.persist()

# RAG chain: answers questions with `llm`, using the vector store's default
# retriever and the "stuff" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff"
)

In [None]:
# Sample startups (business type + funding stage), each paired with the list
# of KPIs that startup currently tracks.
test_inputs = [
    dict(
        startup=dict(type="SaaS", stage="Early"),
        kpis=["Instagram Followers", "Website Visits", "Monthly Recurring Revenue", "Trial Conversion Rate"],
    ),
    dict(
        startup=dict(type="Marketplace", stage="Growth"),
        kpis=["GMV", "Likes", "Conversion Rate", "Active Sellers"],
    ),
    dict(
        startup=dict(type="D2C", stage="Seed"),
        kpis=["Impressions", "Repeat Purchase Rate", "Net Promoter Score"],
    ),
    dict(
        startup=dict(type="Subscription", stage="Scale"),
        kpis=["Subscribers", "Website Visits", "Churn Rate", "Engagement Rate"],
    ),
]

# Print one banner per test case (numbered from 1); no agent is invoked here.
for i, test_case in enumerate(test_inputs, start=1):
    startup, kpis = test_case["startup"], test_case["kpis"]

    print(f"\n===================== Test Case {i}: {startup['type']} ({startup['stage']} Stage) =====================")


## Agent 1: Startup Interpreter Agent

In [None]:
def interpret_startup(startup_info):
    """Ask the LLM which core metric a startup should focus on.

    Args:
        startup_info: Mapping with "type" and "stage" keys describing the
            startup.

    Returns:
        The dictionary parsed from the model's reply. The prompt requests the
        keys ``type``, ``stage`` and ``relevant_metric_focus``; the keys are
        not validated here.

    Raises:
        ValueError: If the reply contains no dictionary literal or the
            literal cannot be parsed into a dict.
    """
    # Function-scope imports keep this cell runnable on its own.
    import ast
    import json

    prompt = f"""
You are a startup KPI expert.

Given the following startup information:
- Type: {startup_info["type"]}
- Stage: {startup_info["stage"]}

Suggest what the startup's core metric focus should be at this stage (e.g., retention, GMV, engagement, churn, growth, etc.).
Return the output as a Python dictionary with keys: type, stage, relevant_metric_focus.
"""
    response = llm.invoke(prompt).content.strip()

    # Keep only the outermost {...} span. This drops Markdown code fences,
    # a language tag such as "python" after the opening fence, and any prose
    # or "name = " prefix the model wraps around the dictionary.
    start = response.find("{")
    end = response.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No dictionary found in LLM response: {response!r}")
    literal = response[start:end + 1]

    # SECURITY: the reply is untrusted model output, so it is parsed as a
    # literal instead of being passed to eval(), which would execute it.
    try:
        parsed = ast.literal_eval(literal)
    except (ValueError, SyntaxError, TypeError):
        # The model sometimes answers in JSON (true/false/null), which is not
        # a valid Python literal.
        try:
            parsed = json.loads(literal)
        except json.JSONDecodeError as err:
            raise ValueError(
                f"Could not parse LLM response as a dictionary: {response!r}"
            ) from err

    if not isinstance(parsed, dict):
        raise ValueError(f"LLM response is not a dictionary: {response!r}")
    return parsed


In [None]:
# context = interpret_startup(startup)
# print("✅ Agent 1 Output (Startup Interpretation):")
# print(context)

## Agent 2: KPI Critique Agent

In [None]:
def critique_kpis_with_llm(kpis, context):
    """Have the LLM label each KPI as a vanity metric or an impact metric.

    Args:
        kpis: Iterable of KPI names (strings); joined with ", " in the prompt.
        context: Mapping with "type" and "stage" keys describing the startup.

    Returns:
        The ``content`` of the model's reply to the classification prompt.
    """
    startup_type = context['type']
    startup_stage = context['stage']
    kpi_listing = ', '.join(kpis)

    # Assemble the prompt line by line; the empty first and last entries give
    # the leading and trailing newline.
    instructions = "\n".join([
        "",
        "You are a KPI analysis expert for startups.",
        "",
        f"Startup Type: {startup_type}",
        f"Startup Stage: {startup_stage}",
        "",
        "Classify each of the following KPIs as a 'Vanity Metric' or an 'Impact Metric'.",
        "Provide a one-line explanation for each.",
        "",
        f"KPIs: {kpi_listing}",
        "",
    ])

    reply = llm.invoke(instructions)
    return reply.content

In [None]:
# critique_output = critique_kpis_with_llm(kpis, context)
#     print("\n✅ Agent 2 Output (LLM Critique of KPIs):")
#     print(critique_output)

In [None]:
# test_kpi_inputs = [
#     {
#         "kpis": ["Instagram Followers", "Website Visits", "Monthly Recurring Revenue", "Trial Conversion Rate"],
#         "context": {"type": "SaaS", "stage": "Early"}
#     },
#     {
#         "kpis": ["GMV", "Likes", "Conversion Rate", "Active Sellers"],
#         "context": {"type": "Marketplace", "stage": "Growth"}
#     },
#     {
#         "kpis": ["Impressions", "Repeat Purchase Rate", "Net Promoter Score"],
#         "context": {"type": "D2C", "stage": "Seed"}
#     },
#     {
#         "kpis": ["Subscribers", "Website Visits", "Churn Rate", "Engagement Rate"],
#         "context": {"type": "Subscription", "stage": "Scale"}
#     }
# ]

# for i, test_case in enumerate(test_kpi_inputs, 1):
#     kpis = test_case["kpis"]
#     context = test_case["context"]
#     print(f"\n--- Test Case {i} ---")
#     print(f"Input KPIs: {kpis}")
#     print(f"Startup Context: {context}")
#     print("LLM Critique Output:\n")
#     result = critique_kpis_with_llm(kpis, context)
#     print(result)


## Agent 3: Impact Metric Retriever Agent

In [None]:
def retrieve_impact_metrics(context):
    """Query the RAG chain for the most impactful KPIs for a startup.

    Args:
        context: Mapping with "stage" and "type" keys describing the startup.

    Returns:
        The answer produced by ``qa_chain`` for the generated question.
    """
    query = f"What are the most impactful KPIs for a {context['stage']} stage {context['type']} startup?"
    # `Chain.run` is deprecated in LangChain; `invoke` takes the input under
    # the "query" key and returns the answer under "result".
    return qa_chain.invoke({"query": query})["result"]

In [None]:
# Standalone Agent 3 demo, kept commented out like the Agent 1 and Agent 2
# snippets: the stray indentation on the print lines made this cell an
# IndentationError, and `context` is only assigned inside the full pipeline
# loop at the end of the notebook.
# rag_output = retrieve_impact_metrics(context)
# print("\nAgent 3 Output (RAG Retrieved KPIs):")
# print(rag_output)

In [None]:
# # --- Test Cases for Agent 3: Impact Metric Retriever Agent ---
# test_startup_contexts = [
#     {"type": "SaaS", "stage": "Early"},
#     {"type": "Marketplace", "stage": "Growth"},
#     {"type": "D2C", "stage": "Seed"},
#     {"type": "Subscription", "stage": "Scale"},
# ]

# # --- Run and Print Outputs ---
# for i, context in enumerate(test_startup_contexts, 1):
#     print(f"\n🔍 Test Case {i}: {context['type']} - {context['stage']} Stage")
#     query = f"What are the most impactful KPIs for a {context['stage']} stage {context['type']} startup?"
#     result = qa_chain.run(query)
#     print(result)


In [None]:
# Agent 4: Visualize Combined Insight
# Standalone demo, kept commented out: the indented lines made this cell an
# IndentationError, and it called `visualize_insights` before the cell that
# defines it, using `critique_output` / `rag_output` values that are only
# assigned inside the full pipeline loop at the end of the notebook.
# print("\n✅ Agent 4: Insight Visualizer Output:")
# visualize_insights(startup, critique_output, rag_output)

In [None]:
def visualize_insights(startup, critique_output, rag_output):
    """Print a combined report of the KPI critique and the retrieved KPIs.

    Args:
        startup: Mapping with "type" and "stage" keys, shown in the heading.
        critique_output: KPI classification text to print.
        rag_output: Knowledge-base recommendation text to print.

    Returns:
        None; the report is written to standard output.
    """
    heading = f"\n📊 Summary for {startup['type']} ({startup['stage']} stage)"
    report = (
        heading,
        "----------------------------------------------------",
        "🔍 KPI Classification:\n",
        critique_output,
        "\n📘 Recommended Impact KPIs from Knowledge Base:\n",
        rag_output,
        "----------------------------------------------------\n",
    )
    # One entry per output line; print() converts each entry with str().
    print(*report, sep="\n")

In [None]:
# End-to-end run: send every sample startup through the four agents in order.
for i, test_case in enumerate(test_inputs, start=1):
    startup, kpis = test_case["startup"], test_case["kpis"]

    print(f"\n===================== Test Case {i}: {startup['type']} ({startup['stage']} Stage) =====================")

    # Agent 1 turns the raw startup description into the context dictionary
    # consumed by the next two agents.
    context = interpret_startup(startup)
    print("✅ Agent 1 Output (Startup Interpretation):", context, sep="\n")

    # Agent 2 classifies the startup's own KPIs.
    critique_output = critique_kpis_with_llm(kpis, context)
    print("\n✅ Agent 2 Output (LLM Critique of KPIs):", critique_output, sep="\n")

    # Agent 3 asks the RAG chain for recommended KPIs.
    rag_output = retrieve_impact_metrics(context)
    print("\n✅ Agent 3 Output (RAG Retrieved KPIs):", rag_output, sep="\n")

    # Agent 4 prints the critique and the recommendations as one summary.
    print("\n✅ Agent 4: Insight Visualizer Output:")
    visualize_insights(startup, critique_output, rag_output)