In [None]:
# Problem Statement: "The Automated Investment Risk Analyst"
# The Business Context: You are a Quantitative Researcher at a hedge fund. Your job is to monitor portfolio companies for risks. A sudden drop in revenue is a "lagging indicator"—by the time you see it, it's often too late. You need to connect these financial dips to operational events immediately.
# The Data Challenge: You have two disconnected datasets for a company called "Solaris AI":
# The Hard Numbers (solaris_financials.csv): A structured dataset containing quarterly Revenue, OpEx, and Churn Rates.
# Constraint: LLMs are bad at math. You cannot just ask "What is the average?" and trust the answer. You need an engine that can execute code.
# The Soft Signals (solaris_meeting_notes.txt): Unstructured text logs from the CEO's board meetings.
# Constraint: This file is too large to paste into a prompt every time. You need a retrieval system to find only the relevant paragraphs.
# The Goal: Build a unified LangChain Agent that acts as a Controller. When a user asks a complex question, the Agent must intelligently route the sub-tasks to the correct "Brain":
# Logic/Math: A Pandas Agent that writes Python code to query the CSV.
# Context/Search: A Vector Store Retriever (FAISS) that performs semantic search on the text.
# The Question:
# "Using the Financial Data, identify which Quarter had the highest Churn Rate. Then, use the Meeting Notes to explain the specific operational incident that caused this spike."


In [None]:
pip install langchain==0.1.7 langchain-community==0.0.20 langchain-experimental==0.0.51 langchain-core==0.1.23 sentence-transformers faiss-cpu pandas langchain-google-genai google-generativeai

In [None]:
import getpass
import os

import pandas as pd
from langchain.agents import initialize_agent
from langchain.agents.agent_types import AgentType
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.tools import Tool
from langchain.vectorstores import FAISS
from langchain_experimental.agents import create_csv_agent
from langchain_experimental.text_splitter import SemanticChunker
from langchain_google_genai import ChatGoogleGenerativeAI

# 1) Load meeting notes: read the whole file into memory as one string.
with open("solaris_meeting_notes.txt", "r", encoding="utf-8") as f:
    notes_text = f.read()

# Fail fast with a clear message instead of an obscure error further down
# when there is nothing to split or index.
if not notes_text.strip():
    raise ValueError("solaris_meeting_notes.txt is empty; nothing to index.")

# 2) Embeddings (HF): the same embedding model is used for chunking and indexing.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 3) Semantic chunking: split the notes into chunks using the embedding model.
splitter = SemanticChunker(embeddings)
chunks = splitter.split_text(notes_text)

# Show a size summary rather than dumping the whole file into the cell output.
print(f"Loaded {len(notes_text):,} characters; split into {len(chunks)} chunks")

# 4) Store in FAISS: build the vector index and persist it next to the notebook.
vs = FAISS.from_texts(chunks, embeddings)
vs.save_local("meeting_notes_index")

print("✅ Saved semantic chunks to meeting_notes_index")

In [None]:
# Single source of truth for the financial dataset path.
FINANCIALS_CSV = "solaris_financials.csv"

# Loaded for ad-hoc inspection; the CSV agent below reads the file itself.
df = pd.read_csv(FINANCIALS_CSV)

# Take the API key from the environment and prompt only when it is missing,
# so the key is never stored in the notebook and an already-exported key is
# never overwritten with an empty value.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter GOOGLE_API_KEY: ")

llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash",
    temperature=0.2,
)

# Agent that answers numeric questions by running pandas code over the CSV.
# NOTE(review): a custom `prefix` may replace the agent's default preamble that
# describes the dataframe — confirm the agent still gets enough context.
csv_agent = create_csv_agent(
    llm,
    FINANCIALS_CSV,
    verbose=True,
    prefix="give the output as text",
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [None]:
def financial_tool_func(query: str) -> str:
    """Answer a numeric question by delegating it to the CSV agent.

    Args:
        query: Natural-language question about the financial data.

    Returns:
        The CSV agent's final answer text.
    """
    # `Chain.run` is deprecated in the pinned LangChain 0.1.x line; `invoke`
    # is the supported entry point and returns a dict whose "output" entry
    # holds the agent's final answer.
    result = csv_agent.invoke({"input": query})
    return result["output"]


# Expose the CSV agent to the controller as a named tool.
financial_tool = Tool(
    name="financial_analyst",
    func=financial_tool_func,
    description="Use for numeric questions: revenue, churn, profit, averages, trends, comparisons.",
)

In [None]:
def meeting_notes_func(query: str, k: int = 4) -> str:
    """Retrieve the meeting-note chunks most relevant to ``query``.

    Args:
        query: Natural-language search text.
        k: Maximum number of chunks to retrieve from the vector store.
            Defaults to 4, so single-argument calls behave as before.

    Returns:
        The retrieved chunks joined by newlines, or a short notice when the
        vector store returns nothing.
    """
    retriever = vs.as_retriever(search_kwargs={"k": k})
    docs = retriever.get_relevant_documents(query)
    if not docs:
        # Give the calling agent an explicit signal rather than a blank observation.
        return "No relevant meeting notes found."
    return "\n".join(doc.page_content for doc in docs)


# Expose the semantic search over the meeting notes to the controller as a named tool.
notes_tool = Tool(
    name="meeting_notes_search",
    func=meeting_notes_func,
    description="Use for operational context, incidents, reasons, explanations from internal meeting notes.",
)

In [None]:
# Controller agent: decides, per step, whether to call the financial tool
# (numbers) or the meeting-notes tool (context).
controller = initialize_agent(
    tools=[financial_tool, notes_tool],
    llm=llm,
    # `initialize_agent` selects the agent kind through `agent=`; it has no
    # `agent_type` parameter, so that keyword does not choose the agent.
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # Feed malformed LLM output back to the model as an observation instead of
    # aborting the whole run on the first output-parsing error.
    handle_parsing_errors=True,
)

In [None]:
# Hybrid question: needs both the numbers (CSV agent) and the narrative (meeting notes).
# `invoke` replaces the deprecated direct call on the agent executor; print only
# the final answer rather than the whole input/output dict.
response = controller.invoke({"input": "Which quarter had the highest churn and why?"})
print(response["output"])

In [None]:
# def classify_query(query: str) -> str:
#     q = query.lower()

#     has_number = any(w in q for w in [
#         "highest", "lowest", "average", "trend", "drop", "increase", "decrease",
#         "revenue", "churn", "profit", "margin", "net income", "opex"
#     ])

#     has_reason = any(w in q for w in [
#         "why", "reason", "because", "due to", "cause", "explain"
#     ])

#     if has_number and has_reason:
#         return "HYBRID"
#     if has_number:
#         return "FINANCIAL"
#     return "CONTEXT"

In [None]:
# def safe_llm_call(prompt: str):
#     try:
#         result = llm.invoke(prompt)
#         if result is None or str(result).strip() == "":
#             return "⚠️ LLM returned empty response.\n\n" + prompt
#         return llm.invoke(prompt)
#     except Exception as e:
#         return f"⚠️ LLM failed: {e}"

In [None]:
# # controller = initialize_agent(
# #     tools=[financial_tool, notes_tool],
# #     llm=llm,
# #     agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
# #     verbose=True
# # )
# def run_financial(query):
#     return financial_tool.run(query)


# def run_context(query):
#     return notes_tool.run(query)


# def run_hybrid(query):
#     numeric = financial_tool.run(query)

#     import re
#     m = re.search(r"Q[1-4]", numeric)
#     quarter = m.group() if m else None

#     context_query = f"What operational events were discussed in {quarter}?" if quarter else query
#     context = notes_tool.run(context_query)

#     prompt = f"""
# Financial Analysis:
# {numeric}

# Meeting Notes:
# {context}

# Explain clearly what caused this.
# """
#     return safe_llm_call(prompt)

In [None]:
# def controller(query: str):
#     intent = classify_query(query)

#     if intent == "FINANCIAL":
#         return run_financial(query)

#     if intent == "CONTEXT":
#         return run_context(query)

#     if intent == "HYBRID":
#         return run_hybrid(query)

#     return "Could not classify query."

In [None]:
# Numeric-only question: expected to be routed to the financial tool.
# `invoke` replaces the deprecated direct call; print only the final answer.
response = controller.invoke({"input": "Which quarter had the highest churn rate?"})
print(response["output"])

In [None]:
# Context-only question: expected to be routed to the meeting-notes search tool.
# `invoke` replaces the deprecated direct call; print only the final answer.
response = controller.invoke({"input": "What incident happened in Q3?"})
print(response["output"])

In [None]:
# Hybrid question: the controller should first find the quarter, then explain it
# from the notes. `invoke` replaces the deprecated direct call; print only the answer.
response = controller.invoke({"input": "Which quarter had the highest churn and why?"})
print(response["output"])