<a href="https://colab.research.google.com/github/amanmaurya7/langchain_collab/blob/main/AI_Financial_Report_Analyst.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
!pip install -U langchain-community
!pip install pypdf
!pip install -U langchain langchain-community chromadb sentence-transformers
!pip install -U langchain langchain-community




In [2]:
import os
os.environ["PPLX_API_KEY"] = "" # your perplexity api key here

In [49]:
from langchain_community.chat_models import ChatPerplexity
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnableSequence, RunnableWithMessageHistory, RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables.history import RunnableWithMessageHistory
from operator import itemgetter

llm = ChatPerplexity(model = 'sonar-pro', temperature=0.3, max_tokens = 100)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [50]:
loader = PyPDFLoader('financial_report.pdf')
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap = 10,
    separators = ['\n\n','\n','.',' ']
)
texts = splitter.split_documents(docs)

In [51]:
db = Chroma.from_documents(texts, embeddings)

In [52]:
retriever = db.as_retriever()

In [53]:
prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are a financial report analyst. Use the retrieved context to answer queries. If answer for the query is not rooted in the context provided, answer based on the query appropriately.
Answer any user questions based solely on the context below:
<context>
{context}
</context>
"""),
    ('placeholder',"{history}"),
    ("human", "{question}")
])

In [54]:
chain = (
    {
        "context": (lambda x: x["question"]) | retriever,
        "question": (lambda x: x["question"])
    }
    | prompt
    | llm
)

In [55]:
# chain = (
#     {
#         "context": itemgetter("question") | retriever,
#         "question": itemgetter("question")
#     }
#     | prompt
#     | llm
# )

In [56]:
store={}

def get_session_history(session_id:str):
  if session_id not in store:
    store[session_id] = InMemoryChatMessageHistory()
  return store[session_id]

In [57]:
chain_with_memory = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

In [58]:
response_1 = chain_with_memory.invoke(
    {"question": "Summarize the revenue growth trend"},
    config={"configurable": {"session_id": "session1"}}
)

In [59]:
print(response_1)

content='**No direct information on revenue is available in the financial report; it primarily details declines in net profit and adjusted net income alongside falling oil prices.[1][2]**\n\nKey financial trends from the report include:\n- **Net profit** dropped 22% in 2025 to **USD 22.7 billion**, linked to average realized oil prices falling from **USD 85.7/barrel** in Q2 2024 to **USD 66.7/barrel** in Q2' additional_kwargs={'citations': ['https://www.180ops.com/revenue-trend-analysis-what-it-is-and-how-to-use-it/', 'https://improvado.io/blog/revenue-growth-charts', 'https://stripe.com/resources/more/revenue-growth-101-a-guide-for-businesses', 'https://www.geckoboard.com/best-practice/kpi-examples/revenue-growth-rate/', 'https://www.hubifi.com/blog/revenue-financial-analysis-guide', 'https://www.paddle.com/resources/revenue-growth', 'https://onlinedegrees.scu.edu/media/blog/what-is-trend-analysis', 'https://www.business-literacy.com/financial-concepts/revenue-growth/']} response_meta

In [60]:
response_2 = chain_with_memory.invoke(
    {"question": "Now tell me about the risks mentioned."},
    config={"configurable": {"session_id": "session1"}}
)

In [61]:
print(response_2)

content='**The financial report on Aramco identifies two key risks: sustained oil price volatility and dividend sustainability pressures.**[context:0][context:1]\n\nThese challenges are highlighted in the "Challenges & Outlook" section, where oil price volatility is noted as ongoing despite Aramco\'s robust cash generation.[context:1] Dividend sustainability pressures stem from a **98% year-on-year drop in performance-linked dividends** during H1 2025, attributed to lower free cash flow fro' additional_kwargs={'citations': ['https://www.bairdwealth.com/globalassets/pdfs/help/baird-investment-risks-summary.pdf', 'https://www.straitsfinancial.com/insights/types-of-investment-risks', 'https://www.ncoa.org/article/a-guide-to-types-of-investment-risk/', 'https://ssb.texas.gov/risk-return-you-cant-have-one-without-other', 'https://www.indeed.com/career-advice/career-development/types-of-risk-in-finance', 'https://www.jpmorgan.com/disclosures/jpm-pb-risks-for-investments', 'https://www.wellin

In [62]:
response_3 = chain_with_memory.invoke(
    {"question": "Combine both insights into a single summary."},
    config={"configurable": {"session_id": "session1"}}
)

In [63]:
print(response_3)

content="**Aramco's 2025 Financial Summary.** In FY 2024, Aramco's net profit declined 12% to **USD 106.25 billion** due to lower energy prices, with total dividends at **USD 85.4 billion**.[1][2] In Q2 2025, net profit fell further by 22% to **USD 22.7 billion**, driven by average realized oil prices dropping from **USD 85.7/barre" additional_kwargs={'citations': ['https://www.sourcely.net/post/how-to-synthesize-information-from-multiple-sources', 'https://www.shadecoder.com/topics/multi-document-summarization-a-comprehensive-guide-for-2025', 'https://pmc.ncbi.nlm.nih.gov/articles/PMC4097839/', 'https://scisummary.com/how-to-use/summarizing-articles/multi-doc', 'https://www.evalacademy.com/articles/how-to-combine-data-from-multiple-sources-for-cleaning-and-analysis', 'https://www.youtube.com/watch?v=LtM8g2OI6VA', 'https://www.blastx.com/insights/merging-quantitative-qualitative-data-for-full-picture-analysis', 'https://www.nngroup.com/articles/which-ux-research-methods/']} response_me

In [65]:
response_1 = chain_with_memory.invoke(
    {"question": "What is the company’s revenue growth this year?"},
    config={"configurable": {"session_id": "session2"}}
)

In [66]:
print(response_1)

content="Based on the financial report provided, **Aramco's specific revenue growth for 2026 is not disclosed** in the available context. However, the report indicates that the company is operating in a challenging market environment.\n\nThe financial report notes that **net profit in FY 2024 declined by 12% to USD 106.25 billion due to lower energy prices**, and in the first half of 2025, **performance-linked dividends dropped significantly by " additional_kwargs={'citations': ['https://chiefexecutive.net/c-suite-survey-finds-revenue-expectations-accelerating-into-2026/', 'https://www.chase.com/business/knowledge-center/manage/blo-2026', 'https://www.jpmorgan.com/insights/markets-and-economy/business-leaders-outlook/2026-us-business-leaders-outlook', 'https://www.joorney.com/news/the-10-fastest-growing-businesses-for-2026/', 'https://www.thesmallbusinessexpo.com/blog/small-business-outlook-for-2026-will-growth-and-stability-continue-this-year/']} response_metadata={} id='lc_run--019c4

In [67]:
response_2 = chain_with_memory.invoke(
    {"question": "What risks were mentioned?"},
    config={"configurable": {"session_id": "session2"}}
)

In [68]:
print(response_2)

content='**The financial report mentions two key risks for Aramco: sustained oil price volatility and dividend sustainability pressures.**\n\nThese challenges are explicitly listed under "Challenges & Outlook," with performance-linked dividends dropping by 98% in H1 2025 as evidence of the dividend issue.[1] Oil price volatility is tied to the decline in average realized oil prices from USD 85.7/barrel in Q2 2024 to USD 66.7/barrel in Q2 202' additional_kwargs={'citations': ['https://compliance.temple.edu/enterprise-risk-management/types-risk-erm', 'https://risk.sais.jhu.edu/articles/key-types-of-global-risk/', 'https://projectmanagementacademy.net/resources/blog/risk-types-in-project-management/', 'https://www.allianz-trade.com/en_US/insights/business-risks.html', 'https://www.indeed.com/career-advice/career-development/risks-business', 'https://www.metricstream.com/learn/what-are-risk-categories.html', 'https://www.strikegraph.com/blog/what-are-the-7-types-of-risk', 'https://simplica

In [69]:
response_3 = chain_with_memory.invoke(
    {"question": "Give me a combined summary of revenue and risks so far."},
    config={"configurable": {"session_id": "session2"}}
)

In [70]:
print(response_3)

content='**No direct revenue figures are available in the financial report context, which focuses on net profit, adjusted net income, cash flows, and oil prices rather than revenue.**\n\n### Key Financial Metrics from the Report (Q2 and H1 2025):\n- **Net profit** in Q2 2025 fell 22% year-over-year to **USD 22.7 billion**, driven by average realized oil prices dropping from **USD 85.7/barrel** in Q2 20' additional_kwargs={'citations': ['https://www.venasolutions.com/blog/small-business-revenue-statistics', 'https://chiefexecutive.net/c-suite-survey-finds-revenue-expectations-accelerating-into-2026/', 'https://ebudget.ca.gov/2025-26/pdf/Revised/BudgetSummary/RevenueEstimates.pdf', 'https://www.chase.com/business/knowledge-center/manage/blo-2026', 'https://www.ibisworld.com/global/industry-trends/biggest-industries-by-revenue/', 'https://www.jpmorgan.com/insights/markets-and-economy/business-leaders-outlook/2026-us-business-leaders-outlook', 'https://www.lucahq.com/blog-posts/planning-fo