In [17]:
import requests
from bs4 import BeautifulSoup
import fitz  # PyMuPDF
import io



In [11]:
def extract_latest_concall_text(ticker: str) -> str:
    try:
        import requests, io
        import fitz
        from bs4 import BeautifulSoup

        base_url = f"https://www.screener.in/company/{ticker}/concall/"
        headers = {"User-Agent": "Mozilla/5.0"}

        # Step 1: Load concall page
        response = requests.get(base_url, headers=headers)
        soup = BeautifulSoup(response.text, "lxml")

        # Step 2: Find the first concall detail page link
        detail_links = soup.select("a[href^='/company/'][href*='/concall/']")
        if not detail_links:
            return "❌ No concall detail pages found."

        first_concall_url = "https://www.screener.in" + detail_links[0]["href"]

        # Step 3: Go to the detail page and find PDF
        detail_response = requests.get(first_concall_url, headers=headers)
        detail_soup = BeautifulSoup(detail_response.text, "lxml")
        pdf_link = detail_soup.find("a", href=True, string=lambda x: x and x.endswith(".pdf"))
        if not pdf_link:
            return "❌ PDF not found in concall detail page."

        pdf_url = "https://www.screener.in" + pdf_link["href"]
        print(f"📄 Downloading Transcript PDF: {pdf_url}")

        # Step 4: Extract PDF content
        pdf_response = requests.get(pdf_url)
        pdf_bytes = io.BytesIO(pdf_response.content)
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")

        text = ""
        for page in doc:
            text += page.get_text()

        return text.strip() or "⚠️ PDF is empty or not extractable."

    except Exception as e:
        return f"❌ Error extracting Transcript PDF content: {e}"


In [12]:
extract_latest_concall_text("Titan") 

'❌ No concall detail pages found.'

In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd

from data_fetch_backup import (
    get_profit_loss_df,
    get_balance_sheet_df,
    get_shareholding_pattern,
    get_cashflow_df
)

# Load environment variables
load_dotenv()

# Get API key
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)


def build_summary_input(pl_df, cf_df, bs_df, sh_df) -> str:
    def _get_row(df, regex):
        match = df[df["Line Item"].str.contains(regex, case=False, na=False)]
        if not match.empty:
            return match.iloc[0]
        return pd.Series(["N/A"] * len(df.columns), index=df.columns)

    def _get_latest(df, regex):
        row = _get_row(df, regex)
        return row.iloc[-1] if isinstance(row, pd.Series) else "N/A"

    def _to_num(x):
        try:
            return float(str(x).split()[0].replace(",", ""))
        except:
            return np.nan

    # ----- Balance Sheet -----
    total_assets = _get_latest(bs_df, r"Total Assets")
    borrowings = _get_latest(bs_df, r"Borrowings")
    cash_equiv = _get_latest(bs_df, r"Cash")
    curr_assets = _get_latest(bs_df, r"Current Assets")
    curr_liab = _get_latest(bs_df, r"Current Liabilities")
    current_ratio = round(_to_num(curr_assets) / _to_num(curr_liab), 2) if all(map(np.isfinite, [_to_num(curr_assets), _to_num(curr_liab)])) else "N/A"

    # ----- Shareholding -----
    try:
        promoter_row = sh_df[sh_df.columns[0]].str.lower().str.contains("promoter")
        promoter_holding = sh_df.loc[promoter_row].iloc[0, -1] if promoter_row.any() else "N/A"
    except:
        promoter_holding = "N/A"

    try:
        fii_row = sh_df[sh_df.columns[0]].str.lower().str.contains("fii")
        fii_holding = sh_df.loc[fii_row].iloc[0, -1] if fii_row.any() else "N/A"
    except:
        fii_holding = "N/A"

    # ----- P&L and Cash Flow -----
    sales_row = _get_row(pl_df, r"Sales")
    net_profit_row = _get_row(pl_df, r"Net Profit")
    cfo_row = _get_row(cf_df, r"Cash from Operating|Cash Flow from Ops")

    def last_n_years(row, n=5):
        try:
            return row[-n:].tolist()
        except:
            return ["N/A"] * n

    return f"""
📈 Profit & Loss (last 5 years):
- Sales: {last_n_years(sales_row)}
- Net Profit: {last_n_years(net_profit_row)}

💸 Cash Flow (last 5 years):
- CFO: {last_n_years(cfo_row)}
- Net Profit vs CFO Divergence: check if pattern diverges

🧮 Balance Sheet (latest year only):
- Total Assets: {total_assets}
- Borrowings: {borrowings}
- Cash & Equivalents: {cash_equiv}
- Current Ratio (CA / CL): {current_ratio}

🧾 Shareholding Pattern (latest):
- Promoter Holding: {promoter_holding}
- FII Holding: {fii_holding}
"""


def run_forensic_analysis(ticker: str, client):
    print(f"🔎 Running forensic analysis for: {ticker}")

    print("📥 Fetching Profit & Loss data...")
    pl_df = get_profit_loss_df(ticker)

    print("📥 Fetching Cash Flow data...")
    cf_df = get_cashflow_df(ticker)

    print("📥 Fetching Balance Sheet data...")
    bs_df = get_balance_sheet_df(ticker)

    print("📥 Fetching Shareholding Pattern data...")
    sh_df = get_shareholding_pattern(ticker)

    print("✅ All data fetched. Generating prompt...")

    summary = build_summary_input(pl_df, cf_df, bs_df, sh_df)

    forensic_prompt = f"""
You are a forensic accounting analyst AI.

Given structured financial data below, identify signs of accounting fraud, manipulation, or financial red flags.

Evaluate the following:

1. CFO vs Net Profit → consistent gap = low earnings quality?
2. Trade Receivables or Inventory buildup vs Sales?
3. Any abnormal jump in borrowings?
4. Decline in promoter holding?
5. Debt/equity trends — high leverage?
6. Any red flag combinations?

Use accounting logic and known fraud detection heuristics.

Return:
- 📛 Red Flags (3–5 bullets)
- 🔍 Reasoning for each red flag
- ✅ Forensic Risk Score (0–100)
- ⛔ Recommend 'Avoid', 'Caution', or 'No Red Flags'
- For each red flag provide the facts and figures being used
- You can ignore the red flag if some pattern is just for 1 year

Here is the financial data:
{summary}
"""

    print("🧠 Querying OpenAI model...")
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a forensic accounting analyst AI."},
            {"role": "user", "content": forensic_prompt}
        ]
    )

    result = response.choices[0].message.content
    print("\n📊 Forensic Analysis Result:\n")
    print(result)
    return result


In [18]:
run_forensic_analysis("TITAN", client)

🔎 Running forensic analysis for: TITAN
📥 Fetching Profit & Loss data...
📥 Fetching Profit & Loss data for TITAN...
✅ Profit & Loss data fetched successfully.
📥 Fetching Cash Flow data...
📥 Fetching Cash Flow data for TITAN...
✅ Cash Flow data fetched successfully.
📥 Fetching Balance Sheet data...
📥 Fetching Balance Sheet data for TITAN...
✅ Balance Sheet data fetched successfully.
📥 Fetching Shareholding Pattern data...
📥 Fetching Shareholding Pattern data for TITAN...
✅ Shareholding data fetched successfully.
✅ All data fetched. Generating prompt...
🧠 Querying OpenAI model...

📊 Forensic Analysis Result:

Based on the provided financial data, here are the identified red flags and evaluation:

### 📛 Red Flags
1. **Divergence Between CFO and Net Profit**
   - **Facts**: 
     - CFO: [4,139, -724, 1,370, 1,695, -541]
     - Net Profit: [974, 2,198, 3,274, 3,496, 3,337]
   - **Reasoning**: The fourth and fifth-year Net Profit figures are significantly higher than CFO, suggesting that prof

'Based on the provided financial data, here are the identified red flags and evaluation:\n\n### 📛 Red Flags\n1. **Divergence Between CFO and Net Profit**\n   - **Facts**: \n     - CFO: [4,139, -724, 1,370, 1,695, -541]\n     - Net Profit: [974, 2,198, 3,274, 3,496, 3,337]\n   - **Reasoning**: The fourth and fifth-year Net Profit figures are significantly higher than CFO, suggesting that profits may be inflated through accounting measures rather than genuine cash generation. A consistent gap raises concern about earnings quality.\n\n2. **Negative CFO in 2 out of 5 years**\n   - **Facts**: CFO (-724 in Year 2, -541 in Year 5)\n   - **Reasoning**: Negative CFO indicates that the company is not generating operational cash flow consistently, which can signal problems with operational efficiency or revenue recognition issues, especially if accompanied by rising net profits.\n\n3. **Decline in Promoter Holding**\n   - **Facts**: Promoter Holding: 52.90% \n   - **Reasoning**: A declining trend

In [None]:
import requests
from bs4 import BeautifulSoup

from data_fetch_backup import (
    get_profit_loss_df,
    get_cashflow_df,
    get_balance_sheet_df,
    get_shareholding_pattern    
)

def build_summary_input(pl_df, cf_df, bs_df, sh_df) -> str:
    def _get_row(df, regex):
        match = df[df["Line Item"].str.contains(regex, case=False, na=False)]
        if not match.empty:
            return match.iloc[0]
        return pd.Series(["N/A"] * len(df.columns), index=df.columns)

    def _get_latest(df, regex):
        row = _get_row(df, regex)
        return row.iloc[-1] if isinstance(row, pd.Series) else "N/A"

    def _to_num(x):
        try:
            return float(str(x).split()[0].replace(",", ""))
        except:
            return np.nan

    # Balance Sheet
    total_assets = _get_latest(bs_df, r"Total Assets")
    borrowings = _get_latest(bs_df, r"Borrowings")
    cash_equiv = _get_latest(bs_df, r"Cash")
    curr_assets = _get_latest(bs_df, r"Current Assets")
    curr_liab = _get_latest(bs_df, r"Current Liabilities")
    current_ratio = round(_to_num(curr_assets) / _to_num(curr_liab), 2) if all(map(np.isfinite, [_to_num(curr_assets), _to_num(curr_liab)])) else "N/A"

    # Shareholding
    try:
        promoter_row = sh_df[sh_df.columns[0]].str.lower().str.contains("promoter")
        promoter_holding = sh_df.loc[promoter_row].iloc[0, -1] if promoter_row.any() else "N/A"
    except:
        promoter_holding = "N/A"

    try:
        fii_row = sh_df[sh_df.columns[0]].str.lower().str.contains("fii")
        fii_holding = sh_df.loc[fii_row].iloc[0, -1] if fii_row.any() else "N/A"
    except:
        fii_holding = "N/A"

    # P&L and Cash Flow
    sales_row = _get_row(pl_df, r"Sales")
    net_profit_row = _get_row(pl_df, r"Net Profit")
    cfo_row = _get_row(cf_df, r"Cash from Operating|Cash Flow from Ops")

    def last_n_years(row, n=5):
        try:
            return row[-n:].tolist()
        except:
            return ["N/A"] * n

    return f"""
📈 Profit & Loss (last 5 years):
- Sales: {last_n_years(sales_row)}
- Net Profit: {last_n_years(net_profit_row)}

💸 Cash Flow (last 5 years):
- CFO: {last_n_years(cfo_row)}
- Net Profit vs CFO Divergence: check if pattern diverges

🧮 Balance Sheet (latest year only):
- Total Assets: {total_assets}
- Borrowings: {borrowings}
- Cash & Equivalents: {cash_equiv}
- Current Ratio (CA / CL): {current_ratio}

🧾 Shareholding Pattern (latest):
- Promoter Holding: {promoter_holding}
- FII Holding: {fii_holding}
"""

def get_all_financial_data(ticker: str):
    return {
        "ticker": ticker,
        "pnl": get_profit_loss_df(ticker),
        "cashflow": get_cashflow_df(ticker),
        "balance_sheet": get_balance_sheet_df(ticker),
        "shareholding": get_shareholding_pattern(ticker),
    }


import requests
from bs4 import BeautifulSoup
import re

import requests
from bs4 import BeautifulSoup
import re

def get_peer_companies(ticker: str, max_peers: int = 5):
    url = f"https://www.screener.in/company/{ticker}/peers/"
    headers = {"User-Agent": "Mozilla/5.0"}

    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")

    # Look for the peer comparison table under section "Peer comparison"
    table = soup.find("table")
    if not table:
        print("❌ Peer comparison table not found.")
        return []

    rows = table.select("tbody tr")
    peers = []
    
    for row in rows[1:]:  # skip header
        first_col = row.find("td")
        if first_col and first_col.find("a"):
            href = first_col.find("a")["href"]
            match = re.search(r"/company/([^/]+)/", href)
            if match:
                peer_ticker = match.group(1).upper()
                if peer_ticker != ticker.upper():
                    peers.append(peer_ticker)
        if len(peers) >= max_peers:
            break

    if not peers:
        print("❌ No peers found.")
    else:
        print(f"✅ Found peers for {ticker.upper()}: {peers}")
    return peers




def summarize(data):
    return build_summary_input(
        data["pnl"], data["cashflow"], data["balance_sheet"], data["shareholding"]
    )

def run_peer_comparison(ticker: str, client):
    target_data = get_all_financial_data(ticker)
    peer_tickers = get_peer_companies(ticker)

    peer_data_list = []
    for peer in peer_tickers:
        try:
            peer_data = get_all_financial_data(peer)
            peer_data_list.append(peer_data)
        except Exception as e:
            print(f"Skipping {peer} due to error: {e}")
    
    peer_summaries = "\n\n".join([f"{p['ticker']}:\n{summarize(p)}" for p in peer_data_list])

    comparison_prompt = f"""
You are a financial analyst AI.

Compare the financial health and metrics of the target company with its peers.

Target: {ticker}
Peers: {', '.join([p['ticker'] for p in peer_data_list])}

🔹 Target Company Financials:
{summarize(target_data)}

🔸 Peer Company Financials:
{peer_summaries}

Give:
- Relative strengths and weaknesses
- Performance highlights
- Final peer comparison verdict
"""

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a financial comparison analyst AI."},
            {"role": "user", "content": comparison_prompt}
        ]
    )

    return response.choices[0].message.content


In [26]:
target_data = get_all_financial_data("TITAN")

📥 Fetching Profit & Loss data for TITAN...
✅ Profit & Loss data fetched successfully.
📥 Fetching Cash Flow data for TITAN...
✅ Cash Flow data fetched successfully.
📥 Fetching Balance Sheet data for TITAN...
✅ Balance Sheet data fetched successfully.
📥 Fetching Shareholding Pattern data for TITAN...
✅ Shareholding data fetched successfully.


In [27]:
target_data

{'ticker': 'TITAN',
 'pnl':             Line Item Mar 2014 Mar 2015 Mar 2016 Mar 2017 Mar 2018 Mar 2019  \
 0             Sales +   10,927   11,913   11,276   13,261   16,120   19,779   
 1          Expenses +    9,880   10,761   10,337   12,097   14,476   17,785   
 2    Operating Profit    1,047    1,153      939    1,164    1,644    1,994   
 3               OPM %      10%      10%       8%       9%      10%      10%   
 4      Other Income +      117       67       69      -42       70      178   
 5            Interest       87       81       42       38       53       53   
 6        Depreciation       68       90       98      111      131      163   
 7   Profit before tax    1,010    1,049      868      973    1,530    1,957   
 8               Tax %      27%      22%      22%      28%      28%      29%   
 9        Net Profit +      735      816      675      697    1,102    1,389   
 10          EPS in Rs     8.28     9.19     7.60     8.01    12.73    15.82   
 11  Dividend

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

def get_peer_comparison_table(ticker: str) -> pd.DataFrame:
    url = f"https://www.screener.in/company/{ticker}/"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")

    # Find the peer comparison table
    table = soup.find("table", class_="table table-sm table-responsive")

    if not table:
        print("❌ Peer comparison table not found.")
        return None

    # Extract headers
    headers = [th.text.strip() for th in table.find_all("th")]

    # Extract rows
    data = []
    for row in table.find_all("tr")[1:]:  # Skip header row
        cols = [td.text.strip() for td in row.find_all("td")]
        if cols:
            data.append(cols)

    # Convert to DataFrame
    df = pd.DataFrame(data, columns=headers)
    return df


In [6]:
peer_tickers = get_peer_comparison_table("GPIL")
peer_tickers

❌ Peer comparison table not found.


In [None]:
 

    peer_data_list = []
    for peer in peer_tickers:
        try:
            peer_data = get_all_financial_data(peer)
            peer_data_list.append(peer_data)
        except Exception as e:
            print(f"Skipping {peer} due to error: {e}")
    
    peer_summaries = "\n\n".join([f"{p['ticker']}:\n{summarize(p)}" for p in peer_data_list])

In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_peer_comparison_table(ticker: str) -> pd.DataFrame:
    base_url = f"https://www.screener.in/company/{ticker}/"
    headers = {"User-Agent": "Mozilla/5.0"}

    # Step 1: Get correct slug (redirect target)
    session = requests.Session()
    resp = session.get(base_url, headers=headers, allow_redirects=True)
    if resp.status_code != 200:
        raise Exception(f"❌ Failed to fetch base page for {ticker}")

    actual_slug = resp.url.strip("/").split("/")[-1]  # e.g., "TITAN"
    peer_url = f"https://www.screener.in/company/{actual_slug}/company-peers/"

    # Step 2: Fetch peer table page
    resp2 = session.get(peer_url, headers=headers)
    if resp2.status_code != 200:
        raise Exception(f"❌ Failed to fetch peers page. Status {resp2.status_code}")

    soup = BeautifulSoup(resp2.text, "html.parser")
    table = soup.find("table", class_="data-table")
    if not table:
        raise Exception("❌ Peer comparison table not found.")

    headers = [th.text.strip() for th in table.find("thead").find_all("th")]
    rows = []
    for tr in table.find("tbody").find_all("tr"):
        tds = [td.text.strip() for td in tr.find_all("td")]
        if tds:
            rows.append(tds)

    df = pd.DataFrame(rows, columns=headers)
    return df


In [14]:
get_peer_comparison_table("TCS")

Exception: ❌ Failed to fetch peers page. Status 404

In [33]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from typing import List
import os
from dotenv import load_dotenv

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(model_name="gpt-4o", openai_api_key=openai_api_key)

def get_peer_companies_via_gpt_lc(ticker: str) -> List[str]:
    system_prompt = "You are a stock market assistant. Only return peer company names."

    user_prompt = f"""You are a financial analyst AI.

Your task is to find 3 to 5 **publicly listed Indian companies** that are **direct business competitors** or operate in the **same sector or product category** as the company given below.

Strict rules:
- Only include companies listed on NSE or BSE.
- Do NOT include conglomerates or unrelated businesses.
- Focus on **product overlap**, **customer segment**, or **business model similarity**.
- Do NOT include indices (like NIFTY), generic codes (like 500112), or unrelated firms.
- Do NOT repeat the target company itself.

Return only peer names or ticker symbols as a comma-separated list, no extra text.

Example  
Input Company: TITAN  
Output: TBZ, Kalyan Jewellers, PC Jeweller, Senco Gold

Input Company: {ticker}
"""

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt)
    ]

    response = llm(messages)
    peers_text = response.content.strip()

    return [peer.strip() for peer in peers_text.split(",") if peer.strip()]


  llm = ChatOpenAI(model_name="gpt-4o", openai_api_key=openai_api_key)


In [36]:
peers = get_peer_companies_via_gpt_lc("GPIL")
print("✅ Peers:", peers)


✅ Peers: ['JSW Steel', 'Tata Steel', 'SAIL', 'Jindal Steel & Power', 'NMDC']


In [None]:
import requests
from bs4 import BeautifulSoup

from data_fetch_backup import (
    get_profit_loss_df,
    get_cashflow_df,
    get_balance_sheet_df,
    get_shareholding_pattern,
    build_summary_input,
    get_peer_companies
)

def get_all_financial_data(ticker: str):
    return {
        "ticker": ticker,
        "pnl": get_profit_loss_df(ticker),
        "cashflow": get_cashflow_df(ticker),
        "balance_sheet": get_balance_sheet_df(ticker),
        "shareholding": get_shareholding_pattern(ticker),
    }


In [38]:

from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
from dotenv import load_dotenv
import os
import pandas as pd
from typing import List

In [39]:





# Load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(model_name="gpt-4o", openai_api_key=openai_api_key)

In [None]:



def get_peer_companies_via_gpt_lc(ticker: str) -> List[str]:
    system_prompt = "You are a stock market assistant. Only return peer company names."

    user_prompt = f"""You are a financial analyst AI.

Your task is to find 3 to 5 **publicly listed Indian companies** that are **direct business competitors** or operate in the **same sector or product category** as the company given below.

Strict rules:
- Only include companies listed on NSE or BSE.
- Do NOT include conglomerates or unrelated businesses.
- Focus on **product overlap**, **customer segment**, or **business model similarity**.
- Do NOT include indices (like NIFTY), generic codes (like 500112), or unrelated firms.
- Do NOT repeat the target company itself.

Return only peer names or ticker symbols as a comma-separated list, no extra text.

Example  
Input Company: TITAN  
Output: TBZ, Kalyan Jewellers, PC Jeweller, Senco Gold

Input Company: {ticker}
"""

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_prompt)
    ]

    response = llm(messages)
    peers_text = response.content.strip()
    return [peer.strip() for peer in peers_text.split(",") if peer.strip()]


def build_summary_input(pl_df, cf_df, bs_df, sh_df) -> str:
    def _get_row(df, regex):
        match = df[df["Line Item"].str.contains(regex, case=False, na=False)]
        return match.iloc[0] if not match.empty else pd.Series(["N/A"] * len(df.columns), index=df.columns)

    def _get_latest(df, regex):
        row = _get_row(df, regex)
        return row.iloc[-1] if isinstance(row, pd.Series) else "N/A"

    def _to_num(x):
        try:
            return float(str(x).split()[0].replace(",", ""))
        except:
            return float("nan")

    total_assets = _get_latest(bs_df, r"Total Assets")
    borrowings = _get_latest(bs_df, r"Borrowings")
    cash_equiv = _get_latest(bs_df, r"Cash")
    curr_assets = _get_latest(bs_df, r"Current Assets")
    curr_liab = _get_latest(bs_df, r"Current Liabilities")
    current_ratio = round(_to_num(curr_assets) / _to_num(curr_liab), 2) if pd.notna(_to_num(curr_assets)) and pd.notna(_to_num(curr_liab)) else "N/A"

    try:
        promoter_row = sh_df[sh_df.columns[0]].str.lower().str.contains("promoter")
        promoter_holding = sh_df.loc[promoter_row].iloc[0, -1] if promoter_row.any() else "N/A"
    except:
        promoter_holding = "N/A"

    try:
        fii_row = sh_df[sh_df.columns[0]].str.lower().str.contains("fii")
        fii_holding = sh_df.loc[fii_row].iloc[0, -1] if fii_row.any() else "N/A"
    except:
        fii_holding = "N/A"

    sales_row = _get_row(pl_df, r"Sales")
    net_profit_row = _get_row(pl_df, r"Net Profit")
    cfo_row = _get_row(cf_df, r"Cash from Operating|Cash Flow from Ops")

    def last_n_years(row, n=5):
        try:
            return row[-n:].tolist()
        except:
            return ["N/A"] * n

    return f"""
📈 Profit & Loss (last 5 years):
- Sales: {last_n_years(sales_row)}
- Net Profit: {last_n_years(net_profit_row)}

💸 Cash Flow (last 5 years):
- CFO: {last_n_years(cfo_row)}
- Net Profit vs CFO Divergence: check if pattern diverges

🧮 Balance Sheet (latest year only):
- Total Assets: {total_assets}
- Borrowings: {borrowings}
- Cash & Equivalents: {cash_equiv}
- Current Ratio (CA / CL): {current_ratio}

🧾 Shareholding Pattern (latest):
- Promoter Holding: {promoter_holding}
- FII Holding: {fii_holding}
"""


def get_all_financial_data(ticker: str):
    return {
        "ticker": ticker,
        "pnl": get_profit_loss_df(ticker),
        "cashflow": get_cashflow_df(ticker),
        "balance_sheet": get_balance_sheet_df(ticker),
        "shareholding": get_shareholding_pattern(ticker),
    }


def summarize(data):
    return build_summary_input(
        data["pnl"], data["cashflow"], data["balance_sheet"], data["shareholding"]
    )


def run_peer_comparison(ticker: str):
    target_data = get_all_financial_data(ticker)
    peer_tickers = get_peer_companies_via_gpt_lc(ticker)

    peer_data_list = []
    skipped_peers = []

    for peer in peer_tickers:
        try:
            print(f"🔄 Fetching data for peer: {peer}")
            peer_data = get_all_financial_data(peer)

            # ✅ Ensure all required DataFrames are present and non-empty
            if not all([
                isinstance(peer_data["pnl"], pd.DataFrame) and not peer_data["pnl"].empty,
                isinstance(peer_data["cashflow"], pd.DataFrame) and not peer_data["cashflow"].empty,
                isinstance(peer_data["balance_sheet"], pd.DataFrame) and not peer_data["balance_sheet"].empty,
                isinstance(peer_data["shareholding"], pd.DataFrame) and not peer_data["shareholding"].empty,
            ]):
                raise ValueError("One or more financial tables are missing or empty.")

            # ✅ Ensure 'Line Item' column exists in pnl, cashflow, balance sheet
            for key in ["pnl", "cashflow", "balance_sheet"]:
                if "Line Item" not in peer_data[key].columns:
                    raise KeyError(f"'{key}' missing 'Line Item' column")

            # ✅ Validate summarization does not raise
            _ = build_summary_input(
                peer_data["pnl"],
                peer_data["cashflow"],
                peer_data["balance_sheet"],
                peer_data["shareholding"]
            )

            peer_data["ticker"] = peer
            peer_data_list.append(peer_data)

        except Exception as e:
            print(f"❌ Skipping {peer} due to error: {e}")
            skipped_peers.append(peer)

    if skipped_peers:
        print(f"⚠️ Skipped peers: {', '.join(skipped_peers)}")

    if not peer_data_list:
        return "❌ No valid peer data could be fetched."

    peer_summaries = "\n\n".join([
        f"{p['ticker']}:\n{build_summary_input(p['pnl'], p['cashflow'], p['balance_sheet'], p['shareholding'])}"
        for p in peer_data_list
    ])

    comparison_prompt = f"""
You are a financial comparison analyst AI.

Compare the financial health and metrics of the target company with its peers.

Target: {ticker}
Peers: {', '.join([p['ticker'] for p in peer_data_list])}

🔹 Target Company Financials:
{build_summary_input(target_data["pnl"], target_data["cashflow"], target_data["balance_sheet"], target_data["shareholding"])}

🔸 Peer Company Financials:
{peer_summaries}

Give:
1. 📊 Financial metric comparison table (Revenue, Net Profit, Debt, CFO, etc)
2. ✅ Top 1–2 strengths and weaknesses of the target
3. 🔍 Relative positioning vs each peer
4. 🏆 Final verdict: Best positioned peer
"""

    response = llm([
        SystemMessage(content="You are a financial comparison analyst AI."),
        HumanMessage(content=comparison_prompt)
    ])

    return response.content



# Example usage:
# print(run_peer_comparison("ITC"))



In [47]:
run_peer_comparison("TITAN")

📥 Fetching Profit & Loss data for TITAN...
✅ Profit & Loss data fetched successfully.
📥 Fetching Cash Flow data for TITAN...
✅ Cash Flow data fetched successfully.
📥 Fetching Balance Sheet data for TITAN...
✅ Balance Sheet data fetched successfully.
📥 Fetching Shareholding Pattern data for TITAN...
✅ Shareholding data fetched successfully.
🔄 Fetching data for peer: TBZ
📥 Fetching Profit & Loss data for TBZ...
✅ Profit & Loss data fetched successfully.
📥 Fetching Cash Flow data for TBZ...
✅ Cash Flow data fetched successfully.
📥 Fetching Balance Sheet data for TBZ...
✅ Balance Sheet data fetched successfully.
📥 Fetching Shareholding Pattern data for TBZ...
✅ Shareholding data fetched successfully.
🔄 Fetching data for peer: Kalyan Jewellers
📥 Fetching Profit & Loss data for Kalyan Jewellers...
❌ Error fetching P&L: 'NoneType' object has no attribute 'find'
📥 Fetching Cash Flow data for Kalyan Jewellers...
❌ Error fetching Cash Flow: 'NoneType' object has no attribute 'find'
📥 Fetching Ba

KeyError: 'Line Item'