In [1]:
from pydantic import BaseModel,Field
from enum import Enum
from typing import List,Dict
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from edgar import *
from langchain_core.messages import HumanMessage,SystemMessage
from langchain_core.tools import tool,StructuredTool
import logging
model = ChatGroq(
    model='openai/gpt-oss-120b'
)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
class TenQSECSection(str, Enum):
    FINANCIAL_STATEMENTS = "1"     
    MDA = "2"                    
    MARKET_RISK = "3"             
    CONTROLS_PROCEDURES = "4"      
    LEGAL_PROCEEDINGS = "2-1"        
    RISK_FACTORS = "2-1A"            
    UNREGISTERED_SECURITIES = "2-2"  
    DEFAULTS = "2-3"                 
    MINE_SAFETY = "2-4"             
    OTHER_INFORMATION = "2-5"        
    EXHIBITS = "2-6"    
class TenKSECSection(str, Enum):
    BUSINESS = "1"             
    RISK_FACTORS = "1A"        
    UNRESOLVED_STAFF_COMMENTS = "1B"
    PROPERTIES = "2"           
    LEGAL_PROCEEDINGS = "3" 
    MINE_SAFETY = "4"
    MARKET_FOR_EQUITY = "5"     
    RESERVED_6 = "6"          
    MDA = "7"                  
    MARKET_RISK = "7A"         
    ACCOUNTING_DISPUTES = "9"   
    INTERNAL_CONTROLS = "9A"    
    OTHER_INFO = "9B"          
    FOREIGN_AUDIT = "9C"      
    DIRECTORS_GOVERNANCE = "10" 
    EXECUTIVE_COMP = "11"     
    SECURITY_OWNERSHIP = "12"  
    CERTAIN_RELATIONSHIPS = "13"
    AUDIT_FEES = "14"         
    EXHIBITS = "15"

class TenQFilingToolState(BaseModel):
    query : str
    tickers : List[str] = Field(...,description='List of targetted tickers.')
    years : List[str] = Field(...,description="List of years e.g.['2024','2023']")
    sections : List[TenQSECSection] = Field(...,
                                            description="""
'1'  : Unaudited financial statements and notes for the quarter.
'2'  : Management explanation of quarterly performance and changes.
'3'  : Exposure to market risks (interest rates, FX, commodities).
'4'  : Disclosure controls and internal control procedures.

'2-1'  : Material legal proceedings during the quarter.
'2-1A: Updated risk factors (only if there are material changes).
'2-2'  : Unregistered securities sales and share repurchases.
'2-3'  : Defaults on senior securities (if any).
'2-4'  : Mine safety disclosures (if applicable).
'2-5'  : Other material information not previously reported.
'2-6'  : Exhibits, certifications, and attachments.""")


class TenKFilingToolState(BaseModel):
    query : str
    tickers : List[str] = Field(...,description='List of targetted tickers.')
    years : List[str] = Field(...,description="List of years e.g.['2024','2023']")
    sections : List[TenKSECSection] = Field(...,
                                            description="""
    '1' : Company operations, products, services, markets, strategy.
    '1A' : Major risks affecting business, finances, or operations.
    '2' : Physical assets like offices, plants, warehouses.
    '3' : Major lawsuits or regulatory cases.
    '4' : Mining-related safety reporting (if applicable).
    '5' : Stock info, dividends, shareholders.
    '6 : Historical financial summary.
    '7' : Management explanation of financial performance and outlook.
    '7A' : Interest rate, currency, commodity risk exposure.
    '9' : Accounting disputes (if any).
    '9A' : Internal controls over financial reporting.
    '9B' : Miscellaneous disclosures.
    '9C' : Disclosure about foreign audit inspections.
    '10' : Leadership and governance structure.
    '11' : Salary, bonuses, stock compensation.
    '12' : Major shareholders and insider ownership.
    '13' : Conflicts of interest or insider dealings.
    '14' : Audit and consulting fees.
    '15' : Contracts, certifications, supporting filings.
                                            """) 
def get10KFiling(state : TenKFilingToolState):
    """
    Fetches all the required sections of 10K of a company
    """
    tickers = state.get('tickers',[])
    years = state.get('years',[])
    query = state.get('query'," ")
    sections = state.get('sections',[])
    

    set_identity("arxishhh@gmail.com")

    proofs = []

    for ticker in tickers:
            
        try :
            company = Company(ticker)
            filings = company.get_filings(form='10-K')

            if not filings:
                continue

            targetted_filings = [
                f for f in filings
                if any(year in str(f.filing_date) for year in years)
            ]

            for f in targetted_filings:
                try :
                    filing_obj = f.obj()
                    if not filing_obj :
                        continue

                    for section in sections:

                        item_key = f"Item {section}"
                        content = obj[item_key]

                        proofs.append(
                            {
                                'ticker' : ticker,
                                'time' : str(f.filing_date),
                                'source' : '10-K',
                                'section':section,
                                'content': content
                            })
                    
                except Exception as e:
                    logging.warning(f"Error fetching file for {f.accession_no}: {e}")
                    continue
            
        except Exception as e:
            logging.error(f"Failed to fetch data for {ticker} : {e}")
            continue     
def getQFiling(state : TenQFilingToolState):
    """Fetches all the required sections of 10Q of a company"""

    tickers = state.get('tickers',[])
    years = state.get('years',[])
    query = state.get('query'," ")
    sections = state.get('sections',[])
    

    set_identity("arxishhh@gmail.com")

    proofs = []

    for ticker in tickers:
            
        try :
            company = Company(ticker)
            filings = company.get_filings(form='10-Q')

            if not filings:
                continue

            targetted_filings = [
                f for f in filings
                if any(year in str(f.filing_date) for year in years)
            ]

            for f in targetted_filings:
                try :
                    filing_obj = f.obj()
                    if not filing_obj :
                        continue

                    for section in sections:

                        if '-' in section:
                            _,num = section.split('-')
                            item_key = f'Part II, Item {num}'

                        else :
                            item_key = f'Part I, Item {section}'
                            
                        content = filing_obj[item_key]

                        proofs.append(
                            {
                                'ticker' : ticker,
                                'time' : str(f.filing_date),
                                'source' : '10-Q',
                                'section':section,
                                'content': content
                            })
                    
                except Exception as e:
                    logging.warning(f"Error fetching file for {f.accession_no}: {e}")
                    continue
            
        except Exception as e:
            logging.error(f"Failed to fetch data for {ticker} : {e}")
            continue
class Ticker(BaseModel):
    company : List[str]

@tool 
def getBalanceSheet(ticker : str,year : str):
    """Return Balance sheet for the company and the year"""
    return {
        'content':f'Got Balance Sheet of {ticker} for the year {year}'
    }


def tickerTOOL(state : Ticker):
    'Finds the tickers of the given companies'
    return 'AAPL'



In [13]:
tickertool = StructuredTool.from_function(
    name='TickerResolver',
    func = tickerTOOL,
    args_schema=Ticker,
    description='Finds the tickers of the given companies'
)
tenktool = StructuredTool.from_function(
    name='TenKFilingTool',
    func=get10KFiling,
    description='Fetches all the required sections of 10K of a company',
    args_schema=TenKFilingToolState
)
tenqtool = StructuredTool.from_function(
    name='TenQFilingTool',
    func=getQFiling,
    description='Fetches all the required sections of 10Q of a company for all the quarters of the following years',
    args_schema=TenQFilingToolState
)

In [None]:
tools = [tenktool,tenqtool,getBalanceSheet,tickerTOOL]
from langchain_core.prompts import PromptTemplate


prompt = PromptTemplate(
    
    template = """
YOU ARE AN FINANCER AGENT THAT COLLECTS FINANCE SHEET EVIDENCE.

YOU WORK ITERATIVELY.

ON EACH STEP YOU MUST DECIDE:
1. WHAT EVIDENCE IS MISSING
2. WHICH VALUE OF FINANCE SHEET TO CHOOSE
3. WHICH TOOL TO CALL NEXT
4. WHICH YEARS STILL NEED COVERAGE

YOU ARE BUILDING A PROOF SET.

DO NOT TRY TO SOLVE THE USER QUERY.

AFTER COMPLETING THE PROOF SET RETURN DONE (AS A MESSAGE) WHICH ENDS THE ITERATION.

----------------------------------

COLLECTION REQUIREMENTS

Ticker must exist.
All requested years must be covered.
You can choose any of these three to answer the query
- Cash Flow Statement
- Income Statement
- Balance Sheet

If any requirement is missing → CALL A TOOL.
If all are satisfied → RETURN DONE.

----------------------------------

YEAR HANDLING

If a range is mentioned, expand it.

Example:
2023–2025 → ["2023","2024","2025"]

Never collapse to one year.

----------------------------------

QUERY REFORMULATION

Always pass a well defined query which can be used for semantic searching.

----------------------------------

STOP CONDITION (MANDATORY CHECK)

Before responding, verify the following checklist:

Ticker identified: YES/NO (COMPULSORY)
Cash Flow Proofs Collected : YES/NO (OPTIONAL)
Income Statement Proofs Collected: YES/NO (OPTIONAL)
Balance Sheet Proofs Collected: YES/NO (OPTIONAL)
All years covered: YES/NO (COMPULSORY)

IF ANY ITEM IS "NO":
DO WE NEED IT?
YES : CALL A TOOL.
NO : MOVE FORWARD.

IF ALL ITEMS ARE "YES":
RETURN EXACTLY:
DONE (AS A MESSAGE)

DO NOT CALL ANY TOOL AFTER RETURNING DONE.
DO NOT PROVIDE EXPLANATIONS.
MAKE THE ITERATIONS AS LESS AS POSSIBLE CALL MULTIPLE TOOLS AT THE SAME TIME BUT FOLLOW THE GIVEN PATH MENTIONED (IF APPLICABLE)

----------------------------------

QUERY:
{query}

COLLECTED PROOFS: THESE ARE THE PROOFS WHICH GOT COLLECTED BY THE USER.
PROOFS ARE THE HOLY GRAIL NO NEED TO VERIFY THEM.
{proofs}
CURRENT YEAR JAN 2026

""",
input_variables=['query','proofs']


)

In [15]:
p = prompt.invoke({
    'query': "Describe Apple’s products and services from the latest year of all quarters.",
    'proofs':""})
llm_with_tools = model.bind_tools(tools)
response = llm_with_tools.invoke(p)

In [16]:
response

AIMessage(content='', additional_kwargs={'reasoning_content': 'We need to follow the auditor agent steps.\n\nWe have query: "Describe Apple’s products and services from the latest year of all quarters."\n\nWe need to collect filing evidence. Requirements: Ticker must exist. All requested years must be covered. Both filings must be used: 10K and 10Q (use only if asked explicitly). The query asks about "latest year of all quarters". That suggests we need 10Q for the latest year (2025? current year Jan 2026, latest year completed would be 2025). Also need 10K for the latest year (2025). The query is about products and services, which is in 10K Item 1 (business) and possibly 10Q Item 2 (management discussion). The requirement says both filings must be used: 10K and 10Q (use only if asked explicitly). The query didn\'t explicitly ask for 10Q, but "latest year of all quarters" implies need 10Q. So we need both.\n\nWe need to identify ticker for Apple: AAPL.\n\nYears needed: latest year: 2025

In [17]:
response.tool_calls

[{'name': 'tickerTOOL',
  'args': {'state': {'company': ['Apple']}},
  'id': 'fc_d1789e13-c578-475d-ae2a-e9bc30bc842a',
  'type': 'tool_call'}]

In [8]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

prompt2 = PromptTemplate(
    template = """You are query refiner and your work is to refine the query.
    You are refining this query for a proof collector whose work is to collect the proof in order to solve the query.
    Refine it in such a way that it makes it easier to collect the proof.
    DO NOT TELL THE COLLECTOR WHAT TO DO NOT THE SOURCE. IN CASE OF YEARS DO NOT USE MONTHS.
    IN CASE OF RANGE OF YEARS MENTION EVERY YEAR.
    NO PREAMBLE
    ###Query###
    {query}
    """,
    input_variables=['query']
)
chain = prompt2 | model | parser
chain.invoke({'query': 'Did Apple and Microsoft report any new lawsuits in 2023 to 2025?'})

'Identify any lawsuits reported by Apple in 2023, 2024, and 2025, and any lawsuits reported by Microsoft in 2023, 2024, and 2025.'

In [22]:
import yfinance as yf
res = yf.Search('microsoft',max_results=1).quotes
res[0]['symbol']

'MSFT'

In [2]:
import yfinance as yf
res = yf.Ticker('AAPL')