Download all SEC filings

In [None]:
import requests
from dotenv import load_dotenv
import os
from sec_api import QueryApi
import time
import json
from datetime import datetime
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
API_KEY = os.getenv("API_KEY")

# queryApi = QueryApi(api_key=API_KEY)
# Report only whether the key was found: printing the key itself would leave
# the secret in the notebook output.
print("API_KEY loaded." if API_KEY else "API_KEY not set.")

def download_filings_by_strategy(ticker, queryApi, num_10k=5, other_forms_limit=10, request_delay=1.0):
    """
    Downloads SEC filings for a given ticker based on a strategic approach.

    1. Fetches the 'num_10k' most recent 10-K filings to establish a date range.
    2. Fetches ALL 10-Q and 8-K filings within that date range.
    3. Fetches up to 'other_forms_limit' of the most recent DEF 14A, 3, 4, and 5
       filings (per form type) filed within that same date range.
    4. Downloads the HTML version of each filing and saves it.
    5. Saves a consolidated JSON file with metadata for all downloaded filings.

    Files are written to "<ticker>_sec_filings/" under the current working
    directory; the metadata file is "<ticker>_metadata.json" in that folder.

    Args:
        ticker (str): The company ticker symbol (e.g., "AAPL").
        queryApi (QueryApi): An initialized instance of the sec_api QueryApi.
        num_10k (int): The number of recent 10-K filings to base the date range on.
        other_forms_limit (int): The max number of recent filings for less critical forms.
        request_delay (float): Seconds to pause between consecutive query API calls.

    Returns:
        None. Returns early (before writing metadata) when no 10-K filing is found.
    """
    # --- Setup ---
    save_dir = f"{ticker}_sec_filings"
    os.makedirs(save_dir, exist_ok=True)
    headers = {
        "User-Agent": "Gautam Kumar gautam.baranwal2003@gmail.com"
    }
    all_metadata = []
    processed_accession_nos = set()

    # --- Helper function to process and save a single filing ---
    def process_and_save_filing(filing):
        """Download one filing and record its metadata; return True on success."""
        accession_no = filing.get("accessionNo")
        if not accession_no or accession_no in processed_accession_nos:
            return False

        url = filing.get("linkToFilingDetails")
        if not url:
            print(f"Warning: No URL found for filing {accession_no}. Skipping.")
            return False

        # `or` also covers keys that are present but null, which would
        # otherwise crash on .replace() / slicing.
        ticker_from_filing = filing.get("ticker") or "UNKNOWN"
        form_type = (filing.get("formType") or "UNKNOWN").replace("/", "-")
        filed_at = (filing.get("filedAt") or "")[:10]

        filename = f"{ticker_from_filing}_{form_type}_{filed_at}_{str(accession_no).replace('-', '')}.html"
        filepath = os.path.join(save_dir, filename)

        # Best effort: a failed download or write skips this filing only.
        try:
            r = requests.get(url, headers=headers, timeout=20)
            r.raise_for_status()
            with open(filepath, "wb") as f:
                f.write(r.content)
        except (requests.RequestException, OSError) as e:
            print(f"Failed to download or process {url}: {e}")
            return False

        print(f"Saved: {filename}")

        all_metadata.append({
            "filename": filename,
            "ticker": ticker_from_filing,
            "form_type": form_type,
            "filed_at": filed_at,
            "period_of_report": filing.get("periodOfReport"),
            "items": filing.get("items", []),
            "url": url
        })
        processed_accession_nos.add(accession_no)
        return True

    # Fetch 10-K filings to establish a date range ---
    print(f"\n--- Step 1: Fetching last {num_10k} 10-K filings for {ticker} ---")
    search_params_10k = {
        "query": f'ticker:{ticker} AND formType:"10-K"',
        "from": "0", "size": str(num_10k), "sort": [{"filedAt": {"order": "desc"}}]
    }
    response_10k = queryApi.get_filings(search_params_10k)
    filings_10k = response_10k.get("filings", [])

    if not filings_10k:
        print(f"Could not find any 10-K filings for {ticker}. Aborting.")
        return

    for f in filings_10k:
        process_and_save_filing(f)

    # Ignore filings without a timestamp: min()/max() cannot compare None with str.
    ten_k_dates = [f.get("filedAt") for f in filings_10k if f.get("filedAt")]

    if not ten_k_dates:
        print(f"No filing dates available on the 10-K filings for {ticker}; skipping date-range steps.")
    else:
        start_date = min(ten_k_dates)[:10]
        end_date = max(ten_k_dates)[:10]
        print(f"Date range established: {start_date} to {end_date}")

        # Fetch critical filings (10-Q, 8-K) within the date range ---
        print(f"\n--- Step 2: Fetching ALL 10-Q and 8-K filings from {start_date} to {end_date} ---")
        critical_forms = ["10-Q", "8-K"]
        form_type_query_part = " OR ".join([f'formType:"{t}"' for t in critical_forms])

        offset = 0
        batch_size = 200
        # Page until the API returns an empty page; the offset advances by the
        # number of filings actually returned, so a smaller server-side page
        # size is still handled.
        while True:
            search_params_critical = {
                "query": f"ticker:{ticker} AND ({form_type_query_part}) AND filedAt:[{start_date} TO {end_date}]",
                "from": str(offset), "size": str(batch_size), "sort": [{"filedAt": {"order": "desc"}}]
            }
            response_critical = queryApi.get_filings(search_params_critical)
            filings_critical = response_critical.get("filings", [])
            if not filings_critical:
                break
            for f in filings_critical:
                process_and_save_filing(f)
            offset += len(filings_critical)
            time.sleep(request_delay)

        # Fetch a limited number of other filings (DEF 14A, 3, 4, 5) ---
        print(f"\n--- Step 3: Fetching RECENT (max {other_forms_limit}) other filings ---")
        limited_forms = ["DEF 14A", "3", "4", "5"]
        for form_type in limited_forms:
            print(f"Fetching recent {form_type} filings...")
            search_params_limited = {
                "query": f"ticker:{ticker} AND formType:\"{form_type}\" AND filedAt:[{start_date} TO {end_date}]",
                "from": "0", "size": str(other_forms_limit), "sort": [{"filedAt": {"order": "desc"}}]
            }
            response_limited = queryApi.get_filings(search_params_limited)
            filings_limited = response_limited.get("filings", [])
            for f in filings_limited:
                process_and_save_filing(f)
            time.sleep(request_delay)

    # Save all collected metadata ---
    print("\n--- Step 4: Saving all collected metadata ---")
    metadata_filepath = os.path.join(save_dir, f"{ticker}_metadata.json")
    with open(metadata_filepath, "w", encoding="utf-8") as f:
        json.dump(all_metadata, f, indent=4)

    print(f"Metadata for {len(all_metadata)} filings saved to {metadata_filepath}")
    # One metadata entry is appended per successful download, so its length
    # is the download count.
    print(f"\nTotal filings downloaded for {ticker}: {len(all_metadata)}")


# Script entry point: download filings for a fixed list of tickers.
if __name__ == "__main__":
    load_dotenv()
    API_KEY = os.getenv("API_KEY")
    if not API_KEY:
        raise ValueError("API_KEY not found. Please create a .env file and add your API_KEY from sec-api.io.")

    queryApi = QueryApi(api_key=API_KEY)

    tickers = [
        "AAPL", "MSFT", "JPM", "JNJ", "XOM",
        "GOOG", "AMZN", "TSLA", "UNH", "V",
        "PG", "HD", "CVX", "MRK", "BAC",
    ]

    banner = "=================================================="
    for ticker in tickers:
        # A failure for one ticker is reported and the loop moves on.
        try:
            print(banner)
            print(f"Starting download process for ticker: {ticker}")
            print(banner)
            download_filings_by_strategy(ticker, queryApi, num_10k=5, other_forms_limit=10)
            print(f"\nSuccessfully completed download for {ticker}.")
        except Exception as e:
            print(f"An error occurred while processing {ticker}: {e}")

        # Pause between tickers.
        time.sleep(2)

### Create vector store using Gemini 

In [None]:
import os
import json
import re
import shutil
from bs4 import BeautifulSoup, NavigableString
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import logging

# Configure logging for better visibility: show INFO and above, and prefix
# every record with its timestamp and level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def convert_html_table_to_json(table_soup: BeautifulSoup) -> str:
    """
    Converts a complex HTML table with rowspan and colspan into a structured JSON string.

    The table is first expanded into a grid in which every spanned cell is
    repeated into each slot it covers. The first row containing any non-empty
    text becomes the header; each later row becomes one JSON object keyed by
    the header labels.

    Header labels are made unique before use: a blank label becomes
    "column_<position>" and a repeated label gets a numeric suffix ("2023",
    "2023_2", ...). Without this, repeated labels (which every colspan header
    produces) would overwrite each other as dictionary keys and drop columns.

    Args:
        table_soup: The parsed <table> element. Only `find_all`, `get` and
            `get_text` are used on it and its rows/cells.

    Returns:
        A JSON array (indent=2) with one object per non-empty data row, or ""
        when the table has no header row or no data rows.
    """
    # The HTML standard caps colspan at 1000; clamping also keeps a bogus
    # attribute value from allocating an enormous row.
    max_span = 1000

    def _span(cell, attr):
        """Read a rowspan/colspan attribute as an int in [1, max_span]; malformed values count as 1."""
        try:
            value = int(str(cell.get(attr, 1)).strip())
        except (TypeError, ValueError):
            return 1
        return min(max(value, 1), max_span)

    rows = table_soup.find_all('tr')

    # Rows grow on demand, so no column-count estimate (or safety buffer) is needed.
    grid = [[] for _ in rows]

    def _put(r, c, text):
        """Store text at grid[r][c], padding the row with None as needed."""
        target = grid[r]
        if c >= len(target):
            target.extend([None] * (c + 1 - len(target)))
        target[c] = text

    # Populate the grid, accounting for rowspan and colspan
    for r, row in enumerate(rows):
        c = 0
        for cell in row.find_all(['th', 'td']):
            # Skip slots already filled by a rowspan from an earlier row.
            while c < len(grid[r]) and grid[r][c] is not None:
                c += 1

            rowspan = _span(cell, 'rowspan')
            colspan = _span(cell, 'colspan')
            cell_text = cell.get_text(strip=True)

            for i in range(rowspan):
                if r + i >= len(grid):
                    break  # rowspan runs past the last row
                for j in range(colspan):
                    _put(r + i, c + j, cell_text)
            c += colspan

    # Find the header row (first row with any non-empty text)
    header_row_index = next((i for i, row_data in enumerate(grid) if any(row_data)), -1)
    if header_row_index == -1:
        return ""  # no header, nothing to key the rows by

    header = [cell if cell is not None else '' for cell in grid[header_row_index]]

    # Make the labels usable as distinct dictionary keys.
    keys = []
    used = set()
    for position, label in enumerate(header, start=1):
        base = label or f"column_{position}"
        key = base
        suffix = 1
        while key in used:
            suffix += 1
            key = f"{base}_{suffix}"
        used.add(key)
        keys.append(key)

    # Grid -> list of row dictionaries
    json_data = []
    for row_data in grid[header_row_index + 1:]:
        row_dict = {
            key: (row_data[c] if c < len(row_data) else None)
            for c, key in enumerate(keys)
        }
        # Keep only rows that carry at least one non-empty value.
        if any(row_dict.values()):
            json_data.append(row_dict)

    return json.dumps(json_data, indent=2) if json_data else ""


def extract_text_and_tables(html: str) -> str:
    """
    Cleans HTML, converts tables to JSON, retains surrounding context for tables,
    and returns a single combined text for processing.

    Each <table> is replaced by a text block delimited by "--- TABLE START ---"
    and "--- TABLE END ---" that carries the nearest preceding heading, the
    table caption, the immediately preceding <p> (if any) and the table data
    as JSON.

    Args:
        html: Raw HTML of one filing.

    Returns:
        Plain text with blank-line runs collapsed to one blank line and runs of
        spaces/tabs collapsed to a single space. Newlines are preserved.
    """
    soup = BeautifulSoup(html, "html.parser")

    # 1. Process and replace all tables
    for table in soup.find_all('table'):
        # a. Retain surrounding context from the preceding paragraph
        context_text = ""
        prev_sibling = table.find_previous_sibling()
        if prev_sibling and prev_sibling.name == 'p':
            context_text = prev_sibling.get_text(strip=True)

        # b. Capture table caption (if available)
        caption = table.find('caption')
        caption_text = caption.get_text(strip=True) if caption else ""

        # c. Capture nearest preceding heading (if available)
        heading = table.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
        heading_text = heading.get_text(strip=True) if heading else ""

        # d. Convert table to JSON
        json_table = convert_html_table_to_json(table)

        # e. Create a combined block with all context
        combined_block = "\n\n--- TABLE START ---\n"
        if heading_text:
            combined_block += f"Heading: {heading_text}\n"
        if caption_text:
            combined_block += f"Caption: {caption_text}\n"
        if context_text:
            combined_block += f"Context: {context_text}\n"
        if json_table:
            combined_block += f"Table Data (JSON):\n{json_table}\n"
        combined_block += "--- TABLE END ---\n\n"

        # f. Replace the original table with the combined block. It is inserted
        #    as a plain text node so that '<' or '&' inside cell text is not
        #    re-interpreted as markup.
        table.replace_with(NavigableString(combined_block))

    # 2. Clean remaining non-content tags
    for tag in soup(["script", "style", "header", "footer", "nav"]):
        tag.decompose()

    # Namespaced tags (e.g. inline XBRL "ix:..."):
    # NOTE(review): assumes the inline-XBRL layout in which <ix:header> holds
    # only hidden metadata while the other ix:* elements wrap text that is
    # displayed in the document - confirm against a sample filing. Under that
    # assumption the header is dropped and every other namespaced tag is
    # unwrapped so its visible text survives.
    for tag in soup.find_all("ix:header"):
        tag.decompose()
    for tag in soup(re.compile(r".*:[a-zA-Z]")):
        tag.unwrap()

    # 3. Extract all text, which now includes the contextualized JSON tables
    text = soup.get_text(separator="\n", strip=True)
    text = re.sub(r'\n\s*\n', '\n\n', text)
    # Collapse horizontal whitespace only; matching '\n' here would erase the
    # paragraph breaks normalised on the previous line.
    text = re.sub(r'[^\S\n]{2,}', ' ', text)
    return text

def extract_sections(text: str) -> list[tuple[str, str]]:
    """Split filing text into (section_title, section_content) pairs.

    A section starts at an "Item <number>[letter]" marker and runs until the
    next such marker that begins a line, or to the end of the text. The first
    line of the section is its title (whitespace runs collapsed to one space);
    the rest is its content. Sections with no content are omitted.
    """
    section_re = re.compile(
        r"(?i)(Item\s\d{1,2}[A-Z]?(?:[.:–\-]?\s?.*?))(?=\nItem\s\d{1,2}[A-Z]?(?:[.:–\-]|\s)|\Z)",
        re.DOTALL,
    )

    sections = []
    for found in section_re.finditer(text):
        heading, newline, body = found.group(1).partition("\n")
        if not newline:
            # A marker with nothing after its first line has no content.
            continue
        body = body.strip()
        if not body:
            continue
        heading = re.sub(r"\s+", " ", heading.strip())
        sections.append((heading, body))
    return sections

def process_company_filings(ticker: str, sector: str, base_folder: str = ".") -> list[Document]:
    """Processes all downloaded HTML filings for a given company and returns a list of Document chunks.

    Reads "<base_folder>/<ticker>_sec_filings/<ticker>_metadata.json", then for
    every filing listed there extracts its text, splits it into sections and
    chunks each section (1000 characters, 200 overlap). Every chunk carries
    the filing's metadata plus "section_full", "section_simple", "sector",
    "contains_table" and "year".

    Args:
        ticker: Company ticker; selects the folder and metadata file.
        sector: Sector label copied into every chunk's metadata.
        base_folder: Directory that contains the "<ticker>_sec_filings" folder.

    Returns:
        The chunks for all filings that could be processed; an empty list when
        the folder or metadata file is missing. A filing that fails is logged
        and skipped.
    """
    folder_path = os.path.join(base_folder, f"{ticker}_sec_filings")
    metadata_path = os.path.join(folder_path, f"{ticker}_metadata.json")

    if not os.path.exists(folder_path) or not os.path.exists(metadata_path):
        logging.warning(f"Folder or metadata file not found for ticker {ticker}. Skipping.")
        return []

    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata_list = json.load(f)
    metadata_lookup = {item['filename']: item for item in metadata_list}

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    company_chunks = []

    for filename, file_metadata in metadata_lookup.items():
        file_path = os.path.join(folder_path, filename)
        if not os.path.exists(file_path):
            logging.warning(f"Missing file, skipping: {filename}")
            continue

        try:
            # The file may not be valid UTF-8; replace undecodable bytes
            # instead of discarding the entire filing.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                html = f.read()

            text = extract_text_and_tables(html)
            logging.debug("Extracted %d characters of text from %s", len(text), filename)
            sections = extract_sections(text)

            if text and sections:
                # Anything before the first recognised section is kept as a preamble.
                first_item_start = text.find(sections[0][0])
                if first_item_start > 0:
                    preamble_content = text[:first_item_start].strip()
                    if preamble_content:
                        sections.insert(0, ("Preamble", preamble_content))
            elif text and not sections:
                sections = [("Full Document", text)]

            # The filing year is the same for every section; 0 marks "unknown"
            # (missing, empty or non-numeric "filed_at").
            try:
                filing_year = int(str(file_metadata.get("filed_at") or "")[:4])
            except ValueError:
                filing_year = 0

            for title, content in sections:
                if not content.strip():
                    continue

                metadata = file_metadata.copy()
                metadata["section_full"] = title
                metadata["sector"] = sector
                metadata["contains_table"] = "--- TABLE START ---" in content
                metadata["year"] = filing_year

                # "Item 1A. Risk Factors" -> "ITEM1A"; titles without an Item prefix -> "OTHER".
                simple_section_match = re.match(r"(?i)Item\s\d{1,2}[A-Z]?", title)
                metadata["section_simple"] = simple_section_match.group(0).upper().replace(" ", "") if simple_section_match else "OTHER"

                chunks = text_splitter.create_documents([content], metadatas=[metadata])
                company_chunks.extend(chunks)

            logging.info(f"✅ Processed {filename}")

        except Exception as e:
            # Per-file boundary: log with traceback and continue with the next filing.
            logging.error(f"❌ Error processing {filename}: {e}", exc_info=True)

    return company_chunks

def filter_complex_metadata(metadata: dict) -> dict:
    """Return a copy of metadata restricted to values ChromaDB can store.

    str, int, float and bool values are kept unchanged, lists are flattened to
    a ", "-joined string, and every other value (None, dict, ...) is dropped.
    """
    scalar_types = (str, int, float, bool)
    cleaned = {}
    for name, item in metadata.items():
        if isinstance(item, list):
            cleaned[name] = ", ".join(str(part) for part in item)
        elif isinstance(item, scalar_types):
            cleaned[name] = item
    return cleaned

# Script entry point: chunk every company's filings and build the Chroma store.
if __name__ == "__main__":

    # Sector label attached to each ticker's chunks.
    TICKER_TO_SECTOR = {
        "AAPL": "Technology",
        "MSFT": "Technology",
        "NVDA": "Technology",
        "AMZN": "Consumer Discretionary",
        "TSLA": "Consumer Discretionary",
        "JPM": "Financial Services",
        "V": "Financial Services",
        "JNJ": "Healthcare",
        "PFE": "Healthcare",
        "BA": "Industrials",
        "CAT": "Industrials",
        "UPS": "Industrials",
        "XOM": "Energy",
        "GOOG": "Technology",
        "UNH": "Healthcare",
        "PG": "Consumer Staples",
        "HD": "Consumer Discretionary",
        "CVX": "Energy",
        "MRK": "Healthcare",
        "BAC": "Financial Services",
    }
    tickers = list(TICKER_TO_SECTOR)

    all_final_chunks = []
    for ticker, sector in TICKER_TO_SECTOR.items():
        logging.info(f"--- Processing ticker: {ticker} ---")
        ticker_chunks = process_company_filings(ticker, sector, base_folder=".")
        all_final_chunks.extend(ticker_chunks)
        logging.info(f"--- Finished {ticker}. Chunks so far: {len(all_final_chunks)} ---")

    # Rebuild each chunk with metadata reduced to ChromaDB-compatible values.
    cleaned_chunks = []
    for doc in all_final_chunks:
        cleaned_chunks.append(
            Document(page_content=doc.page_content, metadata=filter_complex_metadata(doc.metadata))
        )

    logging.info(f"📦 Total chunks created for all companies: {len(cleaned_chunks)}")

    if cleaned_chunks:
        logging.info("--- Creating and saving ChromaDB vector store ---")
        if os.getenv("GOOGLE_API_KEY"):
            try:
                embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
                persist_directory = "sec_filings_db"
                # Start from a clean directory so an earlier store is not mixed in.
                if os.path.exists(persist_directory):
                    shutil.rmtree(persist_directory)
                vector_store = Chroma.from_documents(
                    documents=cleaned_chunks,
                    embedding=embedding_model,
                    persist_directory=persist_directory,
                )
                logging.info(f"✅ ChromaDB vector store created and saved to '{persist_directory}'")
            except Exception as e:
                logging.error(f"❌ Failed to create vector store: {e}", exc_info=True)
        else:
            logging.warning("⚠️ GOOGLE_API_KEY environment variable not set.")

### QA Agent 

#### Provide User Question here.

In [1]:
# Natural-language question for the QA agent.
user_question = "How do companies describe competitive advantages? What themes emerge?"


#### Run the cell below; the final answer will be printed.

In [None]:
import os
import json
import re
import time
from typing import List, Dict, Any
import numpy as np
import logging
import traceback

# LangChain and other core libraries
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

# Set up logging for better visibility: show INFO and above, and prefix every
# record with its timestamp and level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


# --- 1. The Query Parser: Using an LLM to Extract Metadata ---

class QueryMetadata(BaseModel):
    """Metadata extracted from a user's financial query.

    This is the schema that QueryParser hands to PydanticOutputParser: the
    LLM's reply to a user question is parsed into one instance of this model.
    QueryParser.build_filter treats an empty list as "no filter on this field".
    """
    # The Field descriptions are runtime text (presumably surfaced to the LLM
    # through the parser's format instructions - verify), so treat them as
    # part of the prompt rather than as comments.
    ticker: List[str] = Field(description="List of stock tickers mentioned, e.g., ['AAPL', 'MSFT']")
    sector: List[str] = Field(description="List of industry sectors mentioned, e.g., ['Technology', 'Healthcare']")
    form_type: List[str] = Field(description="List of SEC form types mentioned, e.g., ['10-K', '10-Q']")
    year: List[int] = Field(description="List of four-digit years mentioned, e.g., [2023]")
    search_query: str = Field(description="The core semantic question to be used for the vector search.")

# Section title -> plain-language description of what that section covers.
# QueryParser embeds the descriptions once and, for each query, picks the title
# whose description is most similar to the query. Titles that start with
# "Item <n>" are then reduced to a tag such as "ITEM1A" for filtering; a title
# without that prefix (the 8-K entry) never produces a section filter.
SEC_SECTIONS = {
    "Item 1. Business": (
        "Provides a general overview of the company's business, including its main products and services, "
        "business model, key markets, acquisitions, competitive positioning, and strategic direction."
    ),
    "Item 1A. Risk Factors": (
        "Describes the most significant risks that could adversely affect the company's business operations, "
        "financial condition, or stock price, helping investors assess potential uncertainties."
    ),
    "Item 2. Properties": (
        "Describes the company's physical assets and properties, including those gained through acquisitions "
        "or business combinations."
    ),
    "Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations (MD&A)": (
        "Offers management’s perspective on financial performance, including discussion of revenue, expenses, "
        "liquidity, capital resources, acquisitions, business combinations, restructuring efforts, and overall strategy."
    ),
    "Item 8. Financial Statements and Supplementary Data": (
        "Contains the company’s audited financial statements, including the balance sheet, income statement, "
        "cash flow statement, and notes, which may include details of mergers, acquisitions, and goodwill."
    ),
    "Item 11. Executive Compensation": (
        "Details compensation for the company’s executives, including salaries, bonuses, stock awards, and incentive plans."
    ),
    "8-K Filing (Material Events)": (
        "Reports unscheduled material events or corporate changes, such as mergers and acquisitions, executive departures, "
        "earnings announcements, and other strategic developments. Often includes strategic rationale and financial impact."
    ),
}

# Keyword phrase -> form types to search, for intelligent filtering.
# QueryParser.build_filter consults this only when the LLM extracted no
# explicit form type; phrases are matched as lower-case substrings of the
# search query, and the form types of every matching phrase are combined.
FORM_TYPE_KEYWORDS = {
    "insider trading": ["3", "4", "5"],
    "executive compensation": ["DEF 14A"],
    "revenue guidance": ["8-K", "10-Q"],
    "financial guidance": ["8-K", "10-Q"],
    "earnings release": ["8-K"],
    "material event": ["8-K"],
    "corporate event": ["8-K"],
    "acquisition": ["8-K"],
    "merger": ["8-K"]
}

class QueryParser:
    """
    An intelligent parser that uses an LLM to extract structured metadata
    from a natural language query and routes it to the most relevant section.
    """
    def __init__(self, llm, embedding_model):
        self.llm = llm
        self.embedding_model = embedding_model
        
        self.pydantic_parser = PydanticOutputParser(pydantic_object=QueryMetadata)
        self.prompt = PromptTemplate(
            template="""
            You are an expert at parsing financial questions. Analyze the user's query and extract the required information into the specified JSON format.
            User Query: {query}
            Format Instructions: {format_instructions}
            """,
            input_variables=["query"],
            partial_variables={"format_instructions": self.pydantic_parser.get_format_instructions()},
        )
        self.chain = self.prompt | self.llm | self.pydantic_parser

        self.section_descriptions = list(SEC_SECTIONS.values())
        self.section_names = list(SEC_SECTIONS.keys())
        self.section_embeddings = self.embedding_model.embed_documents(self.section_descriptions)

    def _find_best_section(self, query: str) -> str | None:
        if not query.strip(): return None
        query_embedding = self.embedding_model.embed_query(query)
        similarities = [np.dot(query_embedding, sec_emb) / (np.linalg.norm(query_embedding) * np.linalg.norm(sec_emb)) for sec_emb in self.section_embeddings]
        best_index = np.argmax(similarities)
        if similarities[best_index] > 0.75: 
            return self.section_names[best_index]
        return None

    def build_filter(self, query: str) -> (Dict[str, Any], str):
        """Parses a query and builds a ChromaDB filter."""
        parsed_metadata: QueryMetadata = self.chain.invoke({"query": query})
        
        conditions = []
        if parsed_metadata.ticker: conditions.append({"ticker": {"$in": parsed_metadata.ticker}})
        if parsed_metadata.sector: conditions.append({"sector": {"$in": parsed_metadata.sector}})
        
        # --- Intelligent Form Type Selection ---
        target_forms = set()
        if parsed_metadata.form_type:
            target_forms.update(parsed_metadata.form_type)
        else:
            found_keyword = False
            for keyword, forms in FORM_TYPE_KEYWORDS.items():
                if keyword in parsed_metadata.search_query.lower():
                    target_forms.update(forms)
                    found_keyword = True
            if not found_keyword:
                target_forms.add("10-K")
        
        conditions.append({"form_type": {"$in": list(target_forms)}})
        # ---------------------------------------------

        if parsed_metadata.year:
            conditions.append({"year": {"$in": parsed_metadata.year}})
        
        # --- Form-Aware Section Routing ---
        # Only search for "Item" sections if we are looking at a 10-K or 10-Q
        if any(form in target_forms for form in ["10-K", "10-Q"]):
            best_section = self._find_best_section(parsed_metadata.search_query)
            if best_section:
                simple_section_match = re.match(r"(?i)Item\s\d{1,2}[A-Z]?", best_section)
                if simple_section_match:
                    simple_section_tag = simple_section_match.group(0).upper().replace(" ", "")
                    conditions.append({"section_simple": simple_section_tag})
        # ----------------------------------
            
        filter_dict = {"$and": conditions} if len(conditions) > 1 else (conditions[0] if conditions else {})
        return filter_dict, parsed_metadata.search_query


# --- 2. The Main QA Application ---

def pretty_print_result(result: dict):
    """Print the answer followed by a de-duplicated list of its sources.

    Expects result["result"] (the answer text) and result["source_documents"]
    (objects with a `metadata` mapping). Each distinct (filename, section_full)
    pair is listed once, in first-seen order.
    """
    print("\n✅ Final Answer:")
    print(result["result"])
    print("\n" + "---" * 10)
    print("📚 Source Documents Used:")

    already_listed = set()
    for doc in result["source_documents"]:
        meta = doc.metadata
        file_name = meta.get('filename')
        key = (file_name, meta.get('section_full'))
        if key in already_listed:
            continue
        print(f"  - File: {file_name}\n    Section: {meta.get('section_full', 'N/A')}")
        already_listed.add(key)

# Script entry point: answer `user_question` from the persisted vector store.
if __name__ == "__main__":
    if not os.getenv("GOOGLE_API_KEY"):
        print("⚠️ GOOGLE_API_KEY environment variable not set.")
    else:
        # 1. Load components
        embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        persist_directory = "sec_filings_db"

        if not os.path.exists(persist_directory):
            print(f"❌ Error: Vector store not found at '{persist_directory}'. Please run the processing script first.")
        else:
            vector_store = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)
            llm = GoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0.1)

            # 2. Initialize the intelligent query parser
            parser = QueryParser(llm, embedding_model)

            # 3. Use the question already defined by the user (e.g. in an
            #    earlier notebook cell); only fall back to the sample question
            #    when none exists, instead of overwriting it.
            if "user_question" not in globals():
                user_question = "How do companies describe competitive advantages? What themes emerge?"

            try:
                # Validated inside the try so the ValueError handler below reports it.
                if not user_question or not isinstance(user_question, str):
                    raise ValueError("User question must be a non-empty string")

                # 4. Use the parser to build the filter and get the core query
                metadata_filter, search_query = parser.build_filter(user_question)

                print(f"❓ User Question: {user_question}")
                print(f"🔍 Search Query: {search_query}")
                print(f"⚙️ Generated Filter: {json.dumps(metadata_filter, indent=2)}")

                # 5. Retrieve with MMR, restricted by the metadata filter
                retriever = vector_store.as_retriever(
                    search_kwargs={"k": 8, "filter": metadata_filter},
                    search_type="mmr"
                )
                retrieved_docs = retriever.get_relevant_documents(search_query)
                if not retrieved_docs:
                    # With no context the model could only answer from general
                    # knowledge, so report the miss and skip the LLM call.
                    print(" No relevant documents found for the query.")
                else:
                    # 6. Use the simple and fast "stuff" chain with a custom prompt
                    prompt_template = """
                You are a senior financial analyst. Your task is to synthesize a high-level answer to the user's question based on the provided context from multiple SEC filings.

                **Instructions:**
                1.  Read all the provided context carefully. Each document is from a different source.
                2.  Identify the main themes, patterns, and key differences that emerge across the documents.
                3.  Structure your answer clearly. Start with a high-level summary, then provide a bulleted list of the common themes.
                4.  Do not just list the information from each source. Your value is in the synthesis and comparison.
                5.  If the context is insufficient to answer the question, state that clearly.

                **Context from documents:**
                {context}

                **User's Question:**
                {question}

                **Final Synthesized Answer:**
                """

                    QA_CHAIN_PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

                    qa_chain = RetrievalQA.from_chain_type(
                        llm=llm,
                        chain_type="stuff",
                        retriever=retriever,
                        return_source_documents=True,
                        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
                    )

                    result = qa_chain.invoke({"query": search_query})

                    pretty_print_result(result)
            except ValueError as ve:
                print(f"❌ Invalid input: {ve}")
            # Do not bind the exception to `re`: at module level that would
            # rebind and then unbind the imported `re` module.
            except RuntimeError as runtime_err:
                print(f"❌ Runtime error during processing: {runtime_err}")
            except Exception as e:
                print(f"❌ Unexpected error: {e}", traceback.format_exc())

# Some example questions and answers are appended below.


## Questions and Answers

### Question 1: What are the primary revenue drivers for major technology companies, and how have they evolved?
⚙️ Generated Base Filter: {
  "$and": [
    {
      "sector": {
        "$in": [
          "Technology"
        ]
      }
    },
    {
      "section_simple": "ITEM7"
    }
  ]
}
🧠 Decomposed into 1 sub-queries: ['What are the primary revenue drivers for major technology companies, and how have they evolved?']

Sub-querying for: 'What are the primary revenue drivers for major technology companies, and how have they evolved?'...
Done.


✅ Final Synthesized Answer:
The provided summaries offer a limited view, focusing on a single technology company's revenue streams rather than a broad overview of major tech companies.  While the example highlights the importance of seasonal spending (corporate year-end and holiday seasons), multi-year contracts, and online advertising, this is not representative of all major tech companies.

To provide a more comprehensive answer, we must consider the diverse revenue models across the tech landscape.  Major technology companies' primary revenue drivers can be broadly categorized as follows, and their evolution is a dynamic process:

**1. Software and Software-as-a-Service (SaaS):** This is a dominant revenue driver for many companies, encompassing subscription fees for cloud services, enterprise software, productivity tools, and various software applications.  The evolution here involves a shift from perpetual licenses to subscription models, recurring revenue streams, and increasing integration of AI and machine learning capabilities.

**2. Hardware Sales:**  While less dominant than software for some giants, hardware remains crucial for others. This includes personal computers, smartphones, servers, networking equipment, and wearables.  Evolution in this area involves miniaturization, increased processing power, improved connectivity (5G, etc.), and the integration of software and services into hardware offerings.

**3. Online Advertising:** This is a massive revenue source for companies like Google and Meta.  The evolution here is marked by increasing sophistication in targeting, the rise of programmatic advertising, the expansion into new platforms (e.g., connected TVs), and the ongoing challenges related to data privacy and regulation.

**4. Cloud Computing Services:**  This is a rapidly growing sector, with companies like Amazon (AWS), Microsoft (Azure), and Google Cloud providing infrastructure, platform, and software services.  Evolution involves expanding service offerings, increasing geographic reach, and competition for market share.

**5. E-commerce:**  For companies like Amazon, e-commerce is a primary revenue driver.  Evolution involves expanding product categories, improving logistics and delivery, and leveraging data analytics for personalized recommendations and targeted marketing.

**6. Intellectual Property Licensing:**  Some tech companies generate significant revenue through licensing their patents and technologies to other businesses.  The evolution here is tied to the pace of technological innovation and the strategic management of intellectual property portfolios.


In conclusion, while seasonal spending and online advertising are important for *some* tech companies, the primary revenue drivers are far more diverse and dynamic.  The evolution of these drivers is shaped by technological advancements, changing consumer behavior, competitive pressures, and regulatory landscapes.  A holistic understanding requires considering the specific business models and strategies of individual companies within the broader technological ecosystem.



## Question 2:  Compare R&D spending trends across companies. What insights about innovation investment strategies?

✅ Final Answer:
The original analysis highlighted a single company's increasing R&D spending as a percentage of net sales, while lacking crucial financial context.  The subsequent analysis of Company X provided a more nuanced view, showing relatively stable R&D spending as a percentage of sales (17.7% in both 2022 and 2023) despite a 6.7% year-over-year absolute increase, driven by growth in its "Innovative Medicine" and "MedTech" segments.  This highlighted the importance of considering both absolute and relative metrics.

The *new context*, however, introduces data for a third company (let's call it Company Y) which allows for a broader comparison of R&D investment strategies. Company Y shows a different trend:  R&D expenses increased by $3.9 billion from 2023 to 2024, representing a significant absolute increase. However, the *percentage* of revenue dedicated to R&D *decreased* from 15% to 14%. This decrease, despite the substantial absolute increase, suggests a different strategic approach compared to both the original company and Company X.

A breakdown of Company Y's increased R&D spending reveals that the rise is primarily due to increased employee compensation ($1.5B, largely driven by stock-based compensation), depreciation ($1.4B), and third-party services ($698M).  These factors suggest potential investments in talent acquisition, capital expenditures (reflected in depreciation), and external collaborations.  The offsetting reduction in office space optimization charges ($640M) indicates cost-cutting measures in other areas.

Comparing the three companies reveals diverse strategies:

* **Original Company:**  Aggressive absolute R&D growth (details limited, but potentially unsustainable without further financial data).
* **Company X:**  Balanced approach, maintaining a consistent percentage of sales dedicated to R&D while strategically allocating resources across segments ("Innovative Medicine" and "MedTech").
* **Company Y:**  Significant absolute R&D growth, but a decreasing percentage of revenue allocated to R&D, suggesting potential prioritization of efficiency and cost management alongside expansion.  The high proportion of increased spending on employee compensation and depreciation suggests a focus on talent and capital investment.

To draw more definitive conclusions, a more comprehensive analysis is needed. This should include:

* **Multi-year data:**  Analyzing trends over several years for all three companies would provide a clearer picture of long-term strategies.
* **Profitability by segment:**  Understanding the profitability of each R&D segment for all companies is crucial for evaluating the return on investment.
* **Capital expenditures:**  Detailed capital expenditure data related to R&D for all companies would provide insights into long-term investments.
* **Debt levels:**  Assessing the role of debt financing in funding R&D for each company.
* **Industry benchmarks:**  Comparing R&D spending to industry averages within relevant segments for each company would provide crucial context for evaluating competitiveness.
* **Sales and Marketing data:** Comparing sales and marketing spend to R&D spend for each company would provide a more holistic view of overall investment strategies.  The provided sales and marketing data for Company Y is insufficient for meaningful comparison without further details.


Only with this more complete data can we accurately assess the long-term viability and effectiveness of each company's innovation investment strategy.  The current analysis reveals distinct approaches, but lacks the depth to definitively rank their effectiveness or sustainability.

## Question 3:  Identify significant working capital changes for financial services companies and driving factors.
⚙️ Generated Base Filter: {
  "$and": [
    {
      "sector": {
        "$in": [
          "Financial Services"
        ]
      }
    },
    {
      "section_simple": "ITEM7"
    }
  ]
}
🧠 Decomposed into 1 sub-queries: ['Identify significant working capital changes for financial services companies and driving factors']

Sub-querying for: 'Identify significant working capital changes for financial services companies and driving factors'...
Done.


✅ Final Synthesized Answer:
The provided information is insufficient to answer the question about significant working capital changes in financial services companies and their driving factors.  To provide a comprehensive answer, detailed financial statements (balance sheets, income statements, and cash flow statements) across multiple periods for several financial services companies would be required.  Analysis would then need to focus on changes in current assets (like cash and cash equivalents, marketable securities, receivables, and other current assets) and current liabilities (like accounts payable, short-term debt, and accrued expenses).

Significant changes in working capital for financial services firms can stem from several factors, including:

* **Changes in interest rates:**  Fluctuations in interest rates directly impact the value of securities held and the cost of borrowing, leading to changes in both current assets and liabilities.  Rising rates can increase the value of securities but also increase borrowing costs, impacting net working capital.

* **Economic cycles:** During economic expansions, loan demand increases, leading to higher receivables (loans outstanding). Conversely, during recessions, loan defaults may rise, impacting the quality of receivables and potentially increasing loan loss provisions (a current liability).

* **Regulatory changes:** New regulations can impact capital requirements, reserve ratios, and liquidity standards, forcing firms to adjust their working capital levels to comply.

* **Technological advancements:**  Increased use of fintech and automation can impact operational efficiency, potentially reducing costs associated with processing transactions and managing accounts, thus affecting working capital.

* **Mergers and acquisitions:**  Acquisitions can significantly alter a company's asset and liability structure, leading to substantial changes in working capital.

* **Changes in customer behavior:** Shifts in customer preferences and usage patterns (e.g., increased use of digital banking) can impact the volume and type of transactions, affecting the level of cash and cash equivalents and other current assets.


Without access to specific financial data, a precise identification of significant working capital changes and their driving factors for financial services companies is impossible.  The analysis requires a quantitative approach examining specific financial statements over time and comparing them to industry benchmarks and macroeconomic indicators.


## Question 4: What are the most commonly cited risk factors across industries? How do same-sector companies prioritize differently?
What are the most commonly cited risk factors across industries? How do same-sector companies prioritize differently?
⚙️ Generated Base Filter: {
  "section_simple": "ITEM1A"
}
🧠 Decomposed into 1 sub-queries: ['What are the most commonly cited risk factors across industries and how do same-sector companies prioritize them differently?']

Sub-querying for: 'What are the most commonly cited risk factors across industries and how do same-sector companies prioritize them differently?'...
C:\Users\gauta\AppData\Local\Temp\ipykernel_3036\3723195594.py:219: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  logging.info(f"Parsed sub-query metadata: {parsed_sub_query.dict()}")
2025-08-05 22:54:02,372 - INFO - Parsed sub-query metadata: {'ticker': [], 'sector': [], 'form_type': [], 'year': [], 'search_query': 'What are the most commonly cited risk factors across industries and how do companies within the same sector prioritize them differently?'}
2025-08-05 22:54:02,372 - INFO - Final filter: {'section_simple': 'ITEM1A'}
Done.


✅ Final Synthesized Answer:
The question of the most commonly cited risk factors across industries and how same-sector companies prioritize them differently is complex and lacks a single, universally applicable answer.  While specific regulatory risks like anti-corruption, anti-money laundering, anti-terrorism, and sanctions are consistently relevant, they represent only a subset of the broader risk landscape.  A comprehensive analysis requires considering numerous factors and their interplay.

The most commonly cited risk factors generally fall under several broad categories:

* **Financial Risks:** These include credit risk, liquidity risk, market risk (interest rate, currency, and equity price fluctuations), and operational risk (internal failures impacting financial performance).  The specific weight given to each varies drastically by industry and company size and strategy.  For example, a highly leveraged financial institution will prioritize liquidity risk far more than a capital-intensive manufacturing firm.

* **Operational Risks:**  These encompass disruptions to supply chains, production failures, cybersecurity breaches, and human capital risks (talent acquisition and retention).  Companies in industries with complex supply chains (e.g., automotive, electronics) will naturally prioritize supply chain resilience, while tech companies will focus heavily on cybersecurity.

* **Strategic Risks:** These involve competitive pressures, technological disruption, changes in consumer preferences, and regulatory changes (beyond the specific examples mentioned earlier).  Startups will prioritize competitive threats and market acceptance, while established firms might focus on adapting to technological shifts.

* **Compliance and Legal Risks:**  This category includes regulatory compliance (broader than the anti-corruption examples), litigation, and intellectual property infringement.  Pharmaceutical companies, for instance, face significantly higher legal and regulatory risks than many other sectors.

* **Environmental, Social, and Governance (ESG) Risks:**  Growing in importance, these encompass climate change, environmental regulations, social responsibility concerns, and corporate governance issues.  Companies in energy, manufacturing, and consumer goods are increasingly scrutinized for their ESG performance.


**Differentiation in Prioritization within the Same Sector:**

Even within the same industry, companies prioritize risks differently based on:

* **Business Model:** A company focused on innovation will prioritize different risks than one focused on cost leadership.
* **Risk Appetite:**  Some companies are more willing to accept higher levels of risk to pursue growth opportunities.
* **Financial Strength:**  Financially robust companies may be better positioned to absorb certain risks.
* **Management Philosophy:**  Different management teams have varying risk tolerances and priorities.
* **Geographic Location:**  Risks vary significantly by region due to political, economic, and regulatory differences.


In conclusion, while some risks are universally relevant, the specific prioritization is highly context-dependent.  A thorough risk assessment requires a deep understanding of the company's specific circumstances, industry dynamics, and competitive landscape.  Simply listing common risk categories is insufficient; a nuanced analysis is crucial for effective risk management.  Access to company-specific information, such as Form 10-K filings, is essential for a detailed understanding of individual company risk profiles.


## Question 5: How do companies describe climate-related risks? Notable industry differences?

✅ Final Synthesized Answer:
Based on the provided summaries, a comprehensive answer to the original question – "How do companies describe climate-related risks? Notable industry differences?" – remains incomplete due to significant data gaps.  However, we can draw some partial conclusions:

**Technology Companies:**  Technology companies, at least some, identify climate-related risks in their disclosures, though the specific location within their filings (e.g., 10-K's "Risk Factors" section) is not consistently confirmed.  They acknowledge *physical risks* such as increased energy costs for cooling and powering hardware due to rising temperatures.  *Transition risks* are also recognized, stemming from potential changes in environmental regulations, energy supply, and resource availability impacting operational costs and the availability of necessary goods and services.  However, the extent of their disclosures, methods of quantifying financial impacts, and specific risk management strategies remain unknown.

**Financial Services Companies:**  Financial services companies identify both *physical risks* (e.g., increased frequency and severity of weather events impacting asset values, insurance costs, and business operations) and *transition risks* (e.g., asset devaluation due to policy changes, technology shifts, and evolving consumer preferences).  The potential impacts are described, but the methods used to quantify these financial impacts and the disclosed risk management strategies are not detailed in the provided summaries.

**Healthcare Companies:** The provided information offers no insight into how healthcare companies describe climate-related risks, quantify their financial impact, or implement risk management strategies.  Further research is needed.


**Notable Industry Differences (Partial):**  The available data suggests a potential difference in the *types* of climate-related risks emphasized.  While both technology and financial services companies acknowledge physical and transition risks, the specific manifestations of these risks differ significantly.  Financial services companies, for example, are more directly exposed to the financial consequences of physical events (e.g., property damage, insurance claims) and the broader economic shifts driven by climate change.  Technology companies' concerns appear more focused on direct operational impacts (energy costs, supply chain disruptions).  However, this is a preliminary observation based on limited data.  A more thorough analysis across a wider range of companies within each sector is necessary to confirm these differences and identify others.

**Overall Conclusion:**  The provided summaries highlight a significant lack of readily available information on how companies, particularly in the healthcare sector, describe and manage climate-related risks.  The information available suggests that the disclosure and quantification of climate-related risks are not standardized across industries, and further research is crucial to develop a complete understanding of the topic.


## Question 6: Analyze recent executive compensation changes. What trends emerge?
✅ Final Answer:

The provided text offers insufficient information to analyze trends in recent executive compensation changes.  All excerpts state that details regarding executive compensation are included in the company's proxy statements for various years (2021, 2022, 2023, 2024, and 2025), but the actual compensation data itself is not provided.  Therefore, no trend analysis can be performed.  The excerpts only reveal the consistent practice of referencing executive compensation details within the annual proxy statements.


While we cannot analyze trends, we can note the following common themes across the provided excerpts:

* **Consistent Reporting Location:** Executive compensation information is consistently reported in the company's annual proxy statement.
* **Detailed Breakdown:** The information is typically broken down into sections covering Director Compensation, Executive Compensation, and details on the Compensation Committee.
* **Incorporation by Reference:**  All excerpts incorporate the relevant information by reference to the proxy statements, rather than providing the data directly within the filings.
* **Potential Exclusion of Pay-Versus-Performance Data:** Some excerpts specifically note that certain information related to pay versus performance (Item 402(v) of Regulation S-K) may be excluded from the incorporation by reference.

To analyze trends in executive compensation, access to the actual proxy statements referenced in these filings is required.


## Question 9: Identify recent M&A activity. What strategic rationale do companies provide?
🔍 Search Query: Identify recent mergers and acquisitions and their strategic rationale
⚙️ Generated Filter: {
  "form_type": {
    "$in": [
      "10-K",
      "8-K"
    ]
  }
}

✅ Final Answer:
The provided text offers insufficient information to identify recent mergers and acquisitions or their strategic rationale.  While the repeated phrase "We must successfully manage ongoing acquisition, joint venture and divestiture activities" highlights the company's active involvement in M&A activity,  no specific details regarding recent transactions are given.  The mention of "Results from Operations and Financial Condition" suggests that such information *might* be found within a full financial report, but is absent from this excerpt.

Therefore, a synthesized answer regarding specific mergers and acquisitions and their strategic rationale cannot be provided based on the limited context.




## Question 10: How do companies describe competitive advantages? What themes emerge?


✅ Final Answer:
The provided SEC filings offer limited insight into specific companies' competitive advantages.  The excerpts focus primarily on the dynamic and evolving nature of the competitive landscape, rather than detailing individual companies' strategies.  The documents highlight the uncertainty inherent in predicting future success due to varying business models, cost structures, market segments, and the constant shifts in the competitive environment.

Common themes emerging from the limited information are:

* **Diverse Business Models:** Competitors employ different business models, making direct comparison and identification of a single "best" approach difficult.
* **Varying Cost Structures:**  Cost structures differ significantly across competitors, impacting profitability and competitiveness.
* **Differentiated Market Segments:** Companies operate in different market segments, limiting direct head-to-head comparisons of competitive advantages.
* **Dynamic Competitive Landscape:** The competitive landscape is constantly changing, making it challenging to identify sustainable competitive advantages.


In short, the filings emphasize the fluidity of the competitive environment and the difficulty in pinpointing specific, enduring competitive advantages based on the information provided.  More detailed financial statements and business descriptions would be needed to provide a comprehensive answer to the user's question.


# Extra Questions

## Question: Compare Apple’s and Microsoft’s risks
✅ Final Answer:
The new context regarding Microsoft's significant investments in R&D and new technologies, particularly in AI, necessitates a revision of the risk assessment, primarily impacting the **Technological Risk** and **Financial Risk** categories for both companies, and adding a new category: **Innovation Risk**.

**Market Risk:** Apple's vulnerability to economic downturns remains amplified by the risk of inventory write-downs and purchase commitment cancellations, as previously discussed.  Microsoft's market risk is less directly tied to consumer spending fluctuations, given its diversified revenue streams from enterprise software and cloud services. However, a prolonged economic downturn could still impact enterprise spending on Microsoft's products and services.

**Competitive Risk:**  The competitive landscape remains intense for both companies.  Microsoft's significant investments in AI, however, represent a potential competitive advantage, particularly in the cloud computing and enterprise software markets.  Apple's potential price reductions to clear excess inventory could further intensify competition.

**Technological Risk:** This section requires significant revision.  Both Apple and Microsoft face the risk of technological obsolescence. However, Microsoft's substantial R&D investments, particularly in AI, represent both an opportunity and a risk.  Success in AI could solidify its market leadership, but failure to deliver on these investments could result in significant financial losses and a loss of competitive edge.  Apple's technological risk remains focused on maintaining its innovation in hardware and software, and the potential disruption from emerging technologies.

**Regulatory Risk:** Remains largely unchanged. Both companies face significant regulatory scrutiny globally.

**Financial Risk:**  Apple's financial risk remains significantly elevated due to inventory write-down and purchase commitment cancellation risks.  A detailed analysis of Apple's inventory turnover, days sales outstanding, and the value of its purchase commitments is crucial.  Microsoft's financial risk is also impacted by its substantial R&D investments.  The success or failure of these investments, particularly in AI, will significantly influence its future financial performance.  Comparing the return on investment (ROI) of these investments against Apple's profitability will provide a more comprehensive comparison of their financial resilience.

**Geopolitical Risk:** Remains largely unchanged. Both companies operate globally and are susceptible to geopolitical risks.

**Innovation Risk (New Category):** This category assesses the risk associated with the failure of new product development and investments in emerging technologies.  Microsoft's significant investments in AI represent a substantial innovation risk.  Failure to successfully integrate AI into its products or to develop successful AI platform services could lead to significant financial losses and a loss of competitive advantage. Apple also faces innovation risk, though perhaps less dramatically given its current market position, related to its ability to continue to innovate in hardware and software.

**Summary:** The new context highlights a significant innovation risk for Microsoft, stemming from its substantial investments in AI.  While Apple's financial risk remains elevated due to inventory and commitment issues, Microsoft's risk profile is now more nuanced, balancing the potential for significant gains from AI innovation against the risk of substantial losses if these investments fail to yield expected returns. A comprehensive risk assessment must now include a detailed analysis of both companies' R&D spending, the potential ROI of their respective investments, and the potential impact of technological disruption on their future profitability.  The comparison should also consider the different nature of their risks: Apple's is more focused on near-term market fluctuations and inventory management, while Microsoft's is more focused on long-term technological bets.

## Question: How is Microsoft getting revenue from AI research?
✅ Final Answer:
Microsoft's revenue generation from AI research is not explicitly detailed in the provided SEC filings.  The documents highlight significant investment in AI research across various segments (Microsoft Research, Azure AI, Microsoft AI, etc.), emphasizing its role in future innovation and product development.  However, there is no direct link made between specific research projects and resulting revenue streams.

The filings reveal the following common themes regarding Microsoft's AI strategy:

* **Strategic Investment in AI Research:** Microsoft heavily invests in fundamental and applied AI research, both at the corporate and segment levels, collaborating with universities and leveraging this research for future product development.
* **AI Integration Across Products and Services:**  AI capabilities are being infused into existing and new products across various segments, including Microsoft 365, Windows, Bing, Xbox, and Azure.  This suggests that AI research is a key driver of innovation and product enhancement, indirectly contributing to revenue.
* **Azure AI as a Competitive Advantage:** Azure's AI-optimized infrastructure and services are positioned as a key differentiator, allowing customers to build and deploy AI applications, generating revenue through cloud services.
* **Focus on Generative AI:**  Microsoft is actively developing and integrating generative AI capabilities into its consumer and commercial offerings, indicating a strong focus on this area for future revenue generation.


In conclusion, while the provided text showcases Microsoft's substantial commitment to AI research and its integration into various revenue-generating products and services, it lacks specific details on how revenue is *directly* generated *from* the research itself.  The revenue is likely derived from the *products and services* enhanced or created *using* the research, rather than the research itself being a direct revenue source.



# Known failure mode: if the LLM doesn’t infer specific companies from the query, the generated filter may be empty, leading to broad or irrelevant results.

# Queries: 
### 1.) Apple's 2022 risks 
### ✅ Final Answer:
Apple's 2022 10-K filing highlights several key risk factors that could materially adversely affect the company's business, results of operations, and financial condition.  The overarching theme is the vulnerability of Apple's performance to both macroeconomic and microeconomic factors, many of which are interconnected and difficult to predict.

Key risk factors identified include:

* **Economic Volatility and Supply Chain Disruptions:** Global and regional economic downturns, coupled with supply chain fragility (including component shortages and price increases), pose significant threats to Apple's production and sales.  The COVID-19 pandemic serves as a recent example of how these factors can severely impact operations.

* **Foreign Currency Exchange Rate Fluctuations:**  Apple's substantial international operations expose it to significant risk from changes in currency exchange rates, impacting both sales and the cost of goods sold.

* **Tax Risks:**  Ongoing tax examinations by various authorities create uncertainty regarding Apple's ultimate tax liability.  Increases in effective tax rates, particularly in the U.S. or Ireland, could negatively affect financial performance.

* **Dependence on Third-Party Developers:** Apple's success is partially reliant on the continued development and maintenance of third-party software applications for its products.  A decline in third-party support could negatively impact consumer demand.

* **Stock Price Volatility:**  The inherent volatility of Apple's stock price presents a risk to investors.


It is important to note that these risks are interconnected and can exacerbate each other. For example, economic downturns can lead to reduced consumer demand, while supply chain disruptions can further constrain production and sales.  The company's past performance is not necessarily indicative of future results due to the influence of these dynamic and unpredictable factors.

### 2.) Apple's 2022 performance 
### ✅ Final Answer:
Apple's 2022 financial performance showed overall growth, driven primarily by strong sales in several key areas, despite challenges from global factors.  Total net sales increased by 8% or $28.5 billion compared to 2021.  However, this growth was partially offset by unfavorable currency exchange rates.

Key themes from Apple's 2022 performance include:

* **Strong iPhone Sales:**  New iPhone models released in Q4 2021 significantly boosted iPhone net sales throughout 2022. This positive trend was observed across various geographic segments, including the Americas and Europe.

* **Growth in Services and Wearables:**  The Services segment experienced increased net sales due to higher revenue from advertising, cloud services, and the App Store.  Similarly, Wearables, Home, and Accessories sales increased, primarily driven by strong performance of Apple Watch and AirPods.

* **Mac Sales Growth:**  Laptop sales fueled an increase in Mac net sales for the year.

* **iPad Sales Decline:**  Conversely, iPad net sales decreased in 2022, mainly due to lower sales of the iPad Pro.

* **Impact of COVID-19:** The ongoing pandemic continued to impact Apple's supply chain, causing disruptions from outsourcing partners and component suppliers.  The pandemic also created volatility in global financial markets.

* **Significant Capital Return:** Apple significantly increased its share repurchase program and raised its quarterly dividend.  This demonstrates a commitment to returning value to shareholders.

* **Increased Operating Expenses:**  Growth in research and development and selling, general, and administrative expenses was primarily driven by increased headcount, advertising, and engineering program costs.


While the provided data shows strong performance in several key areas,  a complete picture requires access to the full financial statements (specifically the tables mentioned as missing from this excerpt) to fully assess profitability and other key financial metrics.

### 3.) Executive compensation of Apple. 

✅ Final Answer:
Executive compensation details for Apple are not directly provided in the given text.  The provided excerpts from SEC filings repeatedly state that information regarding executive compensation will be found in Apple's annual proxy statements, filed within 120 days of a specific date in September of each year (2020, 2021, etc.).  These proxy statements are incorporated by reference, meaning the information is considered part of the filing but not explicitly included in these excerpts.  Therefore, a synthesized answer regarding specific compensation figures or details is impossible.


However, based on the references in the filings, topics that *would* be covered in the proxy statements include:

* **Board Oversight of Risk Management:**  The proxy statements will detail the board's role in overseeing risk management, which is relevant to executive compensation decisions as it impacts the company's overall performance and thus executive pay.
* **Compensation Committee Interlocks and Insider Participation:**  The proxy statements will disclose any potential conflicts of interest related to the compensation committee's composition and the involvement of insiders in compensation decisions.
* **Compensation of Directors:**  Details on the compensation packages for Apple's directors will be included.
* **Executive Compensation:**  The core information requested by the user – the details of executive compensation – will be fully described in the referenced proxy statements.  This would likely include salary, bonuses, stock options, and other benefits.

To obtain the actual executive compensation data, one must consult Apple's proxy statements for the relevant years (2021, 2022, 2023, 2024, and 2025).

### 4.) What were the primary drivers of revenue for Microsoft in their latest annual report?

✅ Final Answer:
Based on the provided data snippets from Microsoft's SEC filings, a definitive answer regarding the *primary* revenue drivers in their latest annual report is impossible. The excerpts focus on revenue growth comparisons between various product and service categories, not absolute revenue figures. Therefore, we cannot rank revenue drivers by their contribution to total revenue.

However, the data consistently highlights the following as significant revenue contributors:

* **Microsoft 365 Commercial subscriptions:** This consistently appears as a key area, encompassing various services like Microsoft Teams, Exchange, SharePoint, and security offerings.  The growth of this segment is repeatedly compared to other areas.

* **Cloud Services (Azure):**  Azure and other cloud services are frequently mentioned as a major area of revenue growth, often compared to other product lines.

* **Windows Commercial products and cloud services:** This category, including volume licensing and cloud services, is another recurring theme, showing its importance in Microsoft's revenue stream.

* **Devices (Surface, Xbox):** Surface devices and Xbox content and services are also highlighted as important revenue streams, though their importance relative to cloud services and Microsoft 365 is unclear from the provided data.

* **LinkedIn:**  LinkedIn's various revenue streams (Talent Solutions, Marketing Solutions, etc.) are also mentioned as significant contributors.

* **Dynamics 365:** This cloud-based business applications suite is another area of notable revenue generation.


In summary, while the exact ranking of revenue drivers cannot be determined from the limited data, the above categories consistently appear as major contributors to Microsoft's revenue.  More comprehensive financial statements are needed to definitively answer the question of primary revenue drivers.
