# 📈 Captide API: Net Income to Adjusted EBITDA Reconciliation

This notebook demonstrates how to use the Captide API to fetch financial documents for public companies and, using LLM-driven parsing, extract each quarter's reconciliation from Net Income to Adjusted EBITDA.

**Note:** You must provide your API key as `CAPTIDE_API_KEY`, either in a `.env` file (not committed to GitHub) or as an environment variable.

In [10]:
# 📦 Install required packages
!pip install requests pandas python-dotenv

/bin/bash: pip: command not found


In [11]:
# 🔐 Load API key from .env or environment variable
import os
from IPython.display import display
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment,
# then read the key from there.
load_dotenv()
CAPTIDE_API_KEY = os.getenv("CAPTIDE_API_KEY")

# Fail fast: without a key the X-API-Key header would carry no usable value and
# every request below would fail later with a far less obvious error.
if not CAPTIDE_API_KEY:
    raise RuntimeError(
        "CAPTIDE_API_KEY is not set. Add it to a .env file or export it as an "
        "environment variable before running this notebook."
    )

# Headers sent with every Captide request made in this notebook.
HEADERS = {
    "X-API-Key": CAPTIDE_API_KEY,
    "Content-Type": "application/json",
    "Accept": "application/json"
}

# Tickers processed by the run cell further down.
TICKERS = ["SNAP", "PLTR", "UBER"]

In [12]:
# 🛠️ Helper functions for document filtering and API parsing
import re, json, requests, pandas as pd
from typing import Dict, List

def is_valid_fiscal_period(fp: str) -> bool:
    """Tell whether ``fp`` names a quarter in a year later than 2022.

    The label must begin with ``Q<1-4> <four-digit year>``; anything after
    that prefix is ignored because the pattern is only anchored at the start.
    """
    parsed = re.match(r"Q([1-4]) (\d{4})", fp)
    if parsed is None:
        return False
    year = int(parsed.group(2))
    return year > 2022

def is_valid_document(doc: Dict) -> bool:
    """Decide whether a document record should be kept.

    Records whose ``sourceType`` is not ``"8-K"`` are always accepted. An 8-K
    is accepted only when its ``additionalKwargs["item"]`` value contains
    ``"2.02"`` (presumably the earnings-release item -- confirm against the
    API's schema).

    Args:
        doc: A document record; must contain ``"sourceType"``.

    Returns:
        True if the record passes the filter, False otherwise.

    Raises:
        KeyError: If ``doc`` has no ``"sourceType"`` key.
    """
    if doc["sourceType"] != "8-K":
        return True
    # dict.get(key, default) only uses the default when the key is ABSENT; a
    # key that is present with a null value yields None, so normalise with
    # `or` instead of relying on the default argument.
    extra = doc.get("additionalKwargs") or {}
    if not isinstance(extra, dict):
        return False
    item = extra.get("item") or ""
    return "2.02" in item

def fetch_documents(ticker: str) -> List[Dict]:
    """Fetch the document list for ``ticker`` and keep the usable 8-K records.

    A record is kept when its ``sourceType`` is ``"8-K"``, it has a string
    ``fiscalPeriod`` accepted by ``is_valid_fiscal_period``, and it passes
    ``is_valid_document``.

    Args:
        ticker: Ticker symbol interpolated into the request URL.

    Returns:
        One dict per kept record with the keys ``ticker``, ``fiscalPeriod``
        and ``sourceLink``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = f"https://rest-api.captide.co/api/v1/companies/ticker/{ticker}/documents"
    response = requests.get(url, headers=HEADERS, timeout=60)
    # Surface auth/quota/server errors here instead of letting an error body be
    # iterated as if it were the document list.
    response.raise_for_status()
    # NOTE(review): assumes the endpoint returns a JSON list of dicts -- confirm.
    docs = response.json()
    return [
        {"ticker": doc["ticker"],
         "fiscalPeriod": doc["fiscalPeriod"],
         "sourceLink": doc["sourceLink"]}
        for doc in docs
        if doc["sourceType"] == "8-K"
        # A present-but-null fiscalPeriod would make the regex helper raise.
        and isinstance(doc.get("fiscalPeriod"), str)
        and is_valid_fiscal_period(doc["fiscalPeriod"])
        and is_valid_document(doc)
    ]

In [13]:
# 📊 SSE response parsing and query submission
def parse_sse_response(sse_text: str) -> Dict:
    """Extract the JSON object embedded in the first ``full_answer`` event.

    Each line starting with ``data:`` is decoded as JSON. For the first decoded
    object whose ``type`` is ``"full_answer"``, citation markers of the form
    ``[#token]`` are stripped from its ``content`` and the outermost
    ``{...}`` span is parsed as JSON.

    Lines that are not valid JSON, or that decode to something other than an
    object, are skipped individually so that one stray line cannot discard a
    valid answer elsewhere in the stream.

    Args:
        sse_text: Full text of the server-sent-events response.

    Returns:
        The parsed object, or ``{}`` when no usable ``full_answer`` is found.
    """
    for raw_line in sse_text.splitlines():
        # Accept both "data: ..." and "data:..."; json.loads ignores the
        # optional leading space.
        if not raw_line.startswith("data:"):
            continue
        try:
            event = json.loads(raw_line[5:])
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict) or event.get("type") != "full_answer":
            continue

        content = event.get("content")
        if not isinstance(content, str):
            return {}
        # Drop inline citation markers such as " [#abc123]" before parsing.
        content = re.sub(r"\s*\[#\w+\]", "", content)
        found = re.search(r"\{.*\}", content, re.DOTALL)
        if found is None:
            return {}
        try:
            return json.loads(found.group(0))
        except json.JSONDecodeError:
            return {}
    return {}

def fetch_metrics_with_prompt(source_links: List[str], prompt: str) -> Dict:
    """Send ``prompt`` about ``source_links`` to the agent endpoint and parse the reply.

    Args:
        source_links: Document links sent as the ``sourceLink`` field.
        prompt: Text sent as the ``query`` field.

    Returns:
        The object extracted by ``parse_sse_response``, or ``{}`` when the
        request fails with an HTTP error status or no answer can be parsed.
    """
    import warnings

    payload = {"query": prompt, "sourceLink": source_links}
    r = requests.post(
        "https://rest-api.captide.co/api/v1/rag/agent-query-stream",
        json=payload, headers=HEADERS, timeout=120
    )
    if not r.ok:
        # Stay best-effort (callers skip empty results) but make the failure
        # visible instead of silently parsing an error body into {}.
        warnings.warn(
            f"Captide query returned HTTP {r.status_code} for {source_links}"
        )
        return {}
    # Event streams are UTF-8, but requests falls back to ISO-8859-1 for
    # text/* responses that declare no charset, which garbles non-ASCII text.
    r.encoding = "utf-8"
    return parse_sse_response(r.text)

In [14]:
# 🔁 Prompt building and reconciliation merging logic
# Instruction text shared by every query. build_prompt() returns it unchanged
# when no keys are known yet and appends a key-ordering hint to it otherwise.
BASE_PROMPT = (
    "Return a single valid JSON object with double-quoted keys and numeric values (in thousands of dollars). The object "
    "must represent the reconciliation from Net Income to Adjusted EBITDA, including all reported line items. Use "
    "positive values for additions to Net Income and negative values for subtractions. Do not include words like 'add' "
    "or 'less' in the keys. Output only the JSON object—no commentary or extra text."
)

def build_prompt(prev_keys: List[str]) -> str:
    """Compose the query text, optionally pinning previously seen keys.

    With no previous keys the base prompt is returned as-is. Otherwise a
    sentence listing the keys (each wrapped in double quotes, comma-separated)
    is appended, asking for them to be reused in that order and for any new
    line items to be slotted in relative to that list.
    """
    if prev_keys:
        quoted_keys = [f'"{name}"' for name in prev_keys]
        joined = ", ".join(quoted_keys)
        key_hint = (
            f" Use the following keys in this order if they appear: [{joined}]."
            " If the document contains additional reconciliation line items, insert "
            "them at the correct position relative to the list above."
        )
        return BASE_PROMPT + key_hint
    return BASE_PROMPT

def merge_key_lists(master: list[str], this_quarter: list[str]) -> list[str]:
    """Insert keys from ``this_quarter`` that ``master`` lacks, keeping relative order.

    Each new key is placed directly after the nearest preceding key of
    ``this_quarter`` that is already in ``master``; failing that, directly
    before the nearest following one; failing that, at the end.

    ``master`` is modified in place and the same list object is returned.
    """
    not_found = object()  # sentinel, so that any key value is a legal match
    for pos, key in enumerate(this_quarter):
        if key in master:
            continue

        # Nearest earlier neighbour that master already knows about.
        anchor = next(
            (cand for cand in reversed(this_quarter[:pos]) if cand in master),
            not_found,
        )
        if anchor is not not_found:
            master.insert(master.index(anchor) + 1, key)
            continue

        # Otherwise the nearest later neighbour.
        anchor = next(
            (cand for cand in this_quarter[pos + 1:] if cand in master),
            not_found,
        )
        if anchor is not_found:
            master.append(key)
        else:
            master.insert(master.index(anchor), key)
    return master

In [15]:
# 🧠 Per-ticker pipeline: fetch documents, query each one, and merge the results
def fiscal_sort_key(fp: str) -> tuple[int, int]:
    """Build a ``(year, quarter)`` sort key from a ``Q<n> <yyyy>`` label.

    Labels that do not start with that pattern get ``(9999, 9)`` so they sort
    after every real period.
    """
    parsed = re.match(r"Q([1-4]) (\d{4})", fp)
    if parsed:
        quarter, year = (int(part) for part in parsed.groups())
        return (year, quarter)
    return (9999, 9)

def run_one_ticker(ticker: str) -> Dict[str, object]:
    """Query every filtered document of ``ticker`` in fiscal-period order.

    Documents are processed oldest first. The keys collected from earlier
    documents are passed into each later prompt, and the running key order is
    extended with whatever new keys each answer introduces.

    Args:
        ticker: Ticker symbol passed to ``fetch_documents``.

    Returns:
        A dict with two entries:
        ``"keys"`` -- the merged, ordered list of line-item keys, and
        ``"data"`` -- a mapping from fiscal period to the parsed answer for
        that period. Documents that yield an empty answer are skipped; a later
        document for the same fiscal period replaces an earlier one.
    """
    docs = fetch_documents(ticker)
    docs.sort(key=lambda d: fiscal_sort_key(d["fiscalPeriod"]))

    key_order: List[str] = []
    results: Dict[str, Dict[str, float]] = {}

    for doc in docs:
        # Feed the keys seen so far back in so naming stays consistent
        # from one period to the next.
        prompt = build_prompt(key_order)
        data = fetch_metrics_with_prompt([doc["sourceLink"]], prompt)
        if not data:
            continue
        results[doc["fiscalPeriod"]] = data
        key_order = merge_key_lists(key_order, list(data.keys()))

    return {"keys": key_order, "data": results}

In [16]:
# 🚀 Run the notebook for selected tickers

from concurrent.futures import ThreadPoolExecutor, as_completed

# Run all tickers concurrently, one worker thread per ticker.
per_ticker_output = {}
# ThreadPoolExecutor rejects max_workers=0, so skip the pool when there is
# nothing to run.
if TICKERS:
    with ThreadPoolExecutor(max_workers=len(TICKERS)) as pool:
        futures = {pool.submit(run_one_ticker, t): t for t in TICKERS}
        # fut.result() re-raises any exception raised inside the worker.
        finished = {futures[fut]: fut.result() for fut in as_completed(futures)}
    # Completion order varies between runs; store results in TICKERS order so
    # the tables below always appear in the same sequence.
    per_ticker_output = {t: finished[t] for t in TICKERS}

In [17]:
# 📊 Format and display as dataframes
# Build one table per ticker: rows are line items in merged key order,
# columns are fiscal periods.
tables = {}
for symbol, result in per_ticker_output.items():
    frame = pd.DataFrame(result["data"])
    frame = frame.reindex(result["keys"])
    frame.index.name = "Line item"
    tables[symbol] = frame

# Render every table under a heading naming its ticker.
for symbol in tables:
    print(f"\n📊 {symbol}")
    display(tables[symbol])


📊 PLTR


Unnamed: 0_level_0,Q1 2023,Q2 2023,Q3 2023,Q4 2023,Q1 2024,Q2 2024,Q3 2024,Q4 2024,Q1 2025
Line item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Net income attributable to common stockholders,16802,28127,71505,93391,105530.0,134126.0,143525.0,79009.0,214031.0
Net income attributable to noncontrolling interests,2349,-255,1934,3522,541.0,1444.0,5816.0,-2073.0,3686.0
Interest income,-20853,-30310,-36864,-44545,-43352.0,-46593.0,-52120.0,-54727.0,-50441.0
Interest expense,1275,1317,742,136,,,,,
"Other (income) expense, net",2861,9024,-3864,3956,13507.0,11173.0,8110.0,-14768.0,3173.0
Provision for income taxes,1681,2171,6530,9334,4655.0,5189.0,7809.0,3602.0,5599.0
Depreciation and amortization,8320,8399,8663,7972,8438.0,8056.0,8087.0,7006.0,6622.0
Stock-based compensation,114714,114201,114380,132608,125651.0,141764.0,142425.0,281798.0,155339.0
Employer payroll taxes related to stock-based compensation,6285,10760,8909,10953,19926.0,6464.0,19950.0,79681.0,59323.0
Adjusted EBITDA,133434,143434,171935,217327,234896.0,261623.0,283602.0,379528.0,397332.0



📊 UBER


Unnamed: 0_level_0,Q1 2023,Q2 2023,Q3 2023,Q4 2023,Q1 2024,Q2 2024,Q3 2024,Q4 2024,Q1 2025
Line item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Net income attributable to Uber Technologies, Inc.",-157000,394,221,1429000,-654000.0,1015000,2612000,6883000,1776000
"Net income attributable to non-controlling interests, net of tax",0,0,-2,271000,-9000.0,-7000,-13000,18000,-2000
Provision for income taxes,55000,65,-40,133000,29000.0,57000,158000,-6002000,-402000
Income from equity method investments,-36000,-4,-3,-5000,-4000.0,12000,12000,10000,-13000
Interest expense,168000,144,166,155000,124000.0,139000,143000,117000,105000
"Other income, net",-292000,-273,52,-1331000,678000.0,-420000,-1851000,-256000,-262000
Depreciation and amortization,207000,208,205,203000,190000.0,173000,179000,169000,171000
Stock-based compensation expense,470000,504,492,469000,484000.0,455000,438000,419000,435000
"Legal, tax, and regulatory reserve changes and settlements",250000,-155,-13,-73000,,134000,0,462000,28000
Goodwill and asset impairments/loss on sale of assets,67000,16,2,-1000,,0,0,6000,0



📊 SNAP


Unnamed: 0_level_0,Q1 2023,Q2 2023,Q3 2023,Q4 2023,Q1 2024,Q2 2024,Q3 2024,Q4 2024
Line item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Net loss,-328674.0,-377308,-368256,-248247,-305090,-249000,153000.0,-9
Interest income,-37948.0,-43144,-43839,-43463,-39898,-31000,,-18
Interest expense,5885.0,5343,5521,5275,4743,21000,,6
"Other (income) expense, net",-11372.0,-1323,20662,34447,81,16000,,-2
Income tax (benefit) expense,6845.0,12093,5849,3275,6932,2000,,3
Depreciation and amortization,35220.0,39688,41209,43882,38098,97000,,72
Stock-based compensation expense,314931.0,317943,353846,333063,254715,269000,266000.0,285
Payroll and other tax expense related to stock-based compensation,15926.0,8229,6463,8706,15970,18000,,15
Adjusted EBITDA,813.0,-38479,40094,159149,45659,55000,132000.0,276
Restructuring charges,,0,18639,22211,70108,9000,0.0,0
