In [2]:
# Financial Data Extraction - Simple Usage
#
# Configuration is in: extract_financials.py (same folder)
# Edit that file to change: MODEL_NAME, PROMPT, EXAMPLES_CONFIG, CARD_COLORS

import os
from pathlib import Path

from extract_financials import run, run_file, extract, extract_file, display_results

# XBRL Catalog - Fetch structured XBRL data from Neo4j
# NOTE: Only 10-K, 10-Q (and amendments) have XBRL data. 8-K does NOT.
from xbrl_catalog import (
    xbrl_catalog,           # Smart fetcher - accepts CIK or ticker
    get_xbrl_catalog,       # Fetch by CIK
    search_xbrl_concepts,   # Search for concepts
    list_companies_with_xbrl,  # Discover companies with XBRL data
    quick_catalog,          # One-liner for LLM context
    print_catalog_summary,  # Display summary
    XBRL_FORM_TYPES,        # ["10-K", "10-Q", "10-K/A", "10-Q/A"]
)

In [2]:
# Just pass your text - everything else is handled by the module
#
# Sample input: a short news snippet about Marathon Petroleum analyst ratings
# and price targets. The literal is kept verbatim (including the leading
# newline and trailing spaces): the Span column in the recorded output appears
# to index into this exact string (e.g. "Marathon Petroleum" at 1:19, right
# after the leading newline) - TODO confirm against extract_financials.
input_text = """
Marathon Petroleum (NYSE: MPC) has observed the following analyst ratings within the last quarter: 
Bullish Somewhat Bullish Indifferent Somewhat Bearish Bearish Total Ratings 2 4 4 0 0 Last 30D 0 1 0 0 0 1M Ago 0 0 1 0 0 2M Ago 0 0 1 0 0 3M Ago 2 3 2 0 0 
According to 10 analyst offering 12-month price targets in the last 3 months, Marathon Petroleum has an average price target of $134.0 with a high of $153.00 
and a low of $116.00. This article was generated by Benzinga's automated content engine and reviewed by an editor.
"""

# Run extraction and display results
# `run` is imported from extract_financials; its return value is kept in
# `result` so it can be inspected afterwards.
result = run(input_text)

[94m[1mLangExtract[0m: model=[92mgemini-2.5-flash[0m, current=[92m530[0m chars, processed=[92m530[0m chars:  [00:06]

[92m✓[0m Extraction processing complete
[92m✓[0m Extracted [1m10[0m entities ([1m4[0m unique types)
  [96m•[0m Time: [1m6.85s[0m
  [96m•[0m Speed: [1m78[0m chars/sec
  [96m•[0m Chunks: [1m1[0m
Extracted 10 entities










#,Class,Text,Span,Alignment,Ticker,Exchange,Concept,Gaap_Item,Value,Unit,Period,Decimals,Type,Impact
1,company,Marathon Petroleum,1:19,exact,MPC,NYSE,,,,,,,,
2,xbrl_concept,analyst ratings,59:74,exact,,,AnalystRatings,non-gaap:AnalystRatings,,,,,,
3,fact,10 analyst,271:281,exact,,,,,10.0,analysts,last 3 months,,,
4,xbrl_concept,average price target,362:382,exact,,,AveragePriceTarget,non-gaap:AveragePriceTarget,,,,,,
5,fact,$134.0,386:392,exact,,,,,134.0,USD,12-month,1.0,,
6,xbrl_concept,high price target,400:404,lesser,,,HighPriceTarget,non-gaap:HighPriceTarget,,,,,,
7,fact,$153.00,408:415,exact,,,,,153.0,USD,12-month,2.0,,
8,xbrl_concept,low price target,423:426,lesser,,,LowPriceTarget,non-gaap:LowPriceTarget,,,,,,
9,fact,$116.00,430:437,exact,,,,,116.0,USD,12-month,2.0,,
10,description,This article was generated by Benzinga's automated content engine and reviewed by an editor.,439:531,exact,,,,,,,,,source_information,neutral


In [1]:
# Extract from a file - just pass the path
#
# The sample-data root can be overridden with the XBRL_SAMPLE_DATA_DIR
# environment variable so the notebook is not tied to one user's home
# directory; when the variable is unset, the original location is used and
# `filepath` is the same string as before.
#
# NOTE: run the import cell first. The NameError recorded for this cell came
# from executing it on a fresh kernel before `run_file` had been imported.
SAMPLE_DATA_DIR = Path(
    os.environ.get(
        "XBRL_SAMPLE_DATA_DIR",
        "/home/faisal/EventMarketDB/drivers/8K_XBRL_Linking/sample_data",
    )
)

# Passed as a plain string, exactly as the original cell did.
filepath = str(
    SAMPLE_DATA_DIR
    / "DELL_1571996_2025-08-28_000157199625000096"
    / "exhibit_EX-99.1.txt"
)

result = run_file(filepath)

NameError: name 'run_file' is not defined

In [3]:
# -----------------------------------------------------------------------------
# XBRL catalog walkthrough
# -----------------------------------------------------------------------------

# Look the company up by ticker (a CIK works as well) and restrict the fetch
# to the 3 most recent filings.
catalog = xbrl_catalog(
    "ICE",
    limit_filings=3,
)

# Emit the short overview of what was fetched.
print_catalog_summary(catalog)


XBRL Catalog: INTERCONTINENTAL EXCHANGE INC (ICE)
CIK: 0001571949
Industry: FinancialDataAndStockExchanges
Sector: FinancialServices

Total Filings: 3
Total Facts: 6,306
Unique Concepts: 778

Filings:
  - 10-Q (2025-06-30): 1,967 facts
  - 10-Q (2025-03-31): 1,406 facts
  - 10-K (2024-12-31): 2,933 facts

Top Segments:
  - SeniorNotes: 1178 facts
  - ExchangesSegment: 1061 facts
  - MortgageTechnologySegment: 914 facts
  - ICEClearEuropeLimited: 556 facts
  - OriginalMargin: 505 facts




In [4]:
# Build the text rendering of the catalog that is meant to be handed to an
# LLM (max_facts=30 presumably bounds how many facts are included - confirm in
# xbrl_catalog), then show only the leading 3000 characters of it.
llm_context = catalog.to_llm_context(max_facts=30)
context_preview = llm_context[:3000]
print(context_preview)

XBRL FINANCIAL DATA CATALOG
Company: INTERCONTINENTAL EXCHANGE INC (ICE)
CIK: 0001571949
Industry: FinancialDataAndStockExchanges | Sector: FinancialServices

## SUMMARY
----------------------------------------
Total Filings: 3
Total Facts: 6,306
Unique Concepts: 778
Unique Periods: 146
Unique Segments: 265
Report Types:
  10-Q: 2
  10-K: 1

## REPORTS
----------------------------------------
* 10-Q - 2025-06-30
  Accession: 0001571949-25-000013
  Facts: 1,967
* 10-Q - 2025-03-31
  Accession: 0001571949-25-000007
  Facts: 1,406
* 10-K - 2024-12-31
  Accession: 0001571949-25-000003
  Facts: 2,933
  Key Metrics:
    - Net Income (Loss) Attributable to Parent: 2,754,000,000 iso4217:USD
    - Earnings Per Share, Basic: 4.8 iso4217:USDshares
    - Stockholders' Equity Attributable to Parent: 27,647,000,000 iso4217:USD
    - Assets: 139,428,000,000 iso4217:USD

## KEY CONCEPTS REPORTED
----------------------------------------
* Revenue from Contract with Customer, Excluding Assessed Tax [cre

In [5]:
# Access normalized reference tables and report how many entries each holds.
print(f"Unique Concepts: {len(catalog.concepts)}")
print(f"Unique Periods: {len(catalog.periods)}")
print(f"Unique Members/Segments: {len(catalog.members)}")
print(f"Calculation Trees: {len(catalog.calculation_trees)}")

# Show top 5 concepts by fact count.
# catalog.concepts maps qname -> info dict (with 'label' and 'fact_count').
# Rank explicitly instead of trusting the mapping's iteration order, so the
# listing really is "top by fact count". sorted() is stable, so if the mapping
# is already ordered that way the output is unchanged.
top_concepts = sorted(
    catalog.concepts.values(),
    key=lambda info: info['fact_count'],
    reverse=True,
)[:5]

print("\nTop 5 Concepts:")
for info in top_concepts:
    print(f"  - {info['label']}: {info['fact_count']} facts")

Unique Concepts: 778
Unique Periods: 146
Unique Members/Segments: 265
Calculation Trees: 50

Top 5 Concepts:
  - Revenue from Contract with Customer, Excluding Assessed Tax: 272 facts
  - Equity, Including Portion Attributable to Noncontrolling Interest: 42 facts
  - Unsecured Long-Term Debt, Noncurrent: 32 facts
  - Debt, Long-Term and Short-Term, Combined Amount: 19 facts
  - Debt Instrument, Fair Value Disclosure: 18 facts


In [None]:
# Pull the time series of a single concept.
# The qname has to match exactly; search_xbrl_concepts() is the way to find it.
# For ICE the revenue qname is "RevenueFromContractWithCustomerExcludingAssessedTax",
# not "Revenues".
revenue_series = catalog.get_time_series(
    "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax"
)

# Header with the total count, followed by the first five data points.
print(f"Revenue time series ({len(revenue_series)} data points):")
for point in revenue_series[:5]:
    print(f"  {point['period_display']}: {point['value']} {point['unit']}")

In [7]:
# Look up concepts matching "revenue" for CIK 1571949, asking for at most
# 10 results.
results = search_xbrl_concepts(
    "revenue",
    cik="1571949",
    limit=10,
)

# Only the first five matches are listed.
print("Concepts containing 'revenue':")
for hit in results[:5]:
    print(f"  - {hit['concept']} ({hit['qname']}): {hit['fact_count']} facts")

Concepts containing 'revenue':
  - Revenue from Contract with Customer, Excluding Assessed Tax (us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax): 1512 facts
  - Revenues (us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax): 891 facts
  - Revenues, Less Transaction-Based Expenses (ice:RevenuesLessTransactionBasedExpenses): 718 facts
  - Contract with Customer, Liability, Including New Contract Revenue, Recognized (ice:ContractWithCustomerLiabilityIncludingNewContractRevenueRecognized): 64 facts
  - Revenue, Remaining Performance Obligation, Expected Timing of Satisfaction, Period (us-gaap:RevenueRemainingPerformanceObligationExpectedTimingOfSatisfactionPeriod1): 31 facts


In [10]:
import pprint

# Pretty-print the full LLM context string. A default-configured printer
# formats exactly like the module-level pprint.pprint() helper.
printer = pprint.PrettyPrinter()
printer.pprint(llm_context)

 'XBRL FINANCIAL DATA CATALOG\n'
 'Company: INTERCONTINENTAL EXCHANGE INC (ICE)\n'
 'CIK: 0001571949\n'
 'Industry: FinancialDataAndStockExchanges | Sector: FinancialServices\n'
 '\n'
 '## SUMMARY\n'
 '----------------------------------------\n'
 'Total Filings: 3\n'
 'Total Facts: 6,306\n'
 'Unique Concepts: 778\n'
 'Unique Periods: 146\n'
 'Unique Segments: 265\n'
 'Report Types:\n'
 '  10-Q: 2\n'
 '  10-K: 1\n'
 '\n'
 '## REPORTS\n'
 '----------------------------------------\n'
 '* 10-Q - 2025-06-30\n'
 '  Accession: 0001571949-25-000013\n'
 '  Facts: 1,967\n'
 '* 10-Q - 2025-03-31\n'
 '  Accession: 0001571949-25-000007\n'
 '  Facts: 1,406\n'
 '* 10-K - 2024-12-31\n'
 '  Accession: 0001571949-25-000003\n'
 '  Facts: 2,933\n'
 '  Key Metrics:\n'
 '    - Net Income (Loss) Attributable to Parent: 2,754,000,000 iso4217:USD\n'
 '    - Earnings Per Share, Basic: 4.8 iso4217:USDshares\n'
 "    - Stockholders' Equity Attributable to Parent: 27,647,000,000 "
 'iso4217:USD\n'
 '    - Assets: 