# Extract Structured Insider Trades (Directors' Dealings) with AI

This notebook demonstrates how to convert unstructured insider trade filings (Directors' Dealings, or `DIRS`) into clean, structured JSON using the FinancialReports API and Google's Gemini 2.0 Flash model.

### The Value Proposition

This workflow provides an automated solution for extracting complex financial data. It uses our API to find filings and the Gemini 2.0 Flash model to parse raw markdown text into predictable, analyzable JSON objects.

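**Note:** The code cells use top-level `await`, which works in Jupyter/IPython but not in a plain Python script. They rely on the asynchronous FinancialReports client and `aiohttp` for HTTP calls, and on the `google-genai` SDK for Gemini.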

In [None]:
import os
import json
import logging
import ssl
import aiohttp
import certifi
import pandas as pd
from google import genai
from google.genai import types
from financial_reports_generated_client import ApiClient, Configuration
from financial_reports_generated_client.api.filings_api import FilingsApi
from dotenv import load_dotenv

# Configure logging at INFO level and create the logger used by every cell
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# macOS SSL fix: point Python's SSL machinery at the certifi CA bundle
os.environ['SSL_CERT_FILE'] = certifi.where()

# Load variables from a .env file (python-dotenv) before reading the keys
load_dotenv()

FR_API_KEY = os.getenv("FR_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Both keys are mandatory: fail fast when either is missing or empty
if not (FR_API_KEY and GEMINI_API_KEY):
    raise ValueError("API keys not found. Please set FR_API_KEY and GEMINI_API_KEY.")

logger.info("API keys loaded successfully.")

In [None]:
# Google GenAI client (google-genai SDK), authenticated with the Gemini key
gemini_client = genai.Client(api_key=GEMINI_API_KEY)

# FinancialReports API client, pointed at the public API host
config = Configuration(host="https://api.financialreports.eu")
fr_api_client = ApiClient(config)
# Attach the key as a default header so every request carries X-API-Key
fr_api_client.set_default_header("X-API-Key", FR_API_KEY)
# Filings endpoint wrapper built on top of the shared API client
filings_api = FilingsApi(fr_api_client)

logger.info("API clients configured.")

In [None]:
# --- User-Configurable Parameters ---
# ISIN of the company whose filings are searched in Step 1
COMPANY_ISIN = "DE000A1EWWW0"  # Example: adidas AG
# Lower bound on the filing release time, sent as `release_datetime_from`
RELEASE_DATE_FROM = "2024-01-01T00:00:00Z"
# Gemini model name passed to `generate_content` in Step 3
MODEL_ID = "gemini-2.0-flash"
# --------------------------------------

# Echo the chosen target so the run log records what was queried
logger.info(f"Target: {COMPANY_ISIN} since {RELEASE_DATE_FROM}")

## Step 1: Find DIRS Filings

We use the `/filings/` endpoint to find relevant Directors' Dealings filings.

In [None]:
logger.info(f"Searching for 'DIRS' filings for ISIN {COMPANY_ISIN}...")

try:
    # Note: We await the call as the generated client is asynchronous
    filings_response = await filings_api.filings_list(
        company_isin=COMPANY_ISIN,
        type="DIRS",
        release_datetime_from=RELEASE_DATE_FROM,
        page_size=10
    )

    if filings_response and hasattr(filings_response, 'results'):
        filings_to_process = filings_response.results
        logger.info(f"Found {len(filings_to_process)} filings.")
    else:
        filings_to_process = []
        logger.warning("No 'DIRS' filings found.")

except Exception as e:
    logger.error(f"Error fetching filings: {e}")
    filings_to_process = []

## Step 2: Define the Structured Output Schema

We define the response schema as a plain Python dictionary and pass it to the `google-genai` SDK as `response_schema`, so that Gemini's reply is constrained to this JSON structure.

In [None]:
# Person who reported the trade
_reporting_person_schema = {
    "type": "OBJECT",
    "properties": {
        "name": {"type": "STRING"},
        "position": {"type": "STRING"},
    },
}

# A single transaction listed in the filing
_transaction_schema = {
    "type": "OBJECT",
    "properties": {
        "transaction_date": {"type": "STRING", "description": "YYYY-MM-DD"},
        "financial_instrument": {"type": "STRING"},
        "nature_of_transaction": {"type": "STRING"},
        "price": {"type": "NUMBER"},
        "currency": {"type": "STRING"},
        "volume": {"type": "NUMBER"},
        "total_value": {"type": "NUMBER"},
        "venue": {"type": "STRING"},
    },
}

# Top-level shape requested from the model: issuer fields, the reporting
# person, and an array of transactions.
dirs_schema = {
    "type": "OBJECT",
    "properties": {
        "issuer_name": {"type": "STRING"},
        "issuer_isin": {"type": "STRING"},
        "reporting_person_details": _reporting_person_schema,
        "transactions": {
            "type": "ARRAY",
            "items": _transaction_schema,
        },
    },
}

logger.info("Extraction schema defined.")

## Step 3: Create AI Extraction Function

We fetch each filing's markdown directly from the `/filings/{id}/markdown/` endpoint with `aiohttp`, then send that text to Gemini together with the schema defined above.

In [None]:
async def extract_structured_data(filing_id: int) -> dict | None:
    """
    Fetch a filing's markdown and use Gemini to extract structured JSON.

    The markdown is downloaded from the FinancialReports API with `aiohttp`
    and sent to the Gemini model together with `dirs_schema`, so the reply
    is requested as JSON matching that schema.

    Args:
        filing_id: ID of the filing to fetch and parse.

    Returns:
        The extracted data (normally a dict), or None when the markdown
        could not be fetched, was shorter than 10 characters, or the
        extraction failed. Failures are logged, never raised.
    """
    try:
        url = f"https://api.financialreports.eu/filings/{filing_id}/markdown/"
        headers = {"X-API-Key": FR_API_KEY}
        ssl_context = ssl.create_default_context(cafile=certifi.where())

        # 1. Fetch raw markdown
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers, ssl=ssl_context) as resp:
                if resp.status != 200:
                    # Say why the filing is skipped instead of dropping it silently
                    logger.warning(
                        f"Markdown fetch for filing {filing_id} returned HTTP {resp.status}"
                    )
                    return None
                markdown_content = await resp.text()

        if not markdown_content or len(markdown_content) < 10:
            logger.warning(f"Filing {filing_id} has no usable markdown content")
            return None

        # 2. Extract with AI
        prompt = f"Extract insider trade details from this filing in JSON format:\n\n{markdown_content}"

        # Use the SDK's async interface: the synchronous call would block the
        # event loop for the whole duration of the model request.
        response = await gemini_client.aio.models.generate_content(
            model=MODEL_ID,
            contents=prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=dirs_schema,
            )
        )

        # `parsed` can exist but be None, so test the value rather than the
        # attribute; otherwise the text fallback below is never reached.
        parsed = getattr(response, 'parsed', None)
        if parsed is not None:
            return parsed

        if not response.text:
            logger.warning(f"Gemini returned no text for filing {filing_id}")
            return None

        # Invalid JSON raises here and is reported by the handler below
        return json.loads(response.text)

    except Exception as e:
        # Boundary handler: one bad filing must not abort the whole batch
        logger.error(f"Extraction error on ID {filing_id}: {e}")
        return None

## Step 4: Execute Workflow & Aggregate Results

In [None]:
all_structured_data = []

if filings_to_process:
    for filing in filings_to_process:
        data = await extract_structured_data(filing.id)
        if data:
            # Handle potential object/dict variance from SDK
            data_dict = data if isinstance(data, dict) else data.model_dump()
            data_dict['filing_id'] = filing.id
            data_dict['release_datetime'] = str(filing.release_datetime)
            all_structured_data.append(data_dict)
            logger.info(f"Processed filing {filing.id}")

if all_structured_data:
    print(f"\n--- Successfully processed {len(all_structured_data)} filings ---")
    print(json.dumps(all_structured_data[0], indent=2))
else:
    print("\nNo structured data was extracted.")

## Step 5: Analyze and Flatten Data with Pandas

In [None]:
if all_structured_data:
    try:
        # The extraction schema marks no field as required, so a record may
        # lack `transactions` or `reporting_person_details` (or carry None).
        # json_normalize raises on a missing record_path, so normalise both
        # to empty containers on shallow copies of the records.
        records = [
            {
                **record,
                "transactions": record.get("transactions") or [],
                "reporting_person_details": record.get("reporting_person_details") or {},
            }
            for record in all_structured_data
        ]

        # One row per transaction, with filing-level fields repeated on each
        # row. errors="ignore" fills missing meta keys with NaN instead of
        # raising KeyError.
        df = pd.json_normalize(
            records,
            record_path=['transactions'],
            meta=['filing_id', 'release_datetime', 'issuer_name', 'issuer_isin', ['reporting_person_details', 'name']],
            errors='ignore',
        )
        display(df.head())
    except Exception as e:
        logger.error(f"Pandas error: {e}")

## Conclusion

This pipeline provides a scalable, automated way to extract structured financial insights from European regulatory filings.