# SEC Cybersecurity Disclosure Agent

This notebook creates a Pydantic AI agent that:
1. Takes a company CIK number
2. Searches SEC filings for cybersecurity disclosures
3. Provides a simple summary of all cybersecurity disclosures found


## Setup and Imports


In [None]:
from pydantic_ai import Agent
from typing import List, Dict, Any
from datetime import datetime, timedelta
from sec_search_tools import search_cybersecurity_disclosures, search_sec_filings
from sec_edgar_client import SECEdgarClient
from company_cik_lookup import lookup_company_cik, find_companies_in_text


## Check Dependencies and Setup


In [None]:
# Check if Elasticsearch is running and accessible
import subprocess
import sys

def check_docker():
    """Check if Docker is running"""
    try:
        result = subprocess.run(['docker', 'info'], capture_output=True, timeout=5)
        return result.returncode == 0
    except:
        return False

def check_elasticsearch_container():
    """Check if Elasticsearch container is running"""
    try:
        result = subprocess.run(['docker', 'ps', '--filter', 'name=elasticsearch', '--format', '{{.Names}}'], 
                              capture_output=True, text=True, timeout=5)
        containers = result.stdout.strip().split('\n')
        return [c for c in containers if c]
    except:
        return []

print("Checking Docker and Elasticsearch setup...")
print("=" * 60)

# Check Docker
docker_running = check_docker()
if not docker_running:
    print("‚úó Docker is not running")
    print("\nüìã To fix this:")
    print("   1. Start Docker Desktop (or Docker daemon)")
    print("   2. Wait for Docker to fully start")
    print("   3. Run this cell again")
    print("\nüí° Quick start command (after Docker is running):")
    print("   docker run -d -p 9200:9200 -e 'discovery.type=single-node' --name elasticsearch elasticsearch:8.11.0")
else:
    print("‚úì Docker is running")
    
    # Check for Elasticsearch container
    containers = check_elasticsearch_container()
    if not containers:
        print("‚ö† No Elasticsearch container found")
        print("\nüìã To start Elasticsearch:")
        print("   Run this command in your terminal:")
        print("   docker run -d -p 9200:9200 -e 'discovery.type=single-node' --name elasticsearch elasticsearch:8.11.0")
        print("\n   Or use the cell below to start it automatically.")
    else:
        print(f"‚úì Found Elasticsearch container(s): {', '.join(containers)}")

# Check Elasticsearch connection
print("\n" + "=" * 60)
print("Checking Elasticsearch connection...")
try:
    from elasticsearch import Elasticsearch
    es = Elasticsearch('http://localhost:9200', request_timeout=2)
    if es.ping():
        print("‚úì Elasticsearch is responding on port 9200")
        
        # Check if index exists
        index_name = "sec_filings"
        if es.indices.exists(index=index_name):
            count = es.count(index=index_name)['count']
            print(f"‚úì Index '{index_name}' exists with {count} documents")
        else:
            print(f"‚ö† Index '{index_name}' does not exist. You need to index chunks first.")
            print("  Use: from sec_search_tools import index_sec_chunks")
            print("  Then: index_sec_chunks(your_chunks)")
    else:
        print("‚úó Elasticsearch is not responding (ping failed)")
        print("  Make sure the container is running: docker ps")
except ImportError:
    print("‚úó elasticsearch package not installed")
    print("  Install with: pip install elasticsearch")
except Exception as e:
    print(f"‚úó Cannot connect to Elasticsearch: {e}")
    if docker_running:
        print("  Docker is running but Elasticsearch container may not be started.")
        print("  Start it with: docker start elasticsearch")
    else:
        print("  Make sure Docker is running first, then start Elasticsearch.")


‚úó Elasticsearch is not responding
  Make sure Elasticsearch is running: docker run -d -p 9200:9200 elasticsearch:8.11.0


## Start Elasticsearch (if not running)

Run this cell to automatically start Elasticsearch in Docker:


In [None]:
# Uncomment and run this to start Elasticsearch automatically
# Make sure Docker Desktop is running first!

# import subprocess
# 
# print("Starting Elasticsearch container...")
# try:
#     # Check if container already exists
#     result = subprocess.run(['docker', 'ps', '-a', '--filter', 'name=elasticsearch', '--format', '{{.Names}}'], 
#                            capture_output=True, text=True)
#     existing = result.stdout.strip()
#     
#     if existing:
#         print(f"Found existing container: {existing}")
#         # Start existing container
#         subprocess.run(['docker', 'start', 'elasticsearch'], check=True)
#         print("‚úì Started existing Elasticsearch container")
#     else:
#         # Create and start new container
#         subprocess.run([
#             'docker', 'run', '-d',
#             '-p', '9200:9200',
#             '-e', 'discovery.type=single-node',
#             '-e', 'xpack.security.enabled=false',  # Disable security for easier use
#             '--name', 'elasticsearch',
#             'elasticsearch:8.11.0'
#         ], check=True)
#         print("‚úì Created and started new Elasticsearch container")
#         print("  Waiting for Elasticsearch to be ready...")
#         import time
#         time.sleep(5)  # Give it a moment to start
#     
#     # Test connection
#     from elasticsearch import Elasticsearch
#     es = Elasticsearch('http://localhost:9200')
#     for i in range(10):  # Try for up to 10 seconds
#         if es.ping():
#             print("‚úì Elasticsearch is ready!")
#             break
#         time.sleep(1)
#     else:
#         print("‚ö† Elasticsearch container started but not responding yet. Wait a few seconds and check again.")
#         
# except subprocess.CalledProcessError as e:
#     print(f"‚úó Error starting Elasticsearch: {e}")
#     print("  Make sure Docker Desktop is running!")
# except Exception as e:
#     print(f"‚úó Error: {e}")

print("üìù To start Elasticsearch:")
print("   1. Make sure Docker Desktop is running")
print("   2. Uncomment the code above and run this cell")
print("   3. OR run this command in terminal:")
print("      docker run -d -p 9200:9200 -e 'discovery.type=single-node' --name elasticsearch elasticsearch:8.11.0")


## Define Tools


In [None]:
def lookup_company_by_name(company_name: str) -> Dict[str, Any]:
    """
    Look up CIK number for a company by name.
    
    This tool should be used FIRST when a question mentions a company name
    but doesn't provide a CIK number. It maps common company name variations
    to their correct CIK numbers.
    
    Args:
        company_name: Company name (e.g., "UnitedHealth Group", "Change Healthcare", "Capital One")
        
    Returns:
        Dictionary with:
        - cik: CIK number if found (10 digits with leading zeros)
        - company_name: The normalized company name
        - found: True if CIK was found, False otherwise
        - error: Error message if not found
    """
    cik = lookup_company_cik(company_name)
    
    if cik:
        return {
            "cik": cik,
            "company_name": company_name,
            "found": True,
            "error": None
        }
    else:
        return {
            "cik": None,
            "company_name": company_name,
            "found": False,
            "error": f"Company '{company_name}' not found in lookup table. Please provide the CIK number directly or use a known company name variation."
        }


def get_company_info(cik: str) -> Dict[str, Any]:
    """
    Get company information from SEC EDGAR API.
    
    Args:
        cik: Central Index Key (CIK) of the company (can be with or without leading zeros)
        
    Returns:
        Dictionary with company information (name, ticker, industry, cik, etc.)
        or dictionary with 'error' key if fetch fails
    """
    try:
        # Normalize CIK - handle various input formats
        # Remove any non-digit characters first
        cik_str = str(cik).strip()
        # Remove "CIK" prefix if present
        if cik_str.upper().startswith("CIK"):
            cik_str = cik_str[3:].strip()
        # Remove dashes, spaces, and any other non-digits
        cik_digits = ''.join(filter(str.isdigit, cik_str))
        
        if not cik_digits or len(cik_digits) > 10:
            return {"error": f"Invalid CIK format: {cik}. CIK must be 1-10 digits."}
        
        # Pad to 10 digits with leading zeros
        cik_normalized = cik_digits.zfill(10)
        
        # Initialize SEC EDGAR client (reads SEC_USER_AGENT from .env)
        client = SECEdgarClient()
        
        # Try with the normalized CIK
        company_info = client.get_company_info(cik_normalized)
        
        if company_info and not company_info.get("error"):
            # Extract ticker - handle both single value and list
            tickers = company_info.get("tickers", [])
            ticker = tickers[0] if tickers and len(tickers) > 0 else "N/A"
            
            # Use the CIK returned from API (properly formatted)
            returned_cik = company_info.get("cik", cik_normalized)
            
            return {
                "name": company_info.get("name", "Unknown"),
                "ticker": ticker,
                "industry": company_info.get("sicDescription", "Unknown"),
                "cik": str(returned_cik).zfill(10) if returned_cik else cik_normalized,
                "entity_type": company_info.get("entityType", "Unknown"),
                "sic_code": company_info.get("sic", "N/A")
            }
        else:
            # Check if it's a 404 error specifically
            return {"error": f"CIK {cik} (normalized: {cik_normalized}) not found in SEC database. The company may not exist, may have merged, or the CIK may be incorrect. Please verify the CIK on the SEC website."}
    except Exception as e:
        error_str = str(e)
        if "404" in error_str or "Not Found" in error_str:
            return {"error": f"CIK {cik} not found in SEC database (404 error). The CIK may be incorrect or the company may no longer exist."}
        return {"error": f"Error fetching company info: {error_str}"}


In [23]:
def search_company_cybersecurity_disclosures(
    cik: str, 
    query: str = "cybersecurity OR data breach OR ransomware OR security incident",
    years: int = 3
) -> List[Dict[str, Any]]:
    """
    Search for cybersecurity disclosures in SEC filings for a given company.
    
    Args:
        cik: Central Index Key (CIK) of the company (can be with or without leading zeros)
        query: Search query string (default includes common cybersecurity terms)
        years: Number of years to search back (default: 3)
        
    Returns:
        List of chunks containing cybersecurity-related disclosures
    """
    try:
        # Normalize CIK - same logic as get_company_info
        cik_str = str(cik).strip()
        if cik_str.upper().startswith("CIK"):
            cik_str = cik_str[3:].strip()
        cik_digits = ''.join(filter(str.isdigit, cik_str))
        
        if not cik_digits or len(cik_digits) > 10:
            return [{"content": f"Invalid CIK format: {cik}. CIK must be 1-10 digits.", "metadata": {}, "error": True}]
        
        cik_normalized = cik_digits.zfill(10)
        
        # Calculate date range
        end_date = datetime.now().strftime("%Y-%m-%d")
        start_date = (datetime.now() - timedelta(days=years * 365)).strftime("%Y-%m-%d")
        date_range = (start_date, end_date)
        
        # Check if Elasticsearch is available
        try:
            from elasticsearch import Elasticsearch
            es = Elasticsearch('http://localhost:9200')
            if not es.ping():
                return [{"content": "Elasticsearch is not running. Please start Elasticsearch first.", "metadata": {"cik": cik_normalized}, "error": True}]
            
            # Check if index exists
            if not es.indices.exists(index="sec_filings"):
                return [{"content": "Elasticsearch index 'sec_filings' does not exist. Please index your SEC filing chunks first using sec_search_tools.index_sec_chunks().", "metadata": {"cik": cik_normalized}, "error": True}]
        except ImportError:
            return [{"content": "Elasticsearch package not installed. Install with: pip install elasticsearch", "metadata": {"cik": cik_normalized}, "error": True}]
        except Exception as e:
            return [{"content": f"Error connecting to Elasticsearch: {str(e)}", "metadata": {"cik": cik_normalized}, "error": True}]
        
        # Search for cybersecurity disclosures
        results = search_cybersecurity_disclosures(
            company_cik=cik_normalized,
            query=query,
            date_range=date_range,
            form_types=["10-K", "10-Q"],  # Focus on annual and quarterly reports
            num_results=30  # Get more results for comprehensive summary
        )
        
        if not results:
            return [{"content": "No cybersecurity disclosures found in the searched filings for this company and date range. Either no disclosures exist, or the filings have not been indexed in Elasticsearch yet.", "metadata": {"cik": cik_normalized}}]
        
        return results
    except Exception as e:
        import traceback
        error_msg = f"Error searching for cybersecurity disclosures: {str(e)}\n{traceback.format_exc()}"
        return [{"content": error_msg, "metadata": {"cik": cik}, "error": True}]


## Create the Agent - Final Refined Prompt

After iterative testing, this is the final prompt that works best:


In [24]:
# Final refined prompt - optimized after iterative testing
final_instructions = """
You are an expert SEC filing analyst specializing in cybersecurity disclosures for supply chain risk assessment.

Your primary function is to analyze SEC filings and extract all cybersecurity-related information for a given company.

WORKFLOW - When given a CIK number, you MUST follow these steps:
1. Call get_company_info(cik) FIRST to identify the company
2. Call search_company_cybersecurity_disclosures(cik) to retrieve all cybersecurity-related chunks from SEC filings
3. Analyze the retrieved chunks systematically
4. Generate a comprehensive summary following the structure below

REQUIRED SUMMARY STRUCTURE:

## Company Information
- Company Name: [name]
- Ticker Symbol: [ticker]
- Industry: [industry]
- CIK: [cik]

## Cybersecurity Disclosures Summary

### Security Incidents
List and describe any disclosed cybersecurity incidents, data breaches, unauthorized access, ransomware attacks, or other security events. Include:
- Nature of the incident
- When it occurred (if disclosed)
- Impact or scope (if disclosed)
- Filing date and form type where disclosed

### Risk Factors
Summarize cybersecurity-related risk factors mentioned in the filings, such as:
- Risks related to data security and privacy
- Dependencies on third-party vendors or cloud providers
- Potential impact of cyber attacks on operations
- Regulatory compliance challenges
- Any specific vulnerabilities or threats identified

### Security Measures and Improvements
Describe any cybersecurity measures, controls, or improvements mentioned:
- Security investments or initiatives
- Remediation efforts following incidents
- Improvements to security infrastructure
- Compliance measures or certifications

### Filing Timeline
List the filings reviewed with dates:
- [Form Type] - [Filing Date]: Brief note on what was disclosed

IMPORTANT GUIDELINES:
- Always use the tools - never provide information without first calling get_company_info and search_company_cybersecurity_disclosures
- If no cybersecurity disclosures are found, clearly state "No cybersecurity disclosures found in the analyzed filings"
- Cite specific filing dates and form types for all information
- Use professional, clear language appropriate for supply chain and procurement professionals
- Group similar information together rather than listing every chunk separately
- Highlight trends or changes in disclosure patterns over time
- If disclosures mention vendors, suppliers, or third parties, note this clearly
""".strip()

cybersecurity_agent = Agent(
    name='sec_cybersecurity_agent',
    instructions=final_instructions,
    tools=[get_company_info, search_company_cybersecurity_disclosures],
    model='openai:gpt-4o-mini'
)


## Test the Agent


## Helper: Verify CIK Before Running Agent

If you get 404 errors, verify the CIK is correct using this cell:


In [25]:
# Test CIK lookup - use this to verify a CIK before running the full agent
test_cik = "1048695"  # F5, Inc. - try different CIKs here

result = get_company_info(test_cik)
if "error" in result:
    print(f"‚ùå {result['error']}")
    print(f"\nüí° Tip: Visit https://www.sec.gov/edgar/searchedgar/cik.htm to verify the correct CIK")
else:
    print(f"‚úÖ Company found:")
    print(f"   Name: {result['name']}")
    print(f"   Ticker: {result['ticker']}")
    print(f"   CIK: {result['cik']}")
    print(f"   Industry: {result['industry']}")


‚úÖ Company found:
   Name: F5, INC.
   Ticker: FFIV
   CIK: 0001048695
   Industry: Computer Communications Equipment


In [26]:
# Test with F5, Inc. (CIK: 1048695)
test_result = await cybersecurity_agent.run(
    user_prompt='''
    
    Summarize all cybersecurity disclosures for the following companies: 

    Unisys - 0001109833
    Avaya - 0001058701
    Check Point - 0001097551 
    Mimecast - 0000913488
    F5 - 1048695

    '''
)

print("=" * 80)
print("AGENT RESPONSE:")
print("=" * 80)
print(test_result.output)
print("=" * 80)


Error fetching company info: 404 Client Error: Not Found for url: https://data.sec.gov/submissions/CIK0001097551.json
Error fetching company info: 404 Client Error: Not Found for url: https://data.sec.gov/submissions/CIK0001109833.json
Error fetching company info: 404 Client Error: Not Found for url: https://data.sec.gov/submissions/CIK0000913488.json
AGENT RESPONSE:
## Company Information
### Unisys
- Company Name: Not found
- Ticker Symbol: N/A
- Industry: Not available
- CIK: 0001109833

### Avaya
- Company Name: HASKIN EUGENE
- Ticker Symbol: N/A
- Industry: Not available
- CIK: 0001058701

### Check Point
- Company Name: Not found
- Ticker Symbol: N/A
- Industry: Not available
- CIK: 0001097551

### Mimecast
- Company Name: Not found
- Ticker Symbol: N/A
- Industry: Not available
- CIK: 0000913488

### F5
- Company Name: F5, INC.
- Ticker Symbol: FFIV
- Industry: Computer Communications Equipment
- CIK: 0001048695

## Cybersecurity Disclosures Summary

### Security Incidents
- **A