# 🔧 Development Setup (Optional)

If you're running this notebook from a cloned repository (not an installed package), run the next cells to:

1. Locate the project root (looks for `pyproject.toml` / `setup.py`)
2. Change working directory to the project root (if currently in `examples/`)
3. Add the project root to `sys.path`
4. Provide a helper to reload the package without restarting the kernel

You can skip these cells in a packaged / production environment, where `ffiec_data_collector` is already installed.

---

In [None]:
# Dev bootstrap: project root detection & sys.path setup
import os, sys, pathlib
from datetime import datetime

# Remember where the kernel started; the messages and the chdir decision below
# are all based on this directory.
start_cwd = pathlib.Path.cwd()
print(f"🔍 Starting CWD: {start_cwd}")

# Walk from the starting directory up through its ancestors and keep the first
# directory that contains a packaging marker file.
project_root = next(
    (
        candidate
        for candidate in (start_cwd, *start_cwd.parents)
        if any((candidate / marker).exists() for marker in ('pyproject.toml', 'setup.py'))
    ),
    None,
)
if project_root is None:
    raise RuntimeError('Project root not found (no pyproject.toml or setup.py)')

# Only move the working directory when the notebook was launched from a
# directory literally named "examples".
if start_cwd.name != 'examples':
    print(f"📂 Using directory: {pathlib.Path.cwd()}")
else:
    os.chdir(project_root)
    print(f"📂 Changed directory to project root: {project_root}")

# Put the project root at the front of sys.path unless it is already listed.
root_str = str(project_root.resolve())
if root_str in sys.path:
    print('✔ Project root already on sys.path')
else:
    sys.path.insert(0, root_str)
    print(f"➕ Added project root to sys.path: {root_str}")

# Best-effort import check: a failure is reported, not raised.
print('🧪 Import check...')
try:
    import ffiec_data_collector as fdc
    fdc_version = getattr(fdc, '__version__', 'unknown')
    print(f"✅ Imported ffiec_data_collector (version={fdc_version})")
except Exception as e:
    print(f"❌ Import failed: {e}")

print('✨ Dev bootstrap complete at', datetime.now().isoformat())

In [None]:
# Optional editable install (uncomment to run once)
# %pip install -e .

from importlib import reload
import ffiec_data_collector as fdc

def dev_reload(package_name='ffiec_data_collector'):
    """Reload a package and its already-imported submodules in place.

    ``importlib.reload`` re-executes only the module object it is given. Reloading
    just the top-level package therefore leaves every submodule cached in
    ``sys.modules`` on its old code, and edits made there go unnoticed. This
    helper reloads the cached submodules first and the package last.

    Modules are reloaded in place, so existing references to the package (such
    as an ``fdc`` alias) keep pointing at the refreshed module object.

    This is best-effort: submodules are reloaded in reverse import order, which
    refreshes most dependencies before their importers but cannot guarantee it
    for every import graph. Objects created before the reload keep their old
    classes. Restart the kernel if results look inconsistent.

    Args:
        package_name: Importable name of the package to reload. Defaults to
            ``'ffiec_data_collector'``.

    Returns:
        None. Success or failure is reported with a printed message; exceptions
        are caught so a broken edit does not interrupt the notebook session.
    """
    import importlib
    import sys
    import types

    try:
        package = importlib.import_module(package_name)
        prefix = package_name + '.'

        # Snapshot first: reloading can add entries to sys.modules while we iterate.
        loaded_submodules = [
            name
            for name, module in list(sys.modules.items())
            if name.startswith(prefix) and isinstance(module, types.ModuleType)
        ]

        # Reverse import order: modules imported later are usually the
        # dependencies of those imported earlier, so refresh them first.
        for name in reversed(loaded_submodules):
            importlib.reload(sys.modules[name])

        # Finally re-run the package __init__ so its re-exports see fresh submodules.
        importlib.reload(package)
        print(f'🔄 Reloaded {package_name}')
    except Exception as e:
        print(f'❌ Reload failed: {e}')

print('Helper ready: dev_reload()')

# FFIEC Data Collector Demo

This notebook demonstrates how to use the FFIEC Data Collector library to download bulk financial data from the FFIEC CDR system.

## Installation

First, install the package:

```bash
pip install ffiec-data-collector
```

Or install from source (run from the repository root, where `pyproject.toml` / `setup.py` lives):

```bash
pip install -e .
```

## Basic Usage

In [None]:
from ffiec_data_collector import FFIECDownloader, Product, FileFormat
from pathlib import Path
import pandas as pd

In [None]:
# Set up the downloader; the target folder is given relative to the current
# working directory.
download_dir = Path("./downloads")
downloader = FFIECDownloader(download_dir=download_dir)

# Report the absolute location so it is clear where files will land.
absolute_download_dir = download_dir.absolute()
print(f"Downloads will be saved to: {absolute_download_dir}")

## 1. Available Products

Let's see what data products are available:

In [None]:
# Ask the downloader which data products it knows about
products = downloader.get_available_products()

for product in products:
    # One four-line summary per product; print() adds the separating blank line.
    summary = (
        f"- {product.display_name}\n"
        f"  Value: {product.value}\n"
        f"  Type: {'UBPR' if product.is_ubpr else 'Call Report'}\n"
        f"  Periods: {'Single' if product.is_single_period else 'Multiple'}\n"
    )
    print(summary)

## 2. Get Available Reporting Periods

In [None]:
# Selecting the Call Report product yields its reporting periods
periods = downloader.select_product(Product.CALL_SINGLE)

print("Available reporting periods for Call Reports:")
period_count = len(periods)
print(f"Total: {period_count} quarters\n")

# Show the first five entries (described as the most recent periods;
# the ordering comes from the library — TODO confirm).
first_periods = periods[:5]
for period in first_periods:
    print(f"- {period} (Value: {period.value})")

## 3. Download Latest Call Report Data

In [None]:
# Fetch the most recent Call Report as XBRL and report the outcome
print("Downloading latest Call Report data...")
result = downloader.download_latest(Product.CALL_SINGLE, FileFormat.XBRL)

if not result.success:
    # Failure path first: show the reason reported by the downloader.
    print(f"✗ Download failed: {result.error_message}")
else:
    print("✓ Download successful!")
    print(f"  File: {result.filename}")
    print(f"  Size: {result.size_bytes:,} bytes")
    print(f"  Path: {result.file_path}")

## 4. Download Specific Quarter

In [None]:
# Download a specific quarter (e.g., Q1 2024)
quarter = "20240331"  # March 31, 2024, written as YYYYMMDD

print(f"Downloading Call Report for {quarter}...")
result = downloader.download_cdr_single_period(quarter, FileFormat.TSV)

if result.success:
    print(f"✓ Downloaded: {result.filename}")
    print("  Format: Tab-delimited (TSV)")
    print(f"  Size: {result.size_bytes:,} bytes")
else:
    # Report the failure instead of letting the cell finish silently.
    print(f"✗ Download failed: {result.error_message}")

## 5. Download UBPR Data

In [None]:
# Download UBPR Ratio data for a single period via the generic download() call
print("Downloading UBPR Ratio data...")
result = downloader.download(
    product=Product.UBPR_RATIO_SINGLE,
    period="20240331",
    format=FileFormat.XBRL
)

if result.success:
    print(f"✓ Downloaded: {result.filename}")
else:
    # Report the failure instead of letting the cell finish silently.
    # NOTE(review): assumes the result exposes `error_message` like the other
    # download results in this notebook; the fallback text covers the case
    # where it does not.
    print(f"✗ Download failed: {getattr(result, 'error_message', 'unknown error')}")

## 6. Bulk Download Multiple Quarters

In [None]:
# Download several quarters in a row, collecting every result for the summary
quarters_to_download = ["20240331", "20231231", "20230930", "20230630"]
results = []

for quarter in quarters_to_download:
    # Keep the status on the same line as the "Downloading ..." prefix.
    print(f"Downloading {quarter}...", end=" ")
    result = downloader.download_cdr_single_period(quarter, FileFormat.XBRL)
    results.append(result)

    if result.success:
        print(f"✓ ({result.size_bytes:,} bytes)")
    else:
        # Include the reason so a failed quarter can be diagnosed from the output.
        print(f"✗ Failed: {result.error_message}")

succeeded = sum(1 for r in results if r.success)
print(f"\nCompleted: {succeeded}/{len(results)} downloads")

## 7. Get Metadata About Available Data

In [None]:
# Call Report (CDR) bulk data source information
cdr_info = downloader.get_bulk_data_sources_cdr()

print("Call Report Data Information:")
print(f"Latest published: {cdr_info['published_date']}")
cdr_quarters = cdr_info['available_quarters']
print(f"Available quarters: {len(cdr_quarters)}")
# The range is printed from the last list entry to the first one.
range_start, range_end = cdr_quarters[-1], cdr_quarters[0]
print(f"Date range: {range_start} to {range_end}")

# Visual separator between the two sections
separator = "=" * 50
print(f"\n{separator}\n")

# UBPR bulk data source information
ubpr_info = downloader.get_bulk_data_sources_ubpr()

print("UBPR Data Information:")
print(f"Latest published: {ubpr_info['published_date']}")
ubpr_quarters = ubpr_info['available_quarters']
print(f"Available quarters: {len(ubpr_quarters)}")

## 8. Download to Memory (Without Saving to Disk)

In [None]:
# Download directly to memory for immediate processing
from io import BytesIO
import zipfile

# Request the archive as an in-memory buffer instead of a file on disk
print("Downloading to memory...")
content = downloader.download(
    product=Product.CALL_SINGLE,
    period="20240331",
    format=FileFormat.TSV,
    save_to_disk=False
)

if isinstance(content, BytesIO):
    print(f"✓ Downloaded {len(content.getvalue()):,} bytes to memory")

    # Process the ZIP file directly from memory
    with zipfile.ZipFile(content) as zf:
        print("\nZIP contents:")
        # infolist() is the documented accessor for the archive's members.
        for info in zf.infolist()[:5]:  # Show first 5 files
            print(f"  - {info.filename} ({info.file_size:,} bytes)")
else:
    # Anything other than a BytesIO means no archive bytes are available;
    # say so instead of ending the cell silently.
    print(f"✗ In-memory download did not return data (got {type(content).__name__})")

## 9. Using Validation (Website Structure Monitoring)

In [None]:
from ffiec_data_collector import ValidatedFFIECDownloader, ThumbprintValidator

# Create a validated downloader that checks website structure before downloading
validated_downloader = ValidatedFFIECDownloader(download_dir=download_dir)

# The validator automatically checks if the FFIEC website structure has changed
print("Performing validation check...")

try:
    # This will validate the website structure before downloading
    result = validated_downloader.download(
        product=Product.CALL_SINGLE,
        period="20240331",
        format=FileFormat.XBRL
    )
    # A returned result is not necessarily a successful download, so check its
    # success flag before claiming success.
    # NOTE(review): assumes the result has the same shape as the plain
    # downloader's (success / filename / error_message) — confirm. The getattr
    # defaults keep the previous behaviour if those attributes are missing.
    if getattr(result, "success", True):
        print(f"✓ Validation passed and download successful: {result.filename}")
    else:
        print(f"✗ Validation passed but download failed: {getattr(result, 'error_message', 'unknown error')}")
except Exception as e:
    print(f"✗ Validation failed: {e}")

## 10. Check Website Health

In [None]:
# Run the structure validator over every page it knows about
validator = ThumbprintValidator()

print("Validating FFIEC website structure...\n")
results = validator.validate_all()

for page_type, result in results.items():
    # A missing 'valid' key is treated as invalid.
    is_valid = result.get('valid', False)
    status, verdict = ("✓", "Valid") if is_valid else ("✗", "Invalid")
    print(f"{status} {page_type}: {verdict}")

    # Warnings and the error are optional; print them only when present.
    for warning in result.get('warnings') or ():
        print(f"  ⚠ {warning}")

    error = result.get('error')
    if error:
        print(f"  Error: {error}")

## Advanced Example: Building a Data Pipeline

In [None]:
from datetime import datetime, timedelta
import json

class FFIECDataPipeline:
    """Example data pipeline that tracks and downloads new Call Report quarters.

    State is kept in ``<download_dir>/metadata.json`` as a JSON object with two
    keys: ``downloaded_quarters`` (quarters already fetched successfully) and
    ``last_check`` (ISO-8601 timestamp of the most recent check, or ``None``).
    """

    def __init__(self, download_dir="./pipeline_data"):
        """Create the downloader and load any previously saved metadata.

        Args:
            download_dir: Directory used for downloads and for the metadata file.
        """
        self.downloader = FFIECDownloader(Path(download_dir))
        self.metadata_file = Path(download_dir) / "metadata.json"
        self.load_metadata()

    def load_metadata(self):
        """Load metadata from disk, or start with empty tracking state.

        Keys missing from an existing file are filled with their defaults, so
        a partial or older metadata file does not cause a ``KeyError`` later.
        Invalid JSON still raises, so corrupt state is never silently discarded.
        """
        if self.metadata_file.exists():
            with open(self.metadata_file, encoding="utf-8") as f:
                self.metadata = json.load(f)
        else:
            self.metadata = {}

        self.metadata.setdefault("downloaded_quarters", [])
        self.metadata.setdefault("last_check", None)

    def save_metadata(self):
        """Write the metadata to disk, creating the directory tree if needed."""
        # parents=True lets a nested download_dir (e.g. "data/ffiec") work on first use.
        self.metadata_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.metadata_file, 'w', encoding="utf-8") as f:
            json.dump(self.metadata, f, indent=2)

    def check_for_new_data(self):
        """Return the available quarters that have not been downloaded yet.

        Returns:
            A list of quarter identifiers sorted in descending order.
        """
        cdr_info = self.downloader.get_bulk_data_sources_cdr()
        available = set(cdr_info['available_quarters'])
        downloaded = set(self.metadata['downloaded_quarters'])

        return sorted(available - downloaded, reverse=True)

    def download_new_quarters(self, max_downloads=5):
        """Download quarters that are available but not yet recorded.

        Args:
            max_downloads: Upper bound on download attempts in this call.
                ``None`` means no limit; negative values are treated as 0.

        Returns:
            The list of quarters downloaded successfully during this call.
        """
        new_quarters = self.check_for_new_data()
        results = []

        if not new_quarters:
            print("No new data available")
        else:
            print(f"Found {len(new_quarters)} new quarters")

            # A negative limit would otherwise slice from the end of the list.
            limit = None if max_downloads is None else max(0, max_downloads)

            for quarter in new_quarters[:limit]:
                print(f"Downloading {quarter}...")
                result = self.downloader.download_cdr_single_period(quarter)

                if result.success:
                    self.metadata['downloaded_quarters'].append(quarter)
                    results.append(quarter)
                    # Persist immediately so an interruption later in the loop
                    # does not lose the record of this quarter.
                    self.save_metadata()
                    print(f"  ✓ Success: {result.filename}")
                else:
                    print(f"  ✗ Failed: {result.error_message}")

        # Record the check time even when there was nothing new to download.
        self.metadata['last_check'] = datetime.now().isoformat()
        self.save_metadata()

        return results

# Run the pipeline once, attempting at most two new quarters
pipeline = FFIECDataPipeline()
new_data = pipeline.download_new_quarters(max_downloads=2)

# An empty list means nothing was downloaded in this run.
if not new_data:
    print("\nNo new data downloaded")
else:
    print(f"\nSuccessfully downloaded {len(new_data)} new quarters: {new_data}")

## Summary

This notebook demonstrated:

1. **Basic Downloads** - Downloading Call Reports and UBPR data
2. **Multiple Formats** - Using XBRL and TSV formats
3. **Bulk Operations** - Downloading multiple quarters
4. **Memory Operations** - Processing data without saving to disk
5. **Validation** - Ensuring website structure hasn't changed
6. **Pipeline Example** - Building an automated data pipeline

The FFIEC Data Collector provides a lightweight, reliable way to access FFIEC bulk data programmatically.