# FFIEC Data Connect - REST API Comprehensive Demo

This notebook demonstrates all REST API capabilities of the FFIEC Data Connect library:

- OAuth2 Bearer Token Authentication (90-day token lifecycle)
- Rate limiting (2500 requests/hour)
- Complete API method coverage
- Data format consistency between SOAP and REST

## Version 2.x Features

The library now supports both SOAP and REST APIs:
- **SOAP API**: Uses `WebserviceCredentials` (legacy)
- **REST API**: Uses `OAuth2Credentials` (new)

The library automatically selects the appropriate protocol based on your credential type.

## Setup and Imports

In [None]:
# Standard library imports
import time
from datetime import datetime, timedelta

# Third-party imports
import pandas as pd
import polars as pl

# FFIEC Data Connect imports
import ffiec_data_connect as fdc
from ffiec_data_connect import (
    OAuth2Credentials,
    collect_data,
    collect_reporting_periods,
    collect_filers_on_reporting_period,
    collect_filers_since_date,
    collect_filers_submission_date_time,
    CredentialError,
    RateLimitError,
    NoDataError
)

# Report the versions of the libraries this notebook depends on.
version_report = (
    ("FFIEC Data Connect", fdc.__version__),
    ("Pandas", pd.__version__),
    ("Polars", pl.__version__),
)
for library_name, library_version in version_report:
    print(f"{library_name} version: {library_version}")

## REST API Credentials Setup

The REST API uses OAuth2 Bearer tokens with a 90-day lifecycle.

In [None]:
import getpass

# Interactive setup: prompt for the PWS username and bearer token, then build
# the OAuth2Credentials object that every later cell passes as `creds`.
print("Setting up OAuth2 credentials for REST API")
print("\nTo get credentials:")
print("  1. Register at https://cdr.ffiec.gov/public/PWS/CreateAccount.aspx")
print("  2. Login and generate a 90-day bearer token")
print("  3. Use your PWS username and the bearer token here")

print("\nREST API Credentials:")
oauth_username = input("FFIEC PWS Username: ").strip()
# Strip the token the same way as the username: whitespace or a trailing
# newline picked up while pasting would otherwise become part of the credential.
bearer_token = getpass.getpass("Bearer Token (90-day from PWS): ").strip()

# Create OAuth2 credentials for REST API.
# NOTE: the expiry below is an approximation (90 days from *now*). If the
# token was generated earlier, it will expire sooner than this value says.
rest_credentials = OAuth2Credentials(
    username=oauth_username,
    bearer_token=bearer_token,
    token_expires=datetime.now() + timedelta(days=90)
)

print(f"\nCredentials set for user: {rest_credentials.username}")
print(f"Token expires: {rest_credentials.token_expires}")
print("Rate limit: 2500 requests/hour")

## Test 1: Retrieve Reporting Periods

Get available reporting periods for different series.

In [None]:
print("Test 1: Retrieve Reporting Periods")
print("=" * 50)

test_series = ["call", "ubpr"]
periods_by_series = {}

# Fetch the reporting periods for each series; a failed lookup is reported and
# recorded as an empty list so the loop continues with the next series.
for series in test_series:
    print(f"\nGetting periods for series: {series}")
    try:
        periods = collect_reporting_periods(
            session=None,  # REST API doesn't need session
            creds=rest_credentials,
            series=series,
            output_type="list"
        )

        periods_by_series[series] = periods
        print(f"  Found {len(periods)} reporting periods")
        print(f"  Recent periods: {periods[:3]}")
        print(f"  Oldest periods: {periods[-3:]}")

    except Exception as e:
        print(f"  Error: {e}")
        periods_by_series[series] = []

# Use the first Call period returned for further tests. `or` is used instead of
# a .get() default because a failed lookup above leaves an empty list under
# "call"; the .get() default would not apply and `[0]` would raise IndexError.
FALLBACK_PERIOD = "2023-12-31"
call_periods = periods_by_series.get("call") or [FALLBACK_PERIOD]
SAMPLE_PERIOD = call_periods[0]
print(f"\nUsing sample period: {SAMPLE_PERIOD}")

# Banks used by the later data-collection cells, which index this list as
# SAMPLE_BANKS[i][0]. Each entry is (rssd_id, name); the order matches the
# comments in those cells (index 0: JPMorgan Chase, index 1: Bank of America).
# NOTE(review): confirm the RSSD IDs against the FFIEC NIC institution search.
SAMPLE_BANKS = [
    ("852218", "JPMorgan Chase Bank, N.A."),
    ("480228", "Bank of America, N.A."),
]
print("Sample banks: " + ", ".join(f"{name} ({rssd})" for rssd, name in SAMPLE_BANKS))

## Test 2: REST API Limitations

Understanding what the REST API currently supports.

In [None]:
print("Test 2: REST API Limitations")
print("=" * 50)

print("\nChecking what functions work with REST API...")

# Outcome of each probe, keyed by function name, so the summary below reports
# what actually happened in this run rather than a fixed list.
rest_support = {}

# In each probe the success line is printed only after the call has returned,
# so a failing call never shows both a check mark and a cross.
try:
    periods = collect_reporting_periods(
        session=None,
        creds=rest_credentials,
        series="call",
        output_type="list"
    )
    print("\n✅ collect_reporting_periods works with REST API")
    print(f"   Found {len(periods)} periods")
    rest_support["collect_reporting_periods"] = True
except Exception as e:
    print(f"❌ collect_reporting_periods failed: {e}")
    rest_support["collect_reporting_periods"] = False

try:
    filers = collect_filers_on_reporting_period(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        output_type="list"
    )
    print("\n✅ collect_filers_on_reporting_period works with REST API")
    print(f"   Found {len(filers)} filers")
    rest_support["collect_filers_on_reporting_period"] = True
except Exception as e:
    print(f"❌ collect_filers_on_reporting_period failed: {e}")
    rest_support["collect_filers_on_reporting_period"] = False

# Probe individual-bank data retrieval, which may not be available over REST.
try:
    print("\n⚠️  Testing collect_data with REST API...")
    data = collect_data(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        # NOTE(review): 480228 appears to be Bank of America, N.A. in the FFIEC
        # NIC (JPMorgan Chase Bank, N.A. is 852218) — confirm before relying on it.
        rssd_id="480228",
        series="call",
        output_type="list"
    )
    print(f"✅ collect_data works! Found {len(data)} data points")
    rest_support["collect_data"] = True
except Exception as e:
    print(f"❌ collect_data not supported via REST API: {e}")
    rest_support["collect_data"] = False

working = [name for name, ok in rest_support.items() if ok]
failed = [name for name, ok in rest_support.items() if not ok]

print("\n" + "=" * 60)
print("SUMMARY: Current REST API Support")
print("=" * 60)
print("✅ WORKING:")
for name in working:
    print(f"   - {name}()")
if not working:
    print("   (none)")
print("")
print("❌ FAILED IN THIS RUN:")
for name in failed:
    print(f"   - {name}()")
if not failed:
    print("   (none)")
print("")
# These two functions are imported by the notebook but not called in this cell,
# so no claim is made about them here.
print("ℹ️  NOT EXERCISED IN THIS CELL:")
print("   - collect_filers_since_date()")
print("   - collect_filers_submission_date_time()")
if not rest_support["collect_data"]:
    # Only suggest the SOAP fallback when REST data collection really failed.
    print("")
    print("💡 RECOMMENDATION:")
    print("   Use OAuth2Credentials for metadata operations")
    print("   Use WebserviceCredentials for data collection")

## Test 3: Collect Filers Information

Get list of institutions that filed reports for a specific period.

In [None]:
print("Test 3: Collect Filers Information")
print("=" * 50)

print(f"\nGetting filers for period: {SAMPLE_PERIOD}")

# Time a single filers lookup for the sample period and preview up to five
# of the returned entries. Any failure is reported and leaves `filers` empty.
try:
    t0 = time.time()
    filers = collect_filers_on_reporting_period(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        output_type="list",
    )
    duration = time.time() - t0

    print(f"Found {len(filers)} filers in {duration:.2f} seconds")

    if filers:
        print("\nSample filers:")
        for position, entry in enumerate(filers[:5], start=1):
            # Non-dict entries are printed as-is.
            if not isinstance(entry, dict):
                print(f"  {position}. {entry}")
                continue
            entry_rssd = entry.get('rssd', 'N/A')
            entry_name = entry.get('name', 'N/A')
            print(f"  {position}. RSSD: {entry_rssd}, Name: {entry_name}")

except Exception as err:
    print(f"Error: {err}")
    filers = []

## Test 4: Data Format Testing with Pandas

Test data collection with pandas DataFrame output.

In [None]:
print("Test 4: Data Format Testing with Pandas")
print("=" * 50)

rssd_id = SAMPLE_BANKS[0][0]  # JPMorgan Chase
print(f"\nCollecting data for RSSD {rssd_id} as pandas DataFrame")

# Collect one bank's data as a pandas DataFrame, then report its shape, the
# dtype of identifier-like columns, and a small preview.
try:
    df = collect_data(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        rssd_id=rssd_id,
        series="call",
        output_type="pandas",
    )

    n_rows, n_cols = df.shape[0], df.shape[1]
    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {n_cols}")
    print(f"Rows: {n_rows}")

    print("\nData type verification:")
    has_rows = len(df) > 0

    # RSSD ID format (should be string)
    if 'rssd' in df.columns:
        rssd_series = df['rssd']
        first_rssd = rssd_series.iloc[0] if has_rows else None
        print(f"  RSSD ID: dtype={rssd_series.dtype}, sample={first_rssd}")

    # ZIP codes (should preserve leading zeros); only the first two matching
    # columns are inspected.
    zip_like = [c for c in df.columns if 'zip' in c.lower()]
    for zip_col in zip_like[:2]:
        first_zip = df[zip_col].iloc[0] if has_rows else None
        print(f"  {zip_col}: dtype={df[zip_col].dtype}, sample={first_zip}")

    print("\nSample data (first 5 columns):")
    preview = df.iloc[:3, :5]
    print(preview.to_string())

except Exception as err:
    print(f"Error: {err}")

## Test 5: Collect Institutions Information

Get detailed institution information for a specific period.

In [None]:
print("Test 5: Collect Institutions Information")
print("=" * 50)

print(f"\nGetting institutions for period: {SAMPLE_PERIOD}")

# `collect_institutions` is not among the names imported in the setup cell, so
# calling it bare always raised NameError and the cell only ever printed
# "Error: ...". Resolve it from the package when this library version exposes
# it; otherwise fall back to the already-imported filers lookup.
# NOTE(review): the fallback assumes filer records carry 'city'/'state' keys;
# when they do not, those fields print as 'N/A'.
fetch_institutions = getattr(
    fdc, "collect_institutions", collect_filers_on_reporting_period
)

try:
    start_time = time.time()

    institutions = fetch_institutions(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        output_type="list"
    )

    elapsed = time.time() - start_time

    print(f"Found {len(institutions)} institutions in {elapsed:.2f} seconds")

    if institutions:
        print("\nSample institutions:")
        for i, inst in enumerate(institutions[:5]):
            if isinstance(inst, dict):
                rssd = inst.get('rssd', 'N/A')
                name = inst.get('name', 'N/A')
                city = inst.get('city', 'N/A')
                state = inst.get('state', 'N/A')
                print(f"  {i+1}. RSSD: {rssd}, Name: {name}, Location: {city}, {state}")
            else:
                # Non-dict entries are printed as-is.
                print(f"  {i+1}. {inst}")

except Exception as e:
    print(f"Error: {e}")
    institutions = []

## Test 6: Data Format Testing with Pandas

This cell repeats the pandas DataFrame collection from Test 4, using the same bank, reporting period, and output type.

In [None]:
print("Test 6: Data Format Testing with Pandas")
print("=" * 50)

rssd_id = SAMPLE_BANKS[0][0]  # JPMorgan Chase
print(f"\nCollecting data for RSSD {rssd_id} as pandas DataFrame")


def first_value_or_none(frame, column):
    """Return the first value of *column* in *frame*, or None if it has no rows."""
    if len(frame) > 0:
        return frame[column].iloc[0]
    return None


# Same request as Test 4: collect the bank's data as a pandas DataFrame and
# print its shape, selected dtypes, and a preview.
try:
    df = collect_data(
        output_type="pandas",
        series="call",
        rssd_id=rssd_id,
        reporting_period=SAMPLE_PERIOD,
        creds=rest_credentials,
        session=None,
    )

    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {df.shape[1]}")
    print(f"Rows: {df.shape[0]}")

    print("\nData type verification:")

    # RSSD ID format (should be string)
    if 'rssd' in df.columns:
        print(
            f"  RSSD ID: dtype={df['rssd'].dtype}, "
            f"sample={first_value_or_none(df, 'rssd')}"
        )

    # ZIP codes (should preserve leading zeros); at most two columns checked.
    checked = 0
    for column_name in df.columns:
        if checked == 2:
            break
        if 'zip' not in column_name.lower():
            continue
        zip_sample = first_value_or_none(df, column_name)
        print(f"  {column_name}: dtype={df[column_name].dtype}, sample={zip_sample}")
        checked += 1

    print("\nSample data (first 5 columns):")
    print(df.iloc[:3, :5].to_string())

except Exception as err:
    print(f"Error: {err}")

## Test 7: Data Format Testing with Polars

Test data collection with Polars DataFrame output.

In [None]:
print("Test 7: Data Format Testing with Polars")
print("=" * 50)

rssd_id = SAMPLE_BANKS[1][0]  # Bank of America
print(f"\nCollecting data for RSSD {rssd_id} as Polars DataFrame")

# Collect one bank's data as a Polars DataFrame, then report its shape, the
# dtype of the 'rssd' column when present, and the first ten schema entries.
try:
    pl_df = collect_data(
        session=None,
        creds=rest_credentials,
        reporting_period=SAMPLE_PERIOD,
        rssd_id=rssd_id,
        series="call",
        output_type="polars",
    )

    frame_shape = pl_df.shape
    print(f"DataFrame shape: {frame_shape}")
    print(f"Columns: {frame_shape[1]}")
    print(f"Rows: {frame_shape[0]}")

    print("\nData type verification:")

    # RSSD ID format
    if 'rssd' in pl_df.columns:
        rssd_column = pl_df['rssd']
        first_rssd = rssd_column[0] if len(pl_df) > 0 else None
        print(f"  RSSD ID: dtype={rssd_column.dtype}, sample={first_rssd}")

    print("\nSchema (first 10 columns):")
    schema_head = list(pl_df.schema.items())[:10]
    for column_name, column_dtype in schema_head:
        print(f"  {column_name}: {column_dtype}")

except Exception as err:
    print(f"Error: {err}")

## Test 8: Rate Limiting Test

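Make five lightweight requests in quick succession and time each one. The REST API limit is 2500 requests/hour, so this checks that normal usage stays within the limit; it does not try to exhaust the quota.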

In [None]:
print("Test 8: Rate Limiting Test")
print("=" * 50)

print("\nTesting rate limiting with rapid requests...")
print("REST API limit: 2500 requests/hour (~0.69 requests/second)")

# Issue five lightweight requests, half a second apart, recording how long
# each one takes. A RateLimitError stops the run; other errors are recorded
# and the loop moves on to the next request.
request_times = []
errors = []

for attempt in range(1, 6):
    try:
        started = time.time()

        periods = collect_reporting_periods(
            session=None,
            creds=rest_credentials,
            series="call",
            output_type="list",
        )

        elapsed = time.time() - started
        request_times.append(elapsed)
        print(f"  Request {attempt}: {elapsed:.2f}s")

        # Small delay to avoid overwhelming
        time.sleep(0.5)

    except RateLimitError as limit_err:
        print(f"  Request {attempt}: Rate limited - {limit_err}")
        errors.append(str(limit_err))
        break
    except Exception as err:
        print(f"  Request {attempt}: Error - {err}")
        errors.append(str(err))

if request_times:
    avg_time = sum(request_times) / len(request_times)
    print(f"\nAverage request time: {avg_time:.2f}s")

if errors:
    print(f"Encountered {len(errors)} errors")
else:
    print("No rate limit errors encountered (within limits)")

## Test 9: Error Handling

Test error handling for various scenarios.

In [None]:
print("Test 9: Error Handling")
print("=" * 50)

# Scenario 1: request data for an RSSD ID expected to be invalid.
print("\n1. Testing invalid RSSD ID...")
try:
    data = collect_data(
        rssd_id="9999999",  # Invalid RSSD
        reporting_period=SAMPLE_PERIOD,
        series="call",
        output_type="list",
        session=None,
        creds=rest_credentials,
    )
    result_count = len(data)
    print(f"  Unexpectedly got {result_count} results")
except NoDataError:
    print("  Correctly raised NoDataError for invalid RSSD")
except Exception as err:
    print(f"  Error: {err}")

# Scenario 2: request data for a reporting period far in the future.
print("\n2. Testing invalid reporting period...")
try:
    data = collect_data(
        rssd_id=SAMPLE_BANKS[0][0],
        reporting_period="2099-12-31",  # Future date
        series="call",
        output_type="list",
        session=None,
        creds=rest_credentials,
    )
    result_count = len(data)
    print(f"  Got {result_count} results (may be empty)")
except NoDataError:
    print("  Correctly raised NoDataError for future period")
except Exception as err:
    print(f"  Error: {err}")

# Scenario 3: ask for reporting periods of a series name that is not valid.
print("\n3. Testing invalid series...")
try:
    periods = collect_reporting_periods(
        series="invalid_series",
        output_type="list",
        session=None,
        creds=rest_credentials,
    )
    period_count = len(periods)
    print(f"  Got {period_count} periods")
except Exception as err:
    print(f"  Error (expected): {err}")

print("\nError handling tests completed")

## Test 10: Performance Comparison

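Compare the end-to-end time of `collect_data` for each output format. Each timing covers the entire call, including the request itself, so differences reflect more than the format conversion alone.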

In [None]:
print("Test 10: Performance Comparison")
print("=" * 50)

rssd_id = SAMPLE_BANKS[0][0]
output_formats = ["list", "pandas", "polars"]
# Maps output format -> elapsed seconds, or None when the request failed.
timings = {}

print(f"\nTesting output formats for RSSD {rssd_id}")

for format_type in output_formats:
    print(f"\nTesting {format_type} format...")
    try:
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        # Each timing covers the whole collect_data call.
        start = time.perf_counter()

        data = collect_data(
            session=None,
            creds=rest_credentials,
            reporting_period=SAMPLE_PERIOD,
            rssd_id=rssd_id,
            series="call",
            output_type=format_type
        )

        elapsed = time.perf_counter() - start
        timings[format_type] = elapsed

        # Get data size: item count for lists, rows * columns for DataFrames.
        if format_type == "list":
            size = len(data)
        elif format_type in ["pandas", "polars"]:
            size = data.shape[0] * data.shape[1]
        else:
            size = 0

        print(f"  Time: {elapsed:.3f}s, Data points: {size}")

        # Small delay between requests
        time.sleep(1)

    except Exception as e:
        print(f"  Error: {e}")
        timings[format_type] = None

# Show summary. Compare against None explicitly: a truthiness test would
# report a legitimate 0.0-second timing as a failure.
print("\nPerformance Summary:")
for format_type, timing in timings.items():
    if timing is not None:
        print(f"  {format_type}: {timing:.3f}s")
    else:
        print(f"  {format_type}: Failed")

# Only name a winner when every format produced a timing; the `timings` guard
# keeps min() from being called on an empty mapping.
if timings and all(timing is not None for timing in timings.values()):
    fastest = min(timings, key=timings.get)
    print(f"\nFastest format: {fastest}")

## Summary

Complete test results for REST API functionality.

In [None]:
print("FFIEC DATA CONNECT - REST API Demo Summary")
print("=" * 60)

# One entry per "Test N" section of this notebook, in order, so the printed
# numbers line up with the section headings. The status markers are static
# annotations, not results computed from this run.
print("\nTests completed:")
tests = [
    "Retrieve Reporting Periods ✅",
    "REST API Limitations & Current Support ⚠️",
    "Collect Filers Information ✅",
    "Data Format Testing with Pandas ⚠️ (Limited)",
    "Collect Institutions Information ⚠️",
    "Data Format Testing with Pandas ⚠️ (Limited)",
    "Data Format Testing with Polars ⚠️ (Limited)",
    "Rate Limiting Test ✅",
    "Error Handling ✅",
    "Performance Comparison ⚠️ (Limited)",
]

for i, test in enumerate(tests, 1):
    print(f"  {i}. {test}")

print("\n" + "=" * 60)
print("CURRENT REST API SUPPORT STATUS")
print("=" * 60)

print("\n✅ FULLY SUPPORTED:")
print("  - collect_reporting_periods() - Get available reporting periods")
print("  - collect_filers_on_reporting_period() - Get institutions that filed")
print("  - collect_filers_since_date() - Get filers since specific date")
print("  - collect_filers_submission_date_time() - Get submission timestamps")

print("\n❌ NOT YET SUPPORTED:")
print("  - collect_data() - Individual bank data retrieval")
print("  - XBRL data processing via REST API")
print("  - Individual bank financial data")

print("\n💡 RECOMMENDED USAGE PATTERN:")
print("  1. Use OAuth2Credentials + REST API for:")
print("     • Discovering reporting periods")
print("     • Finding which banks filed reports")
print("     • Getting filing metadata")
print("")
print("  2. Use WebserviceCredentials + SOAP API for:")
print("     • Retrieving actual bank financial data")
print("     • XBRL document processing")
print("     • Individual bank call report data")

print("\n🔧 TECHNICAL DETAILS:")
print("  - REST API uses httpx library for better header handling")
print("  - OAuth2 Bearer tokens with 90-day lifecycle")
print("  - Rate limit: 2500 requests/hour (REST) vs 1000 requests/hour (SOAP)")
print("  - Automatic protocol selection based on credential type")
print("  - Data normalization ensures consistency when available")

print("\n📋 NEXT STEPS:")
print("  1. Use this demo to test REST API metadata functions")
print("  2. Use SOAP API (existing demo notebook) for data collection")
print("  3. Monitor FFIEC for additional REST API endpoints")
print("  4. Consider hybrid approach: REST for discovery, SOAP for data")