# API Exploration - Portal da Transparência

This notebook explores all available endpoints in the Portal da Transparência API.

## Objectives:
1. Explore all API endpoints
2. Understand data structures
3. Identify key fields and relationships
4. Document API capabilities

In [None]:
# Setup
import os
import sys
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any

# Add project root to path
# Path() is the current working directory, so this prepends the directory two
# levels above it to sys.path. The project import below therefore only resolves
# when the kernel's working directory sits two levels under the project root
# (NOTE(review): presumably notebooks/<subdir>/ — confirm).
sys.path.insert(0, str(Path().absolute().parent.parent))

# Project-local API wrapper; relies on the sys.path entry added above.
from src.api.client import TransparenciaAPIClient

# Initialize client
# Constructed with no arguments; every later cell reuses this single instance.
# NOTE(review): credentials/configuration are presumably resolved inside the
# client itself — confirm where the API key is read from.
client = TransparenciaAPIClient()
print("API Client initialized successfully")

## 1. Endpoint Overview

In [None]:
# Fetch the endpoint catalogue (name -> path) and group it by name prefix
endpoints = client.get_available_endpoints()

# The text before the first underscore of an endpoint name, title-cased,
# is used as that endpoint's category
categories = {}
for name, path in endpoints.items():
    prefix = name.partition('_')[0]
    category = prefix.title()
    entry = {
        'name': name,
        'path': path,
        'description': name.replace('_', ' ').title()
    }
    categories.setdefault(category, []).append(entry)

# One summary row per category: endpoint count plus up to three example names
summary_data = [
    {
        'Category': category,
        'Endpoints': len(items),
        'Examples': ', '.join(item['name'] for item in items[:3])
    }
    for category, items in categories.items()
]

summary_df = pd.DataFrame(summary_data)
print("API Endpoints by Category:")
print(summary_df.to_string(index=False))

# Pie chart of how many endpoints fall into each category
fig = px.pie(
    summary_df,
    names='Category',
    values='Endpoints',
    title='API Endpoints Distribution by Category'
)
fig.show()

## 2. Despesas (Expenses) Exploration

In [None]:
# Contratos (Contracts): pull a small page and inspect its shape
print("Exploring Contracts endpoint...")

try:
    contracts = client.get_contratos(pagina=1, quantidade=5)

    if not contracts:
        print("No contracts data available")
    else:
        # A DataFrame makes the column names and dtypes easy to inspect
        df_contracts = pd.DataFrame(contracts)

        print(f"\nNumber of contracts fetched: {len(contracts)}")
        column_names = ', '.join(df_contracts.columns)
        print(f"\nColumns available: {column_names}")

        # Column dtypes as inferred by pandas
        print("\nData types:")
        print(df_contracts.dtypes)

        # Dump the first record field by field
        print("\nSample contract:")
        if len(contracts) > 0:
            first_contract = contracts[0]
            for field_name, field_value in first_contract.items():
                print(f"  {field_name}: {field_value}")

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching contracts: {exc}")

In [None]:
# Pagamentos (Payments): fetch a page and profile any monetary columns
print("Exploring Payments endpoint...")

try:
    payments = client.get_pagamentos(pagina=1, quantidade=10)

    if not payments:
        print("No payment data available")
    else:
        df_payments = pd.DataFrame(payments)

        print(f"\nNumber of payments fetched: {len(payments)}")
        column_names = ', '.join(df_payments.columns)
        print(f"\nColumns available: {column_names}")

        # Columns whose name contains "valor" (case-insensitive)
        value_columns = [c for c in df_payments.columns if 'valor' in c.lower()]
        if value_columns:
            print(f"\nValue columns found: {value_columns}")
            for col in value_columns:
                if not pd.api.types.is_numeric_dtype(df_payments[col]):
                    # Statistics are only printed for numeric columns
                    continue
                print(f"\n{col} statistics:")
                print(df_payments[col].describe())

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching payments: {exc}")

## 3. Servidores (Public Servants) Exploration

In [None]:
# Servidores (Public Servants): fetch a small page and show one record
print("Exploring Public Servants endpoint...")

try:
    # Called with paging arguments only; the endpoint might need extra filters
    servidores = client.get_servidores(pagina=1, quantidade=5)

    if not servidores:
        print("No servants data available")
        print("Note: This endpoint might require specific search parameters")
    else:
        df_servidores = pd.DataFrame(servidores)

        print(f"\nNumber of servants fetched: {len(servidores)}")
        column_names = ', '.join(df_servidores.columns)
        print(f"\nColumns available: {column_names}")

        # Dump the first record field by field
        print("\nSample servant data:")
        if len(servidores) > 0:
            first_servant = servidores[0]
            for field_name, field_value in first_servant.items():
                print(f"  {field_name}: {field_value}")

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching servants: {exc}")

## 4. Benefícios (Benefits) Exploration

In [None]:
# Bolsa Família: try a paged request and show the first record, if any
print("Exploring Bolsa Família endpoint...")

try:
    # Only paging arguments are sent here; the endpoint might also need a
    # municipality code (codigoIbge) and a month/year (mesAno)
    bolsa_familia = client.get_bolsa_familia(pagina=1, quantidade=5)

    if not bolsa_familia:
        print("No Bolsa Família data available")
        print("Note: This endpoint typically requires municipality code (codigoIbge) and month/year (mesAno)")
    else:
        print("\nBolsa Família data fetched successfully")
        print(f"Number of records: {len(bolsa_familia)}")

        if len(bolsa_familia) > 0:
            print("\nSample record:")
            first_record = bolsa_familia[0]
            for field_name, field_value in first_record.items():
                print(f"  {field_name}: {field_value}")

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching Bolsa Família data: {exc}")
    print("This endpoint might require specific parameters")

## 5. Licitações (Bidding) Exploration

In [None]:
# Licitações (Bidding): fetch a page, count modalities, show one record
print("Exploring Bidding processes endpoint...")

try:
    licitacoes = client.get_licitacoes(pagina=1, quantidade=10)

    if not licitacoes:
        print("No bidding data available")
    else:
        df_licitacoes = pd.DataFrame(licitacoes)

        print(f"\nNumber of bidding processes fetched: {len(licitacoes)}")
        column_names = ', '.join(df_licitacoes.columns)
        print(f"\nColumns available: {column_names}")

        # Frequency table of the 'modalidade' column, when the payload has one
        has_modality = 'modalidade' in df_licitacoes.columns
        if has_modality:
            print("\nBidding modalities:")
            print(df_licitacoes['modalidade'].value_counts())

        # Dump the first record field by field
        print("\nSample bidding process:")
        if len(licitacoes) > 0:
            first_bid = licitacoes[0]
            for field_name, field_value in first_bid.items():
                print(f"  {field_name}: {field_value}")

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching bidding data: {exc}")

## 6. Sanções (Sanctions) Exploration

In [None]:
# Sanções (Sanctions): fetch a small page of the "ceis" list and show one record
print("Exploring Sanctioned Companies (CEIS)...")

try:
    sanctioned = client.get_empresas_sancionadas(tipo="ceis", pagina=1, quantidade=5)

    if not sanctioned:
        print("No sanctions data available")
    else:
        df_sanctioned = pd.DataFrame(sanctioned)

        print(f"\nNumber of sanctioned companies fetched: {len(sanctioned)}")
        column_names = ', '.join(df_sanctioned.columns)
        print(f"\nColumns available: {column_names}")

        # Dump the first record field by field
        print("\nSample sanctioned company:")
        if len(sanctioned) > 0:
            first_company = sanctioned[0]
            for field_name, field_value in first_company.items():
                print(f"  {field_name}: {field_value}")

except Exception as exc:
    # Any failure (request or processing) is reported, not raised
    print(f"Error fetching sanctions data: {exc}")

## 7. API Response Analysis

In [None]:
# Analyze response patterns across different endpoints


def preview_field_names(field_names, limit=5):
    """Return a comma-separated preview of at most ``limit`` field names.

    An ellipsis is appended only when names were actually left out, so a
    record with ``limit`` or fewer fields is shown in full without a
    misleading trailing '...'.

    Args:
        field_names: Iterable of field-name strings.
        limit: Maximum number of names to include in the preview.

    Returns:
        The joined preview string ('' for an empty iterable).
    """
    names = list(field_names)
    preview = ', '.join(names[:limit])
    if len(names) > limit:
        preview += '...'
    return preview


endpoint_analysis = []

# (label, zero-argument fetcher) pairs; the lambdas defer each request until
# the loop below, so one failing endpoint does not prevent the others running
test_endpoints = [
    ("Órgãos", lambda: client.get_orgaos(sistema="siafi", pagina=1, quantidade=5)),
    ("Contratos", lambda: client.get_contratos(pagina=1, quantidade=5)),
    ("Fornecedores", lambda: client.get_fornecedores(pagina=1, quantidade=5)),
    ("Licitações", lambda: client.get_licitacoes(pagina=1, quantidade=5))
]

for name, fetch_func in test_endpoints:
    try:
        print(f"\nAnalyzing {name}...")
        data = fetch_func()

        if data and len(data) > 0:
            # Only the first record is inspected; the remaining records are
            # not checked for a different set of keys
            first_item = data[0]
            field_names = list(first_item.keys())
            lowered_names = [key.lower() for key in field_names]

            analysis = {
                'Endpoint': name,
                'Records': len(data),
                'Fields': len(field_names),
                'Field Names': preview_field_names(field_names),
                # Substring heuristics on the field names
                'Has IDs': any('id' in key or 'codigo' in key for key in lowered_names),
                'Has Dates': any('data' in key for key in lowered_names),
                'Has Values': any('valor' in key for key in lowered_names)
            }

            endpoint_analysis.append(analysis)
            print(f"  ✓ Success: {len(data)} records, {len(field_names)} fields")
        else:
            print("  ⚠ No data returned")

    except Exception as e:
        # Report a shortened error and continue with the next endpoint
        print(f"  ✗ Error: {str(e)[:100]}")

# Create analysis DataFrame
if endpoint_analysis:
    analysis_df = pd.DataFrame(endpoint_analysis)
    print("\n" + "="*80)
    print("Endpoint Analysis Summary:")
    print("="*80)
    print(analysis_df.to_string(index=False))

## 8. Data Relationships Discovery

In [None]:
# Identify common fields across endpoints to understand relationships


def find_shared_fields(fields_by_endpoint):
    """Return the fields that occur in more than one endpoint.

    Doing this inside a function keeps the loop variables local, so the
    notebook-level ``endpoints`` mapping created in the overview cell is no
    longer overwritten by this analysis.

    Args:
        fields_by_endpoint: Mapping of endpoint label -> iterable of field names.

    Returns:
        Dict mapping each shared field name to the list of endpoint labels
        that contain it, in the mapping's iteration order.
    """
    occurrences = {}
    for endpoint_name, fields in fields_by_endpoint.items():
        for field in fields:
            occurrences.setdefault(field, []).append(endpoint_name)
    return {
        field: endpoint_names
        for field, endpoint_names in occurrences.items()
        if len(endpoint_names) > 1
    }


print("Discovering data relationships...\n")

field_mapping = {}
common_fields = []

# Collect fields from different endpoints
# (label, zero-argument fetcher) pairs so each request runs inside the try below
endpoints_to_check = [
    ("Órgãos", lambda: client.get_orgaos(sistema="siafi", pagina=1, quantidade=2)),
    ("Contratos", lambda: client.get_contratos(pagina=1, quantidade=2)),
    ("Fornecedores", lambda: client.get_fornecedores(pagina=1, quantidade=2))
]

for name, fetch_func in endpoints_to_check:
    try:
        data = fetch_func()
        if data and len(data) > 0:
            # Field set is taken from the first record only
            fields = set(data[0].keys())
            field_mapping[name] = fields
            print(f"{name}: {len(fields)} fields")
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # cell, and include the reason so failures can be diagnosed
        print(f"{name}: Unable to fetch data ({str(exc)[:100]})")

# Find common fields (needs at least two endpoints to compare)
if len(field_mapping) > 1:
    shared_fields = find_shared_fields(field_mapping)

    # Show common fields
    print("\nPotential relationship fields (appearing in multiple endpoints):")
    for shared_field, endpoint_names in shared_fields.items():
        print(f"  - {shared_field}: {', '.join(endpoint_names)}")
    common_fields.extend(shared_fields)

    if not common_fields:
        print("  No common fields found across endpoints")

## 9. Export API Documentation

In [None]:
# Build the documentation payload: API metadata plus one list of endpoint
# descriptors (name, path, description) per category
api_doc = {
    "api_info": {
        "name": "Portal da Transparência API",
        "base_url": client.BASE_URL,
        "rate_limit": f"{client.rate_limit} requests/minute",
        "documentation_generated": datetime.now().isoformat()
    },
    "endpoints": {
        category: [
            {
                "name": item['name'],
                "path": item['path'],
                "description": item['description']
            }
            for item in items
        ]
        for category, items in categories.items()
    }
}

# Write the payload to <two levels above the working directory>/docs
docs_dir = Path().absolute().parent.parent / "docs"
docs_dir.mkdir(exist_ok=True)

output_path = docs_dir / "api_endpoints.json"
with open(output_path, "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps accented characters readable in the file
    json.dump(api_doc, f, ensure_ascii=False, indent=2)

print("API documentation exported to docs/api_endpoints.json")

# Report how much was documented
print(f"\nDocumented {len(endpoints)} endpoints across {len(categories)} categories")

## 10. Key Findings Summary

In [None]:
# Summarize key findings
print("API EXPLORATION SUMMARY")
print("=" * 80)
print(f"\nTotal endpoints available: {len(endpoints)}")
print(f"Categories: {', '.join(categories.keys())}")

# The rate limit is read from the client (the same attribute the exported
# documentation uses) instead of a hard-coded number, so this summary cannot
# drift from the client's actual configuration
observations = [
    "The API provides comprehensive access to Brazilian government transparency data",
    "Data is organized into logical categories (Expenses, Servants, Benefits, etc.)",
    "Most endpoints support pagination for large datasets",
    "Common fields suggest relationships between different data types",
    f"Rate limiting is in place ({client.rate_limit} requests/minute)",
]

next_steps = [
    "Analyze data quality for specific use cases",
    "Design data collection strategy based on update frequency",
    "Implement data models to capture relationships",
    "Create automated data pipelines for regular updates",
    "Build visualizations and dashboards for insights",
]

use_cases = [
    "Government spending analysis",
    "Contractor and supplier monitoring",
    "Public servant salary analysis",
    "Social benefit distribution studies",
    "Procurement process transparency",
]

print("\nKey Observations:")
for number, text in enumerate(observations, start=1):
    print(f"{number}. {text}")

print("\nRecommended Next Steps:")
for number, text in enumerate(next_steps, start=1):
    print(f"{number}. {text}")

print("\nPotential Use Cases:")
for text in use_cases:
    print(f"- {text}")