# Census API Basics

This notebook demonstrates how to access and work with the Census Bureau API. You'll learn:
- How to set up your Census API key
- Basic API query structure
- Different Census datasets available
- Common data variables and their meanings

In [None]:
# Import required libraries
import getpass
import os

import pandas as pd
import requests
from census import Census

# Set up warning handling
# NOTE: 'ignore' with no category or module argument silences every warning
# raised after this cell runs, including deprecation warnings from imported
# libraries. Remove or narrow this filter when debugging unexpected results.
import warnings
warnings.filterwarnings('ignore')

## Setting Up Your Census API Key

To use the Census Bureau API, you need an API key. Get one for free at:
https://api.census.gov/data/key_signup.html

In [None]:
# Get Census API key from environment variable (whitespace-only counts as unset)
api_key = (os.getenv('CENSUS_API_KEY') or '').strip()

# If not set in the environment, prompt for it. getpass hides the typed value,
# so the key is not echoed into the cell output (and saved with the notebook)
# the way it would be with input().
if not api_key:
    api_key = getpass.getpass('Enter your Census API key: ').strip()

# Fail here with a clear message rather than letting later queries fail
# with a less obvious error caused by an empty key.
if not api_key:
    raise ValueError(
        'No Census API key provided. Set the CENSUS_API_KEY environment '
        'variable or enter a key when prompted.'
    )

# Initialize Census API client
c = Census(api_key)

## Available Census Datasets

The Census Bureau provides several datasets:
- ACS (American Community Survey): 1-year and 5-year estimates
- Decennial Census: Complete count every 10 years
- Population Estimates
- Economic Indicators

Let's explore how to access a few of these: ACS 5-year estimates, ACS 1-year estimates, and population estimates.

In [None]:
# Example queries for different datasets
# Optional queries are wrapped in try/except so that one unavailable dataset
# does not abort the cell. We catch Exception (not a bare except) so that
# KeyboardInterrupt still stops the cell, and we print the reason for failure.

# 1. ACS 5-year estimates (most detailed, most reliable)
acs5_example = c.acs5.state(('NAME', 'B01003_001E'), '06', year=2019)
print("ACS 5-year estimate (2019) - California population:")
print(f"{int(acs5_example[0]['B01003_001E']):,}\n")

# 2. ACS 1-year estimates (most current, but less reliable for small areas)
try:
    acs1_example = c.acs1.state(('NAME', 'B01003_001E'), '06', year=2019)
    print("ACS 1-year estimate (2019) - California population:")
    print(f"{int(acs1_example[0]['B01003_001E']):,}\n")
except Exception as exc:
    print(f"ACS 1-year data not available for this query ({exc})\n")

# 3. Population estimates
# NOTE(review): confirm that the installed `census` package exposes a
# `pep.population` endpoint with this signature; if it does not, the
# message below reports the error instead of stopping the notebook.
try:
    pop_example = c.pep.population(vars=('POP', 'NAME'), geo={'for': 'state:06'})
    print("Population estimate - California:")
    print(f"{int(pop_example[0]['POP']):,}\n")
except Exception as exc:
    print(f"Population estimate not available for this query ({exc})\n")

## Understanding Census Variables

Census variables are identified by codes. Here are some commonly used ACS variables (the trailing `E` marks an estimate):
- B01003_001E: Total population
- B19013_001E: Median household income
- B01002_001E: Median age
- B02001_002E: White alone population
- B02001_003E: Black alone population

In [None]:
# Let's create a function to fetch multiple variables at once
def get_demographic_snapshot(state_fips, year=2019):
    """Fetch a fixed set of ACS 5-year demographic variables for one state.

    Uses the notebook-level Census client ``c``.

    Args:
        state_fips: State FIPS code passed to the client, e.g. '06'.
        year: ACS 5-year vintage to query. Defaults to 2019.

    Returns:
        pandas.DataFrame built from the client's response, with one column
        per requested variable code plus whatever geography fields the
        client includes. Values are left exactly as returned (presumably
        strings; convert with ``pd.to_numeric`` before doing arithmetic).
    """
    variables = (
        'NAME',
        'B01003_001E',  # Total population
        'B19013_001E',  # Median household income
        'B01002_001E',  # Median age
        'B02001_002E',  # White alone
        'B02001_003E'   # Black alone
    )

    data = c.acs5.state(variables, state_fips, year=year)
    return pd.DataFrame(data)

# Get data for California (06) and New York (36)
states = ['06', '36']

# Map Census variable codes to readable column names.
# Every renamed column is numeric, so this mapping also drives the conversion.
column_names = {
    'B01003_001E': 'total_population',
    'B19013_001E': 'median_household_income',
    'B01002_001E': 'median_age',
    'B02001_002E': 'white_population',
    'B02001_003E': 'black_population'
}
numeric_cols = list(column_names.values())

# ignore_index=True gives the combined frame a unique 0..n-1 index; without it
# each per-state frame keeps its own labels starting at 0, so labels repeat.
all_data = pd.concat(
    [get_demographic_snapshot(state) for state in states],
    ignore_index=True,
).rename(columns=column_names)

# Convert to numeric
all_data[numeric_cols] = all_data[numeric_cols].apply(pd.to_numeric)

print("Demographic snapshot for California and New York:")
print(all_data)

## Error Handling and Best Practices

1. Always check your API rate limits
2. Use try/except blocks for robust code
3. Cache results for frequently used queries
4. Be mindful of data vintage (reference year)

In [None]:
# Example of proper error handling and caching
import json
from pathlib import Path
import time

def _read_cache(cache_path):
    """Load a cached DataFrame, or return None if the file is missing or unreadable."""
    try:
        with open(cache_path, 'r') as f:
            payload = json.load(f)
        # Current format: DataFrame.to_dict(orient='split'). Index labels are
        # stored as list values, so integer labels survive the JSON round trip.
        if (isinstance(payload, dict)
                and set(payload) == {'index', 'columns', 'data'}
                and isinstance(payload['data'], list)):
            return pd.DataFrame(
                payload['data'], index=payload['index'], columns=payload['columns']
            )
        # Legacy format: DataFrame.to_dict(), i.e. {column: {index: value}}.
        return pd.DataFrame(payload)
    except (OSError, ValueError, TypeError):
        # Missing, truncated or otherwise invalid cache: treat as a cache miss.
        return None


def _write_cache(cache_path, data):
    """Write a DataFrame to cache_path as JSON via a temp file and atomic rename."""
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = cache_path.with_name(cache_path.name + '.tmp')
    try:
        with open(tmp_path, 'w') as f:
            json.dump(data.to_dict(orient='split'), f)
        # Replace only after a complete write, so a failed dump never leaves a
        # truncated cache file behind.
        tmp_path.replace(cache_path)
    finally:
        if tmp_path.exists():
            tmp_path.unlink()


def get_cached_data(cache_file, fetch_func, max_age_seconds=86400):
    """Return data from a JSON cache file, refreshing it via fetch_func when stale.

    Args:
        cache_file: Path of the JSON cache file. Missing parent directories
            are created when the cache is written.
        fetch_func: Zero-argument callable returning a pandas DataFrame.
        max_age_seconds: Maximum cache age, measured from the file's
            modification time, before a refresh is attempted. Defaults to
            86400 (one day).

    Returns:
        pandas.DataFrame: the cached data if it is fresh and readable,
        otherwise the freshly fetched data. If fetching fails and a readable
        (possibly stale) cache exists, that cache is returned instead.

    Raises:
        Exception: whatever fetch_func raised, when no readable cache exists
        to fall back on.
    """
    cache_path = Path(cache_file)

    # Use the cache if it exists, is recent enough, and can be parsed
    if cache_path.exists() and (time.time() - cache_path.stat().st_mtime < max_age_seconds):
        cached = _read_cache(cache_path)
        if cached is not None:
            return cached

    # No usable cache: fetch new data
    try:
        data = fetch_func()
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        # Fall back to an old cache if one can be read
        stale = _read_cache(cache_path)
        if stale is not None:
            return stale
        raise

    # Caching is best-effort: a failed write must not discard data that was
    # fetched successfully.
    try:
        _write_cache(cache_path, data)
    except Exception as e:
        print(f"Warning: could not write cache file {cache_file}: {str(e)}")
    return data

# Example usage
def fetch_state_populations():
    """Query the 2019 ACS 5-year data for NAME and B01003_001E (total population).

    The '*' state argument is passed to the notebook-level Census client ``c``
    as a wildcard; the response is returned as a pandas DataFrame.
    """
    fields = ('NAME', 'B01003_001E')
    rows = c.acs5.state(fields, '*', year=2019)
    return pd.DataFrame(rows)

# Get data with caching
# The cache file lives outside the notebook directory so re-runs can reuse it.
cache_location = '../output/state_populations_cache.json'
try:
    pop_data = get_cached_data(cache_location, fetch_state_populations)
    print("Successfully retrieved population data!")
    row_count = len(pop_data)
    print(f"Number of states/territories: {row_count}")
except Exception as err:
    # Report the failure instead of stopping the notebook
    print(f"Failed to retrieve data: {str(err)}")