# Census Data Visualization

This notebook demonstrates various ways to visualize Census data for journalistic storytelling, including:
- Maps and geographic visualizations
- Time series plots
- Comparative charts
- Interactive visualizations

In [None]:
# Import required libraries
import getpass
import json
import os

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from census import Census

# Set up plotting defaults
plt.style.use('seaborn')
%matplotlib inline

# Initialize Census API
api_key = os.getenv('CENSUS_API_KEY')
if not api_key:
    api_key = input('Enter your Census API key: ')
c = Census(api_key)

## 1. Geographic Visualizations

Creating maps to show geographic patterns in Census data.

In [None]:
# Get state population data
pop_data = c.acs5.state(('NAME', 'B01003_001E'), '*', year=2019)
pop_df = pd.DataFrame(pop_data)
pop_df = pop_df.rename(columns={'B01003_001E': 'population'})
pop_df['population'] = pd.to_numeric(pop_df['population'])

# plotly's "USA-states" location mode matches on two-letter postal
# abbreviations, not on row positions, so translate the state names first.
STATE_ABBREVIATIONS = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME',
    'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE',
    'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}
pop_df['state_abbr'] = pop_df['NAME'].map(STATE_ABBREVIATIONS)

# Rows without an abbreviation (e.g. territories the map cannot draw) are left
# out of the map only; pop_df itself keeps every row for later cells.
map_df = pop_df.dropna(subset=['state_abbr'])

# Create a choropleth map using plotly
fig = px.choropleth(
    map_df,
    locations="state_abbr",
    locationmode="USA-states",
    color="population",
    hover_name="NAME",
    scope="usa",
    title="U.S. State Populations (2019)",
    color_continuous_scale="Viridis"
)
fig.show()

## 2. Time Series Visualizations

Showing trends over time using line charts and area plots.

In [None]:
# Get population data for multiple years
def get_population_by_year(year, states=('06', '36', '48')):
    """Fetch total population (ACS 5-year, B01003_001E) for some states in one year.

    Parameters
    ----------
    year : int
        ACS 5-year vintage to query.
    states : str or iterable of str, optional
        State FIPS codes. Either an iterable of codes or an already
        comma-separated string. Defaults to California, New York and Texas.

    Returns
    -------
    pandas.DataFrame
        One row per state with the API's columns plus ``year`` and a numeric
        ``population`` column.
    """
    # The Census API takes multiple states as one comma-separated value
    # ("06,36,48"); passing a Python list would send its repr instead.
    state_fips = states if isinstance(states, str) else ','.join(states)
    data = c.acs5.state(('NAME', 'B01003_001E'), state_fips, year=year)
    df = pd.DataFrame(data)
    df['year'] = year
    df['population'] = pd.to_numeric(df['B01003_001E'])
    return df

# Assemble one long-format frame covering the 2015 through 2019 vintages
years = range(2015, 2020)
yearly_frames = list(map(get_population_by_year, years))
time_series_data = pd.concat(yearly_frames)

# Interactive line chart: one coloured trace per state name
line_options = dict(
    x='year',
    y='population',
    color='NAME',
    title='Population Trends: California, New York, and Texas (2015-2019)',
)
fig = px.line(time_series_data, **line_options)
fig.show()

## 3. Comparative Visualizations

Creating charts that compare different metrics across states or regions.

In [None]:
# Request median household income (B19013_001E) and the bachelor's degree
# count (B15003_022E) for every state, alongside the state name
variables = ('NAME', 'B19013_001E', 'B15003_022E')
comp_data = c.acs5.state(variables, '*', year=2019)

# Build the frame and give the ACS codes readable column names in one step
readable_names = {
    'B19013_001E': 'median_income',
    'B15003_022E': 'bachelors_degree',
}
comp_df = pd.DataFrame(comp_data).rename(columns=readable_names)

# Coerce both measures to numeric dtypes
numeric_cols = ['median_income', 'bachelors_degree']
comp_df[numeric_cols] = comp_df[numeric_cols].apply(pd.to_numeric)

# Scatter plot with each point labelled by its state name.
# NOTE(review): bachelors_degree is a raw count, so it scales with state
# population; consider a share of adults if the intent is a rate comparison.
axis_labels = {
    'median_income': 'Median Household Income ($)',
    'bachelors_degree': "Number of Bachelor's Degree Holders",
}
fig = px.scatter(
    comp_df,
    x='median_income',
    y='bachelors_degree',
    text='NAME',
    title="Median Income vs. Bachelor's Degree Holders by State",
    labels=axis_labels,
)
fig.show()

## 4. Distribution Visualizations

Showing the distribution of various demographic metrics.

In [None]:
# Get age distribution data
# ACS table B01001 ("Sex by Age") reports each age bracket separately for
# males and females, and splits ages 20-24 into three columns (20, 21, 22-24).
# Every code belonging to a displayed bracket is listed here so the chart
# covers the whole population instead of a single male-only column.
age_vars = {
    'B01001_003E': 'Under 5',   # male
    'B01001_027E': 'Under 5',   # female
    'B01001_004E': '5-9',       # male
    'B01001_028E': '5-9',       # female
    'B01001_005E': '10-14',     # male
    'B01001_029E': '10-14',     # female
    'B01001_006E': '15-17',     # male
    'B01001_030E': '15-17',     # female
    'B01001_007E': '18-19',     # male
    'B01001_031E': '18-19',     # female
    'B01001_008E': '20-24',     # male, 20
    'B01001_009E': '20-24',     # male, 21
    'B01001_010E': '20-24',     # male, 22-24
    'B01001_032E': '20-24',     # female, 20
    'B01001_033E': '20-24',     # female, 21
    'B01001_034E': '20-24',     # female, 22-24
}

variables = ['NAME'] + list(age_vars.keys())
# state() takes the field list first and the state FIPS code second.
age_data = c.acs5.state(variables, '06', year=2019)  # California
age_df = pd.DataFrame(age_data)

# Prepare data for visualization: add up the component columns of each
# bracket. Dicts keep insertion order, so brackets stay in ascending age order.
california_row = age_df.iloc[0]
age_totals = {}
for code, name in age_vars.items():
    age_totals[name] = age_totals.get(name, 0) + int(california_row[code])

age_groups = list(age_totals.keys())
age_values = list(age_totals.values())

# Create a bar chart
fig = go.Figure(data=[
    go.Bar(x=age_groups, y=age_values)
])
fig.update_layout(
    title='Age Distribution in California (2019)',
    xaxis_title='Age Group',
    yaxis_title='Population'
)
fig.show()

## 5. Exporting Visualizations

Save static PNG versions of your visualizations for publication.

In [None]:
# Create output directory if it doesn't exist.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the create call.
output_dir = '../output'
os.makedirs(output_dir, exist_ok=True)

# Save static versions of the visualizations
def save_static_plots():
    # Population map
    plt.figure(figsize=(15, 10))
    sns.scatterplot(data=pop_df, x='NAME', y='population')
    plt.xticks(rotation=45, ha='right')
    plt.title('State Populations (2019)')
    plt.tight_layout()
    plt.savefig(f'{output_dir}/state_populations.png', dpi=300, bbox_inches='tight')
    
    # Income vs Education scatter plot
    plt.figure(figsize=(15, 10))
    sns.scatterplot(data=comp_df, x='median_income', y='bachelors_degree')
    plt.title('Median Income vs. Education Level')
    plt.tight_layout()
    plt.savefig(f'{output_dir}/income_vs_education.png', dpi=300, bbox_inches='tight')

save_static_plots()
print(f"Visualizations saved to {output_dir}/")