# Analyzing Demographic Trends

This notebook demonstrates how to analyze demographic trends using Census data. We'll focus on:
- Population changes over time
- Age distribution analysis
- Income and poverty trends
- Educational attainment changes

In [None]:
# Import required libraries
import os
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from census import Census

# Set plotting style
plt.style.use('seaborn')
%matplotlib inline

# Initialize Census API
api_key = os.getenv('CENSUS_API_KEY')
if not api_key:
    api_key = input('Enter your Census API key: ')
c = Census(api_key)

## Population Changes Over Time

Let's analyze population changes across multiple years to identify trends and patterns.

In [None]:
def get_state_population(year):
    """Return one row per state with its B01003_001E value for ``year``.

    Queries ``c.acs5.state`` for ``NAME`` and ``B01003_001E`` with the state
    selector ``'*'``. The result is a DataFrame of the returned records with
    a ``year`` column added and ``B01003_001E`` renamed to ``population``.
    """
    records = c.acs5.state(('NAME', 'B01003_001E'), '*', year=year)
    return (
        pd.DataFrame(records)
        .assign(year=year)
        .rename(columns={'B01003_001E': 'population'})
    )

# Get population data for multiple years
years = range(2015, 2020)
# Derive the comparison endpoints from `years` so the growth calculation and
# the printed label stay in sync if the range is changed.
first_year, last_year = years[0], years[-1]

population_data = pd.concat([get_state_population(year) for year in years])
population_data['population'] = pd.to_numeric(population_data['population'])

# Pivot the data for easier trending: one row per state, one column per year
pop_trend = population_data.pivot(index='NAME', columns='year', values='population')

# Calculate growth rates (percent change from the first to the last year)
# NOTE(review): ACS 5-year releases that are fewer than five years apart pool
# overlapping survey years - confirm this endpoint comparison is appropriate.
pop_trend['growth_rate'] = (
    (pop_trend[last_year] - pop_trend[first_year]) / pop_trend[first_year] * 100
)

# Show top 5 fastest growing states
print(f"Top 5 Fastest Growing States ({first_year}-{last_year}):")
print(pop_trend.nlargest(5, 'growth_rate')[['growth_rate']])

## Age Distribution Analysis

Understanding the age structure of populations is crucial for many stories, from education to healthcare.

In [None]:
# Get age distribution data.
# Table B01001 ("Sex by Age") reports every age band separately for males
# (B01001_003E onward) and females (B01001_027E onward), and splits ages 20-24
# into three bands per sex. Several variables therefore map to the same label
# and are summed below to get counts for both sexes combined.
age_vars = {
    # Male
    'B01001_003E': 'under_5',
    'B01001_004E': '5_to_9',
    'B01001_005E': '10_to_14',
    'B01001_006E': '15_to_17',
    'B01001_007E': '18_to_19',
    'B01001_008E': '20_to_24',  # 20 years
    'B01001_009E': '20_to_24',  # 21 years
    'B01001_010E': '20_to_24',  # 22 to 24 years
    'B01001_011E': '25_to_29',
    'B01001_012E': '30_to_34',
    # Female
    'B01001_027E': 'under_5',
    'B01001_028E': '5_to_9',
    'B01001_029E': '10_to_14',
    'B01001_030E': '15_to_17',
    'B01001_031E': '18_to_19',
    'B01001_032E': '20_to_24',  # 20 years
    'B01001_033E': '20_to_24',  # 21 years
    'B01001_034E': '20_to_24',  # 22 to 24 years
    'B01001_035E': '25_to_29',
    'B01001_036E': '30_to_34',
}
age_total_var = 'B01001_001E'  # total population, all ages and both sexes

variables = ['NAME', age_total_var] + list(age_vars.keys())
age_data = c.acs5.state(variables, '*', year=2019)
age_df = pd.DataFrame(age_data)

# Collapse the per-sex variables into one numeric column per age group
age_groups = list(dict.fromkeys(age_vars.values()))  # unique labels, first-seen order
age_counts = age_df[list(age_vars)].apply(pd.to_numeric)
for group in age_groups:
    group_codes = [code for code, label in age_vars.items() if label == group]
    age_df[group] = age_counts[group_codes].sum(axis=1)
age_df['total_population'] = pd.to_numeric(age_df[age_total_var])
age_df = age_df.drop(columns=[age_total_var] + list(age_vars))

# Calculate percentages of the state's total population. Dividing by the sum
# of the fetched bands would only give shares of the under-35 population.
total = age_df['total_population']
for col in age_groups:
    age_df[f'{col}_pct'] = age_df[col] / total * 100

# Show states with highest percentage of young people (under 18)
young_cols = ['under_5_pct', '5_to_9_pct', '10_to_14_pct', '15_to_17_pct']
age_df['youth_percentage'] = age_df[young_cols].sum(axis=1)

print("\nStates with Highest Youth Population Percentage:")
print(age_df.nlargest(5, 'youth_percentage')[['NAME', 'youth_percentage']])

## Income and Poverty Trends

Analyzing economic indicators across different geographic areas and over time.

In [None]:
# Variables to request: state name, median household income (B19013_001E)
# and population in poverty (B17001_002E).
income_vars = ('NAME', 'B19013_001E', 'B17001_002E')

income_data = c.acs5.state(income_vars, '*', year=2019)

# Give the Census codes readable names, then coerce both measures to numbers
income_df = pd.DataFrame(income_data).rename(
    columns={'B19013_001E': 'median_income', 'B17001_002E': 'poverty_pop'}
)
income_df = income_df.assign(
    median_income=pd.to_numeric(income_df['median_income']),
    poverty_pop=pd.to_numeric(income_df['poverty_pop']),
)

# Rank states by median income and show the five highest
print("\nTop 5 States by Median Household Income:")
print(income_df[['NAME', 'median_income']].nlargest(5, 'median_income'))

## Educational Attainment Changes

Track changes in educational attainment levels across different regions.

In [None]:
# Get educational attainment data.
# Table B15003 covers the population 25 years and over. In that table
# B15003_021E is the associate's degree; bachelor's through doctorate are
# B15003_022E-B15003_025E.
edu_vars = {
    'B15003_017E': 'high_school',
    'B15003_022E': 'bachelors',
    'B15003_023E': 'masters',
    'B15003_024E': 'professional',
    'B15003_025E': 'doctorate'
}
edu_total_var = 'B15003_001E'  # total population 25 years and over

variables = ['NAME', edu_total_var] + list(edu_vars.keys())
edu_data = c.acs5.state(variables, '*', year=2019)
edu_df = pd.DataFrame(edu_data)

# Clean up the data: numeric columns with readable names
for code, name in edu_vars.items():
    edu_df[name] = pd.to_numeric(edu_df[code])
    edu_df = edu_df.drop(columns=[code])
edu_df['population_25_plus'] = pd.to_numeric(edu_df[edu_total_var])
edu_df = edu_df.drop(columns=[edu_total_var])

# Calculate percentage with bachelor's degree or higher. Divide the summed
# counts by the 25+ population; ranking on raw counts would just order states
# by size.
higher_ed_cols = ['bachelors', 'masters', 'professional', 'doctorate']
edu_df['higher_education'] = (
    edu_df[higher_ed_cols].sum(axis=1) / edu_df['population_25_plus'] * 100
)

# Show top states by higher education
print("\nTop 5 States by Percentage with Bachelor's Degree or Higher:")
print(edu_df.nlargest(5, 'higher_education')[['NAME', 'higher_education']])

## Exporting Results

Save your analysis results for use in your stories.