In [105]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
from config import gkey
url = Census(gkey, year=2018)

In [106]:
# Pulling census data

census_data = url.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E"), {'for': 'state:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "NAME": "Name", "state": "State"})

# Final DataFrame
census_pd = census_pd[["State", "Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income"]]

census_pd.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income
0,27,Minnesota,5527358.0,37.9,68411.0,36245.0
1,28,Mississippi,2988762.0,37.2,43567.0,23434.0
2,29,Missouri,6090062.0,38.5,53560.0,29537.0
3,30,Montana,1041732.0,39.8,52559.0,29765.0
4,31,Nebraska,1904760.0,36.4,59116.0,31101.0


In [107]:
#Creating Region Lists
Northeast = ['Maine', 'Massachusetts', 'Rhode Island','Connecticut',
            'New Hampshire', 'Vermont', 'New York', 'Pennsylvania',
            'New Jersey', 'Delaware', 'Maryland']
Southeast = ['West Virginia', 'Virginia, Kentucky', 'Tennessee',
             'North Carolina', 'South Carolina', 'Georgia', 'Alabama',
             'Mississippi', 'Arkansas', 'Louisiana', 'Florida']
Midwest = ['Ohio', 'Indiana', 'Michigan', 'Illinois',
           'Missouri', 'Wisconsin', 'Minnesota', 'Iowa',
           'Kansas', 'Nebraska', 'South Dakota', 'North Dakota']
Southwest = ['Texas', 'Oklahoma', 'New Mexico', 'Arizona']
West = ['Colorado', 'Wyoming', 'Montana', 'Idaho',
        'Washington', 'Oregon', 'Utah', 'Nevada',
        'California', 'Alaska', 'Hawaii']


In [108]:
#Adding Region to the Dataframe
census_pd['Region'] = np.where(census_pd['Name'].isin(Northeast), 'Northeast',
                      np.where(census_pd['Name'].isin(Southeast), 'Southeast',
                      np.where(census_pd['Name'].isin(Midwest), 'Midwest',
                      np.where(census_pd['Name'].isin(Southwest), 'Southwest',
                      np.where(census_pd['Name'].isin(West), 'West',
                      'Unknown')))))


In [109]:
census_pd.head()
census_pd.to_csv("output_data/census_data_state.csv", encoding="utf-8", index=False)

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Region
0,27,Minnesota,5527358.0,37.9,68411.0,36245.0,Midwest
1,28,Mississippi,2988762.0,37.2,43567.0,23434.0,Southeast
2,29,Missouri,6090062.0,38.5,53560.0,29537.0,Midwest
3,30,Montana,1041732.0,39.8,52559.0,29765.0,West
4,31,Nebraska,1904760.0,36.4,59116.0,31101.0,Midwest


In [110]:
#Getting Population, Median Age, & Avg Income by Region
Region_Pop = census_pd.groupby('Region')['Population'].sum()
Region_Med_Age = census_pd.groupby('Region')['Median Age'].median()
Region_Avg_Income = census_pd.groupby('Region')['Household Income'].mean()


In [111]:
#Creating a Region Dataframe
Region_group_df = pd.DataFrame({'Population' : Region_Pop,
                              'Median Age' : Region_Med_Age,
                              'Household Income' : Region_Avg_Income
                             })
Region_group_df

Unnamed: 0_level_0,Population,Median Age,Household Income
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Midwest,68016629.0,37.9,58636.75
Northeast,63014129.0,40.2,68905.818182
Southeast,69995044.0,38.65,49398.8
Southwest,40842451.0,36.9,53816.5
Unknown,16925417.0,38.4,55681.5
West,67496301.0,37.3,65294.090909


In [112]:
#Formating Population and Household Income
Region_group_df['Population'] = Region_group_df['Population'].map('{:,.0f}'.format)
Region_group_df['Median Age'] = Region_group_df['Median Age'].map('{:.1f}'.format)
Region_group_df['Household Income'] = Region_group_df['Household Income'].map('${:,.0f}'.format)

In [113]:
Region_group_df

Unnamed: 0_level_0,Population,Median Age,Household Income
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Midwest,68016629,37.9,"$58,637"
Northeast,63014129,40.2,"$68,906"
Southeast,69995044,38.7,"$49,399"
Southwest,40842451,36.9,"$53,816"
Unknown,16925417,38.4,"$55,682"
West,67496301,37.3,"$65,294"


In [114]:
Clean_Region_group_df = Region_group_df.drop('Unknown') 

In [115]:
Clean_Region_group_df

Unnamed: 0_level_0,Population,Median Age,Household Income
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Midwest,68016629,37.9,"$58,637"
Northeast,63014129,40.2,"$68,906"
Southeast,69995044,38.7,"$49,399"
Southwest,40842451,36.9,"$53,816"
West,67496301,37.3,"$65,294"


In [116]:
Clean_Region_group_df.to_csv("output_data/census_data_region.csv", encoding="utf-8")