In [1]:
import pandas as pd
import json
import numpy as np

In [2]:
from censusAPI import myAPI

### 2000 & 2010 Decennial Census  - Total Population by County - Full U.S.

In [3]:
# Census API fields to request: the geography label plus the population count
cols = ','.join(['NAME', 'P001001'])

In [4]:
#build table of all US counties for 2000 and 2010
# One request per census year; each response becomes a tidy frame and the
# frames are stacked once at the end (instead of re-concatenating inside the
# loop, which copies the accumulated data on every pass).
years = ['2000','2010']
county_frames = []
for y in years:
    base_url = f'https://api.census.gov/data/{y}/dec/sf1'
    data_url = f'{base_url}?get={cols}&for=county:*&in=state:*&key={myAPI}'
    raw_table = pd.read_json(data_url)
    # Row 0 of the response holds the column labels: drop it from the data and
    # promote it to the header. The explicit copy makes the new columns below
    # land on an independent frame rather than on a slice of raw_table.
    dff = raw_table.iloc[1:].copy()
    dff.columns = raw_table.iloc[0]
    # County key = state code followed by county code
    dff['stco'] = dff['state'] + dff['county']
    dff['yr'] = y
    county_frames.append(dff)
df = pd.concat(county_frames)

In [5]:
# Reshape to one row per county with one population column per census year.
# The counts may still be text at this point (the header row shared their
# column when the response was parsed), and "summing" text concatenates it
# instead of adding it, so coerce to numbers before aggregating.
# NOTE(review): NAME is part of the row key, so a county whose label differs
# between the two censuses would be split across two rows — confirm intended.
df = pd.pivot_table(
    df.assign(P001001=pd.to_numeric(df['P001001'])),
    values='P001001',
    index=['stco','NAME'],
    columns='yr',
    aggfunc='sum',  # string alias; passing np.sum is deprecated in recent pandas
    fill_value=0,   # counties missing from one census get 0 for that year
    margins=False,
).reset_index()

In [6]:
# make sure every per-year population column is stored as integers
df[years] = df[years].astype(int)

In [7]:
# preview the first five counties of the pivoted population table
df.head(n=5)

yr,stco,NAME,2000,2010
0,1001,"Autauga County, Alabama",43671,54571
1,1003,"Baldwin County, Alabama",140415,182265
2,1005,"Barbour County, Alabama",29038,27457
3,1007,"Bibb County, Alabama",20826,22915
4,1009,"Blount County, Alabama",51024,57322


In [8]:
# write the county-level 2000 & 2010 population table to disk (row index is written too)
df.to_csv(path_or_buf='output/dec_us_counties.csv')

### Recode U.S. counties to major U.S. metro CSAs and calculate 2000 and 2010 population

##### This is done to maintain geographic border consistency over time.

In [9]:
# load the lookup of counties that belong to the major-metro CSAs
metros = pd.read_csv('../data/geo/usmetros_cnty.csv')
# left-pad each county code with zeros to five characters so it can be
# matched against the Census 'stco' key
metros['stco_id'] = metros['stco'].map('{0:0>5}'.format)

In [10]:
# attach the Census population columns to every metro county
# (left join: each row of the metro lookup is kept even without a match)
csa = metros.merge(df, how='left', left_on='stco_id', right_on='stco')

In [11]:
# collapse the county rows into one 2000 / 2010 total per CSA
csa = (
    csa.loc[:, ['csa_id','csa_name','2000','2010']]
       .groupby(['csa_id','csa_name'])
       .sum()
       .reset_index()
)

In [12]:
# preview the first five CSA totals
csa.head(n=5)

Unnamed: 0,csa_id,csa_name,2000,2010
0,122,"Atlanta--Athens-Clarke County--Sandy Springs, ...",4923371,6054858
1,148,"Boston-Worcester-Providence, MA-RI-NH-CT",7630016,7893376
2,172,"Charlotte-Concord, NC-SC",1897034,2402623
3,176,"Chicago-Naperville, IL-IN-WI",9465353,9840929
4,184,"Cleveland-Akron-Canton, OH",3694281,3630166


In [13]:
# write the per-CSA 2000 & 2010 population totals to disk (row index is written too)
csa.to_csv(path_or_buf='output/dec_majmetro.csv')

### 2000 & 2010 Decennial Census - Housing Units by County - Full U.S.

In [14]:
# Census API fields to request for the housing tables: the geography label
# plus three housing-unit counts
# NOTE(review): presumably total / occupied / vacant units — confirm against
# the SF1 variable list
h_cols = ','.join(['NAME', 'H001001', 'H003002', 'H003003'])

In [15]:
#build table of all US counties for 2000 and 2010
# One request per census year; each response becomes a tidy frame and the
# frames are stacked once at the end (instead of re-concatenating inside the
# loop, which copies the accumulated data on every pass).
years = ['2000','2010']
housing_frames = []
for y in years:
    base_url = f'https://api.census.gov/data/{y}/dec/sf1'
    data_url = f'{base_url}?get={h_cols}&for=county:*&in=state:*&key={myAPI}'
    raw_table = pd.read_json(data_url)
    # Row 0 of the response holds the column labels: drop it from the data and
    # promote it to the header. The explicit copy makes the new columns below
    # land on an independent frame rather than on a slice of raw_table.
    dff = raw_table.iloc[1:].copy()
    dff.columns = raw_table.iloc[0]
    # County key = state code followed by county code
    dff['stco'] = dff['state'] + dff['county']
    dff['yr'] = y
    housing_frames.append(dff)
df = pd.concat(housing_frames)

In [16]:
# preview the first five rows of the stacked (long-format) housing table
df.head(n=5)

Unnamed: 0,NAME,H001001,H003002,H003003,state,county,stco,yr
1,"Autauga County, Alabama",17662,16003,1659,1,1,1001,2000
2,"Baldwin County, Alabama",74285,55336,18949,1,3,1003,2000
3,"Barbour County, Alabama",12461,10409,2052,1,5,1005,2000
4,"Bibb County, Alabama",8345,7421,924,1,7,1007,2000
5,"Blount County, Alabama",21158,19265,1893,1,9,1009,2000


In [17]:
# Reshape to one row per county with a (variable, year) column for each
# housing count.
# The counts may still be text at this point (the header row shared their
# columns when the response was parsed), and "summing" text concatenates it
# instead of adding it, so coerce to numbers before aggregating.
housing_values = ['H001001','H003002','H003003']
housing_numeric = df.assign(
    **{col: pd.to_numeric(df[col]) for col in housing_values}
)
dff = pd.pivot_table(
    housing_numeric,
    values=housing_values,
    index=['stco','NAME'],
    columns='yr',
    aggfunc='sum',  # string alias; passing np.sum is deprecated in recent pandas
    fill_value=0,   # counties missing from one census get 0 for that year
    margins=False,
).reset_index()

In [18]:
# preview the first five counties of the pivoted housing table
dff.head(n=5)

Unnamed: 0_level_0,stco,NAME,H001001,H001001,H003002,H003002,H003003,H003003
yr,Unnamed: 1_level_1,Unnamed: 2_level_1,2000,2010,2000,2010,2000,2010
0,1001,"Autauga County, Alabama",17662,22135,16003,20221,1659,1914
1,1003,"Baldwin County, Alabama",74285,104061,55336,73180,18949,30881
2,1005,"Barbour County, Alabama",12461,11829,10409,9820,2052,2009
3,1007,"Bibb County, Alabama",8345,8981,7421,7953,924,1028
4,1009,"Blount County, Alabama",21158,23887,19265,21578,1893,2309


In [19]:
# write the county-level 2000 & 2010 housing-unit table to disk (row index is written too)
dff.to_csv(path_or_buf='output/dec_hou_us_counties.csv')