In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib

In [2]:
from censusAPI import myAPI

# 2010 Decennial Census - Homeless Population by County

### Total population and Group Quarters population (PCT20) for all U.S. Counties

In [4]:
# Query parameters: year, source dataset, geography and requested variables
year = '2010'
dsource = 'dec/sf1'
state = '*'   # all U.S. states
county = '*'  # all U.S. counties

# NAME, P001001 (total population) and every variable in table group PCT20.
# Plain string literal: there are no placeholders, so no f-prefix is needed.
cols = 'NAME,P001001,group(PCT20)'

# Note: ACS B26203 group quarters type [5] is available for the full U.S. only.
# No comparable ACS estimate is available.

In [5]:
# Endpoint for the chosen year/dataset
base_url = 'https://api.census.gov/data/{}/{}'.format(year, dsource)

# Full request: variables, county-within-state geography, and the API key
data_url = '{}?get={}&for=county:{}&in=state:{}&key={}'.format(
    base_url, cols, county, state, myAPI
)

In [6]:
# Fetch the API response; row 0 of the parsed frame carries the column names,
# so promote it to the header and then drop it from the data rows.
df = pd.read_json(data_url)
df.columns = df.iloc[0]
# Previously `df10[1:]`, an undefined name that raised NameError on a fresh kernel.
# .copy() makes the frame independent so the column assignment below is not
# performed on a slice of the raw response.
df = df.iloc[1:].copy()

# Make a new column, the state-county FIPS code, and use it as the index
df['stco'] = df['state'] + df['county']
df = df.set_index('stco')
df.head()

Unnamed: 0_level_0,NAME,P001001,GEO_ID,PCT020001,PCT020002,PCT020003,PCT020004,PCT020005,PCT020006,PCT020007,...,PCT020027,PCT020028,PCT020029,PCT020030,PCT020031,PCT020032,NAME,PCT020001ERR,state,county
stco,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5131,"Sebastian County, Arkansas",125744,0500000US05131,2235,1283,417,0,0,0,417,...,78,45,160,0,5,214,"Sebastian County, Arkansas",,5,131
5133,"Sevier County, Arkansas",17058,0500000US05133,173,173,29,0,0,0,25,...,0,0,0,0,0,0,"Sevier County, Arkansas",,5,133
5135,"Sharp County, Arkansas",17264,0500000US05135,189,170,25,0,0,0,25,...,0,18,0,0,0,1,"Sharp County, Arkansas",,5,135
5137,"Stone County, Arkansas",12394,0500000US05137,151,135,24,0,0,0,24,...,0,10,0,0,1,5,"Stone County, Arkansas",,5,137
5139,"Union County, Arkansas",41639,0500000US05139,524,454,186,0,0,0,186,...,18,37,14,0,0,1,"Union County, Arkansas",,5,139


## Select 31-County Region Only

In [7]:
# Load the 31-county region recode sheet
geo_reg = pd.read_excel('data/31CR_CoxSub.xlsx')

# Turn the FIPS code columns into zero-padded, fixed-width strings
# (StCo -> 5 characters, St -> 2, Co -> 3)
for fips_col, pad_fmt in (('StCo', '{0:0>5}'), ('St', '{0:0>2}'), ('Co', '{0:0>3}')):
    geo_reg[fips_col] = geo_reg[fips_col].map(pad_fmt.format)

# Normalise column names: spaces become underscores, everything lowercase
geo_reg = geo_reg.rename(columns=lambda label: label.replace(' ', '_').lower())

# Join the recode table with the census frame on stco, then keep only the
# rows flagged 31CR (gets rid of PA counties for this use)
df = geo_reg.merge(df, on='stco')
df = df.loc[df['reg'] == '31CR']

Unnamed: 0,stco,st,co,stco_num,reg,subreg1,subreg2,stco_lbl,co_lbl
0,9001,9,1,9001,31CR,CT,CT,"Fairfield County, Connecticut",Fairfield
1,9005,9,5,9005,31CR,CT,CT,"Litchfield County, Connecticut",Litchfield
2,9009,9,9,9009,31CR,CT,CT,"New Haven County, Connecticut",New Haven
3,34003,34,3,34003,31CR,NJ In,North NJ,"Bergen County, New Jersey",Bergen
4,34013,34,13,34013,31CR,NJ In,North NJ,"Essex County, New Jersey",Essex


### Select only the homelessness/base variables:
- #### PCT020001: Total group quarters pop
- #### PCT020027: Non-institutionalized pop - emergency and transitional shelters for people experiencing homelessness
- #### PCT020032: Other non-institutionalized facilities (incl Targeted non-shelter outside locations)

Refer to the Census API `variables.json` for additional information.

In [10]:
# Working frame: the three identifier columns plus the four census variables,
# with the variables given readable names
new_names = {
    'P001001': 'TotPop_10',
    'PCT020001': 'GQPop_10',
    'PCT020027': 'ShelPop_10',
    'PCT020032': 'TNSPop_10',
}
dff = df[['stco', 'co_lbl', 'subreg1', *new_names]].copy().rename(columns=new_names)

# Cast every column after the three identifiers from string to integer
variables = dff.columns[3:].tolist()
dff = dff.astype(dict.fromkeys(variables, int))

In [11]:
# Homeless count = shelter population + other non-institutional (TNS) population
dff['HomPop_10'] = dff['ShelPop_10'].add(dff['TNSPop_10'])

# Express each count as a share of the total group quarters population;
# the share column is the count column's name with a trailing 'P'
for count_col in ('ShelPop_10', 'TNSPop_10', 'HomPop_10'):
    dff[count_col + 'P'] = dff[count_col].div(dff['GQPop_10'])

In [13]:
# Use the state-county FIPS code as the row index
dff = dff.set_index('stco')

In [14]:
# Display the county-level table built above
dff

Unnamed: 0_level_0,co_lbl,subreg1,TotPop_10,GQPop_10,ShelPop_10,TNSPop_10,HomPop_10,ShelPop_10P,TNSPop_10P,HomPop_10P
stco,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
9001,Fairfield,CT,916829,19168,648,1147,1795,0.033806,0.059839,0.093646
9005,Litchfield,CT,189927,2804,49,107,156,0.017475,0.03816,0.055635
9009,New Haven,CT,862477,29198,572,1347,1919,0.01959,0.046133,0.065724
34003,Bergen,NJ In,905116,10422,193,558,751,0.018519,0.053541,0.072059
34013,Essex,NJ In,783969,23772,1746,3300,5046,0.073448,0.138819,0.212267
34017,Hudson,NJ In,634266,9378,631,808,1439,0.067285,0.086159,0.153444
34019,Hunterdon,NJ Out,128349,4569,15,50,65,0.003283,0.010943,0.014226
34021,Mercer,NJ Out,366513,18805,497,347,844,0.026429,0.018453,0.044882
34023,Middlesex,NJ In,809858,23835,365,399,764,0.015314,0.01674,0.032054
34025,Monmouth,NJ Out,630380,7670,260,362,622,0.033898,0.047197,0.081095


In [15]:
# Write the county-level table to an Excel workbook in the working directory
dff.to_excel(excel_writer='nycmetro_homeless_county_2010.xlsx')

## Aggregate counties to subregions 
### (Subregion 1) - 7 Subregion Definition

In [18]:
# Sum the county counts within each subregion. Only the count columns are
# aggregated: on pandas >= 2.0 a bare .sum() also "sums" the text column
# co_lbl (concatenating county names), and adding up county-level shares is
# meaningless because the shares are recomputed from the totals below.
count_cols = ['TotPop_10', 'GQPop_10', 'ShelPop_10', 'TNSPop_10', 'HomPop_10']
dfff = dff.groupby('subreg1')[count_cols].sum()

# Recompute each share from the subregion totals
# (denominator: total group quarters population of the subregion)
dfff['ShelPop_10P'] = dfff['ShelPop_10'] / dfff['GQPop_10']
dfff['TNSPop_10P'] = dfff['TNSPop_10'] / dfff['GQPop_10']
dfff['HomPop_10P'] = dfff['HomPop_10'] / dfff['GQPop_10']

In [19]:
# Display the subregion-level totals and shares
dfff

Unnamed: 0_level_0,TotPop_10,GQPop_10,ShelPop_10,TNSPop_10,HomPop_10,ShelPop_10P,TNSPop_10P,HomPop_10P
subreg1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CT,1969233,51170,1269,2601,3870,0.0248,0.050831,0.07563
LI,2832882,51072,1501,2361,3862,0.02939,0.046229,0.075619
Low Hud,1360510,38479,1349,2991,4340,0.035058,0.077731,0.112789
Mid Hud,930341,47793,739,3314,4053,0.015463,0.069341,0.084803
NJ In,4986654,98066,4198,7328,11526,0.042808,0.074725,0.117533
NJ Out,1959766,41917,898,1149,2047,0.021423,0.027411,0.048835
NYC,8175133,185530,29562,15541,45103,0.159338,0.083765,0.243104


In [21]:
# Write the subregion-level table to an Excel workbook in the working directory
dfff.to_excel(excel_writer='nycmetro_homeless_subregion_2010.xlsx')