Skip to content

Commit

Permalink
Merge 3825fc9 into 218bffe
Browse files Browse the repository at this point in the history
  • Loading branch information
PyMap committed Aug 12, 2020
2 parents 218bffe + 3825fc9 commit 3e4aaea
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 12 deletions.
20 changes: 14 additions & 6 deletions synthpop/census_helpers.py
Expand Up @@ -6,15 +6,23 @@

# Retry failed connections to the Census API (up to 100 attempts per request)
sess = requests.Session()
adapter = requests.adapters.HTTPAdapter(max_retries = 100)
adapter = requests.adapters.HTTPAdapter(max_retries=100)
sess.mount('https://', adapter)

# TODO: add a docstring for the Census class


class Census:

def __init__(self, key):
self.c = census.Census(key, session = sess)
self.base_url = "https://s3-us-west-1.amazonaws.com/synthpop-data2/"
def __init__(self, key, acsyear=2016):
self.c = census.Census(key, session=sess)

if acsyear >= 2018:
storage = "https://storage.googleapis.com/synthpop-public/PUMS2018/pums_2018_acs5/"
else:
storage = "https://s3-us-west-1.amazonaws.com/synthpop-data2/"
self.base_url = storage
self.acsyear_files = acsyear
self.pums_relationship_file_url = self.base_url + "tract10_to_puma.csv"
self.pums_relationship_df = None
self.pums10_population_base_url = \
Expand Down Expand Up @@ -180,7 +188,7 @@ def download_population_pums(self, state, puma10=None, puma00=None, **kargs):
if (puma10 is None) & (puma00 is None):
return self._read_csv(self.pums_population_state_base_url % (state), **kargs)
pums = self._read_csv(self.pums10_population_base_url % (state, puma10), **kargs)
if puma00 is not None:
if (puma00 is not None) & (self.acsyear_files < 2018):
pums00 = self._read_csv(self.pums00_population_base_url % (state, puma00), **kargs)
pums = pd.concat([pums, pums00], ignore_index=True)
return pums
Expand All @@ -190,7 +198,7 @@ def download_household_pums(self, state, puma10=None, puma00=None, **kargs):
if (puma10 is None) & (puma00 is None):
return self._read_csv(self.pums_household_state_base_url % (state), **kargs)
pums = self._read_csv(self.pums10_household_base_url % (state, puma10), **kargs)
if puma00 is not None:
if (puma00 is not None) & (self.acsyear_files < 2018):
pums00 = self._read_csv(self.pums00_household_base_url % (state, puma00), **kargs)
pums = pd.concat([pums, pums00], ignore_index=True)

Expand Down
15 changes: 12 additions & 3 deletions synthpop/recipes/starter.py
Expand Up @@ -38,8 +38,9 @@ class Starter:
tract_to_puma_map : dictionary
keys are tract ids and pumas are puma ids
"""

def __init__(self, key, state, county, tract=None, acsyear=2016):
self.c = c = Census(key)
self.c = c = Census(key, acsyear)
self.state = state
self.county = county
self.tract = tract
Expand Down Expand Up @@ -117,9 +118,17 @@ def __init__(self, key, state, county, tract=None, acsyear=2016):

# List the required PUMS variables here. These are also the PUMS variables
# that will appear in the output synthetic population.
self.h_pums_cols = ('serialno', 'PUMA00', 'PUMA10', 'RT', 'NP',
self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP',
'TYPE', 'VEH', 'WIF', 'NOC', 'FINCP')
self.p_pums_cols = ('serialno', 'PUMA00', 'PUMA10', 'AGEP', 'RAC1P', 'SEX')
self.p_pums_cols = ('serialno', 'PUMA10', 'AGEP', 'RAC1P', 'SEX')

if self.acsyear < 2018:
self.h_pums_cols = list(self.h_pums_cols)
self.h_pums_cols.insert(1, 'PUMA00')
self.h_pums_cols = tuple(self.h_pums_cols)
self.p_pums_cols = list(self.p_pums_cols)
self.p_pums_cols.insert(1, 'PUMA00')
self.p_pums_cols = tuple(self.p_pums_cols)

def get_geography_name(self):
# this synthesis is at the block group level for most variables
Expand Down
14 changes: 11 additions & 3 deletions synthpop/recipes/starter2.py
Expand Up @@ -42,8 +42,9 @@ class Starter:
tract_to_puma_map : dictionary
keys are tract ids and pumas are puma ids
"""

def __init__(self, key, state, county, tract=None, acsyear=2016):
self.c = c = Census(key)
self.c = c = Census(key, acsyear)
self.state = state
self.county = county
self.tract = tract
Expand Down Expand Up @@ -179,11 +180,18 @@ def __init__(self, key, state, county, tract=None, acsyear=2016):

# List the required PUMS variables here. These are also the PUMS variables
# that will appear in the output synthetic population.
self.h_pums_cols = ('serialno', 'PUMA00', 'PUMA10', 'RT', 'NP', 'TYPE',
self.h_pums_cols = ('serialno', 'PUMA10', 'RT', 'NP', 'TYPE',
'R65', 'HINCP', 'VEH', 'MV', 'TEN', 'BLD', 'R18')
self.p_pums_cols = ('serialno', 'PUMA00', 'PUMA10', 'RELP', 'AGEP',
self.p_pums_cols = ('serialno', 'PUMA10', 'RELP', 'AGEP',
'ESR', 'RAC1P', 'HISP', 'SEX', 'SPORDER',
'PERNP', 'SCHL', 'WKHP', 'JWTR', 'SCH')
if self.acsyear < 2018:
self.h_pums_cols = list(self.h_pums_cols)
self.h_pums_cols.insert(1, 'PUMA00')
self.h_pums_cols = tuple(self.h_pums_cols)
self.p_pums_cols = list(self.p_pums_cols)
self.p_pums_cols.insert(1, 'PUMA00')
self.p_pums_cols = tuple(self.p_pums_cols)

def get_geography_name(self):
# this synthesis is at the block group level for most variables
Expand Down

0 comments on commit 3e4aaea

Please sign in to comment.