# Population change between 2005 and 2019

In [1]:
from getpass import getpass
from pathlib import Path

import cenpy as cen
import pandas as pd



In [2]:
# Prompt for the key at run time so it is never stored in the notebook source.
CENSUS_API_KEY = getpass(prompt='Enter your Census API Key: ')

Enter your Census API Key: ········


In [3]:
# Instantiate cenpy's ACS product interface.
# NOTE(review): `acs` is not referenced by any later cell shown here; the queries below
# go through cen.remote.APIConnection instead. Confirm whether this call is still needed.
acs = cen.products.ACS()

In [4]:
# 2005 vintage: total population for every county in New York State (state FIPS 36)
columns = ['B01001_001E']  # renamed to 'population' further down
g_unit, g_filter = 'county', {'state': '36'}

con = cen.remote.APIConnection('ACSDT1Y2005', apikey=CENSUS_API_KEY)
df_05 = con.query(
    columns,
    geo_unit=g_unit,
    geo_filter=g_filter,
)
df_05.head()

Unnamed: 0,B01001_001E,state,county
0,280570,36,1
1,1309640,36,5
2,186680,36,7
3,79201,36,9
4,77016,36,11


In [5]:
# 2010 vintage: total population for every county in New York State (state FIPS 36)
columns = ['B01001_001E']  # renamed to 'population' further down
g_unit, g_filter = 'county', {'state': '36'}

con = cen.remote.APIConnection('ACSDT1Y2010', apikey=CENSUS_API_KEY)
df_10 = con.query(
    columns,
    geo_unit=g_unit,
    geo_filter=g_filter,
)
df_10.head()

Unnamed: 0,B01001_001E,state,county
0,303833,36,1
1,1386657,36,5
2,200272,36,7
3,80229,36,9
4,79978,36,11


In [6]:
# 2015 vintage: total population for every county in New York State (state FIPS 36)
columns = ['B01001_001E']  # renamed to 'population' further down
g_unit, g_filter = 'county', {'state': '36'}

con = cen.remote.APIConnection('ACSDT1Y2015', apikey=CENSUS_API_KEY)
df_15 = con.query(
    columns,
    geo_unit=g_unit,
    geo_filter=g_filter,
)
df_15.head()

Unnamed: 0,B01001_001E,state,county
0,160266,36,83
1,2339150,36,81
2,1644518,36,61
3,78288,36,11
4,326037,36,87


In [7]:
# 2019 vintage: total population for every county in New York State (state FIPS 36)
columns = ['B01001_001E']  # renamed to 'population' further down
g_unit, g_filter = 'county', {'state': '36'}

con = cen.remote.APIConnection('ACSDT1Y2019', apikey=CENSUS_API_KEY)
df_19 = con.query(
    columns,
    geo_unit=g_unit,
    geo_filter=g_filter,
)
df_19.head()

Unnamed: 0,B01001_001E,state,county
0,1418207,36,5
1,460528,36,67
2,80485,36,19
3,98320,36,79
4,109777,36,69


In [9]:
# tag each yearly frame with its survey year (kept as a string label,
# since the labels become column names after the pivot below)
for yearly_frame, year_label in (
    (df_05, '2005'),
    (df_10, '2010'),
    (df_15, '2015'),
    (df_19, '2019'),
):
    yearly_frame['year'] = year_label

In [10]:
# stack the four yearly frames into one long table (one row per county per year);
# ignore_index=True replaces the row labels repeated across the yearly frames
# with a single unique 0..n-1 index, so label-based operations stay unambiguous
df_stack = pd.concat([df_05, df_10, df_15, df_19], ignore_index=True)

# sanity check: all four years should be present
df_stack.year.unique()

array(['2005', '2010', '2015', '2019'], dtype=object)

In [11]:
# give the Census variable code a readable column name
df_stack = df_stack.rename({'B01001_001E': 'population'}, axis='columns')

In [14]:
# population arrives as strings; cast it to 64-bit integers so it can be used in arithmetic
df_stack = df_stack.astype({'population': 'int64'})

In [15]:
# keep only the five New York City counties (boroughs), identified by county code
# Codes: https://www.census.gov/library/reference/code-lists/ansi.html
# NY codes: https://www2.census.gov/geo/docs/reference/codes/files/st36_ny_cou.txt
nyc_county_codes = [
    '061',  # Manhattan
    '081',  # Queens
    '047',  # Brooklyn (Kings)
    '005',  # Bronx
    '085',  # Staten Island (Richmond County)
]
is_nyc_county = df_stack['county'].isin(nyc_county_codes)
df_stack = df_stack[is_nyc_county]

df_stack.shape

(20, 4)

In [16]:
# add a human-readable borough name for each county code.
# An exact dictionary lookup replaces five str.contains() calls, which do regex
# substring matching rather than equality; codes missing from the mapping are left as NaN.
borough_by_county = {
    '061': 'Manhattan',
    '081': 'Queens',
    '047': 'Brooklyn',
    '005': 'Bronx',
    '085': 'Staten Island',
}

# assign() returns a new frame, so the cell is safe to re-run and never writes into a filtered slice
df_stack = df_stack.assign(borough=df_stack['county'].map(borough_by_county))

In [17]:
# reshape long to wide: one row per county/borough, one population column per year
pop_wide = df_stack.pivot(
    index=['county', 'borough'],
    columns='year',
    values='population',
)

# move county/borough out of the index and back into ordinary columns
df_pop = pop_wide.reset_index()
df_pop.head()

year,county,borough,2005,2010,2015,2019
0,5,Bronx,1309640,1386657,1455444,1418207
1,47,Brooklyn,2446016,2508340,2636735,2559903
2,61,Manhattan,1529774,1586698,1644518,1628706
3,81,Queens,2215339,2233841,2339150,2253858
4,85,Staten Island,455344,469363,474558,476143


In [18]:
# population at the two endpoints of the study period
pop_2005 = df_pop['2005']
pop_2019 = df_pop['2019']

# absolute change, 2005 -> 2019
df_pop['difference'] = pop_2019.sub(pop_2005)

# relative change, expressed as a percentage of the 2005 population
df_pop['percent_change'] = pop_2019.sub(pop_2005).div(pop_2005).mul(100)

df_pop.head()

year,county,borough,2005,2010,2015,2019,difference,percent_change
0,5,Bronx,1309640,1386657,1455444,1418207,108567,8.289835
1,47,Brooklyn,2446016,2508340,2636735,2559903,113887,4.65602
2,61,Manhattan,1529774,1586698,1644518,1628706,98932,6.467099
3,81,Queens,2215339,2233841,2339150,2253858,38519,1.738741
4,85,Staten Island,455344,469363,474558,476143,20799,4.567755


In [19]:
# write the borough-level table to disk without the row index.
# The output folder is created first: to_csv raises OSError if 'data/' does not exist,
# which breaks a run from a fresh checkout.
out_path = Path('data/population-over-time.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)
df_pop.to_csv(out_path, index=False)