# Population data for NYC, NYC counties, and Red Hook site

I took population data from the US Census Bureau, via the American FactFinder portal, to get annual population figures for NYC, its five counties, and the Red Hook site (defined by Kings County census tracts 53, 59, and 85, as identified in the geo_scope notebook). The files are ACS 5-year estimates (table B01003) for 2010 through 2017. Other data sources either covered only the years before 2010 or gave estimates only once per decade after 2000, so this seemed to be the best way to get a population figure for each year.

In [4]:
import pandas as pd
import copy

In [5]:
def df_maker(sort, years=range(10, 18), data_dir='data/primary_source_data/census_pop'):
    """Build a table of yearly population totals for NYC geographies.

    For every year in ``years`` the matching ACS 5-year B01003 CSV export is
    read (its first line is skipped so the second line becomes the header).
    The 'Estimate; Total' column is then summed:

    * per county, over every row whose 'Geography' text contains the county
      name;
    * for the whole city, as the sum of the five county figures;
    * for the Red Hook site, over the rows whose 'Geography' text contains
      'Kings' and one of the tract tokens ' 53,', ' 59,' or ' 85,'.

    Parameters
    ----------
    sort : any
        Not read by this function. Kept in the signature so existing calls
        such as ``df_maker(sort=False)`` keep working.
    years : iterable of int, optional
        Two-digit years to load; each one selects the file
        ``ACS_<YY>_5YR_B01003_with_ann.csv`` and produces the column
        ``20<YY>_pop``. Defaults to 10 through 17.
    data_dir : str, optional
        Directory that holds the CSV exports.

    Returns
    -------
    pandas.DataFrame
        One row per geography (five counties, 'New York City Total',
        'Red Hook Site') with a 'Geography' column followed by one
        population column per requested year.
    """
    counties = ('New York County', 'Kings County', 'Bronx County',
                'Queens County', 'Richmond County')
    # The leading space and trailing comma keep ' 53,' from matching
    # tract labels such as '153,' or '53.01,'.
    rh_tracts = (' 53,', ' 59,', ' 85,')
    estimate_col = 'Estimate; Total'

    # Every year yields values in the same row order, so the columns can be
    # collected in one mapping and turned into a frame once at the end.
    table = {'Geography': list(counties) + ['New York City Total', 'Red Hook Site']}

    for year in years:
        raw = pd.read_csv('{}/ACS_{:02d}_5YR_B01003_with_ann.csv'.format(data_dir, year),
                          skiprows=1)
        geography = raw['Geography']
        estimate = raw[estimate_col]

        # Literal (non-regex) matching; rows with a missing Geography never match.
        county_pops = [
            estimate[geography.str.contains(name, regex=False, na=False)].sum()
            for name in counties
        ]

        # Red Hook tracts are only counted when the row is also in Kings County.
        in_kings = geography.str.contains('Kings', regex=False, na=False)
        rh_pop = sum(
            estimate[geography.str.contains(tract, regex=False, na=False) & in_kings].sum()
            for tract in rh_tracts
        )

        # The city total is derived from the county sums only, never from
        # matching the derived row labels against the raw data.
        table['20{:02d}_pop'.format(year)] = county_pops + [sum(county_pops), rh_pop]

    return pd.DataFrame(table)

In [6]:
# Build the combined population table (one row per geography, one column per year)
# and display it as the cell output. df_maker does not read `sort`.
pop_df = df_maker(sort=False)
pop_df

Unnamed: 0,Geography,2010_pop,2011_pop,2012_pop,2013_pop,2014_pop,2015_pop,2016_pop,2017_pop
0,New York County,1583345,1588257,1596735,1605272,1618398,1629507,1634989,1653877
1,Kings County,2466782,2486119,2512740,2539789,2570801,2595259,2606852,2635121
2,Bronx County,1365725,1374593,1386364,1397315,1413566,1428357,1436785,1455846
3,Queens County,2199169,2213977,2235008,2256400,2280602,2301139,2310011,2339280
4,Richmond County,463450,466034,468374,470223,471522,472481,473324,475948
5,New York City Total,8078471,8128980,8199221,8268999,8354889,8426743,8461961,8560072
6,Red Hook Site,10973,11374,10987,10670,10440,10768,11102,11316


In [9]:
# Export to csv (currently disabled; uncomment the line below to write pop_time.csv)
# pop_df.to_csv('data/created_data/pop_time.csv', index=False)