SOCIO ECONOMIC
- median income (Census data)
- % of unemployment (Census data)
- % of renters (Census data)
- % of rent burdened (>30%) and severe burdened (>50%)

In [1]:
from census import Census
import pandas as pd
from us import states

import censusgeocode as cg

import math

from shapely.geometry import Point, Polygon
import geopandas as gpd

In [2]:
api_key = '13c056d339e329f0a4fbef4384e3667e505a55da'

In [3]:
c = Census(api_key, year=2015)

# Define Address and Study Area

In [4]:
# addr = "228 Boerum St. Brooklyn, NY"
addr = "373 VAN BRUNT STREET Brooklyn, NY"
# addr = "1 EAST LOOP ROAD"
addr_census_result = cg.onelineaddress(addr)

In [5]:
# get coordinates of addr
addr_point = Point(addr_census_result[0]['coordinates']['x'], addr_census_result[0]['coordinates']['y'])

In [6]:
# get study area pluto data
radius = 0.5 # miles
study_area = addr_point.buffer(radius / 69)
study_area_polygon = Polygon(study_area.exterior.coords)

# Load Census block group shape file and Filter out blocks in study area

In [7]:
# Load census block groups shape file
block_groups = gpd.read_file('data/ny_block_groups/tl_2015_36_bg.shp')

In [8]:
print(len(block_groups))
block_groups.head()

15463


Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,GEOID,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,36,5,17500,2,360050175002,Block Group 2,G5030,S,23694,0,40.8296335,-73.9152037,"POLYGON ((-73.915699 40.830536, -73.914852 40...."
1,36,5,14100,1,360050141001,Block Group 1,G5030,S,55934,0,40.8245444,-73.9131209,"POLYGON ((-73.916614 40.82499199999999, -73.91..."
2,36,5,14500,1,360050145001,Block Group 1,G5030,S,47026,0,40.8309816,-73.905087,"POLYGON ((-73.90584 40.831059, -73.905051 40.8..."
3,36,5,7500,2,360050075002,Block Group 2,G5030,S,109058,0,40.8202395,-73.9080678,"POLYGON ((-73.910348 40.819947, -73.910219 40...."
4,36,5,41800,1,360050418001,Block Group 1,G5030,S,140793,0,40.8989195,-73.8564428,"POLYGON ((-73.862875 40.895154, -73.861465 40...."


# Map the census block groups

In [9]:
block_groups_study_area = block_groups[block_groups.apply(lambda row: study_area_polygon.contains(row['geometry'].centroid), axis=1)]

In [10]:
import folium

addr_location = (addr_point.y, addr_point.x)
m = folium.Map(location=addr_location,zoom_start=14,tiles="Stamen Toner")

folium.Circle(addr_location, radius=radius * 1609.34).add_to(m)

folium.GeoJson(
    block_groups_study_area['geometry'],
    name='geojson'
).add_to(m)

<folium.features.GeoJson at 0x119796be0>

In [11]:
m

# Get relevant Census columns

In [12]:
totalHU = 'B25001_001E'
owner = 'B25003_002E'
renter = 'B25003_003E'
population = 'B01003_001E'
income = 'B19013_001E'
total_labor = 'B23025_001E'
unemployed = 'B23025_005E'
vacancy = 'B25004_001E'

census_mapper = {'NAME': 'name', population: 'population', income: 'income', renter: 'renters', owner: 'owners', totalHU: 'total_housing', total_labor: 'total_labor', unemployed: 'total_unemployed', vacancy: 'vacancy'}
tuple(census_mapper.keys())

('NAME',
 'B01003_001E',
 'B19013_001E',
 'B25003_003E',
 'B25003_002E',
 'B25001_001E',
 'B23025_001E',
 'B23025_005E',
 'B25004_001E')

### Get Relevant Data for BK

In [13]:
bk_data = c.acs5.state_county(tuple(census_mapper.keys()), '36', '047')[0]
bk_data = pd.DataFrame(bk_data, index=[0])
# bk_data
bk_data = bk_data.rename(census_mapper, axis='columns')
bk_data

Unnamed: 0,name,population,income,renters,owners,total_housing,total_labor,total_unemployed,vacancy,state,county
0,"Kings County, New York",2595259.0,48201.0,659069.0,272717.0,1017282.0,2051603.0,129667.0,85496.0,36,47


### Get Relevant Data for Study Area Census Blocks

In [14]:
column_requests = []

block_group_study_area_data = []
for block_group in block_groups_study_area.iterrows():
    bg = block_group[1]
    bg_data = c.acs5.state_county_blockgroup(tuple(census_mapper.keys()), bg['STATEFP'], bg['COUNTYFP'], bg['BLKGRPCE'], bg['TRACTCE'])
    print(bg_data)
    bg_data = bg_data[0]
    bg_data['index'] = bg.name
    block_group_study_area_data.append(bg_data)
block_group_study_area_data = pd.DataFrame.from_dict(block_group_study_area_data)
block_group_study_area_data.set_index('index')


block_group_study_area_data = block_group_study_area_data.rename(census_mapper, axis='columns')

[{'NAME': 'Block Group 1, Census Tract 53, Kings County, New York', 'B01003_001E': 405.0, 'B19013_001E': None, 'B25003_003E': 127.0, 'B25003_002E': 62.0, 'B25001_001E': 214.0, 'B23025_001E': 351.0, 'B23025_005E': 25.0, 'B25004_001E': 25.0, 'state': '36', 'county': '047', 'tract': '005300', 'block group': '1'}]
[{'NAME': 'Block Group 2, Census Tract 53, Kings County, New York', 'B01003_001E': 1198.0, 'B19013_001E': 63000.0, 'B25003_003E': 325.0, 'B25003_002E': 81.0, 'B25001_001E': 457.0, 'B23025_001E': 950.0, 'B23025_005E': 88.0, 'B25004_001E': 51.0, 'state': '36', 'county': '047', 'tract': '005300', 'block group': '2'}]
[{'NAME': 'Block Group 3, Census Tract 53, Kings County, New York', 'B01003_001E': 468.0, 'B19013_001E': 70795.0, 'B25003_003E': 135.0, 'B25003_002E': 62.0, 'B25001_001E': 226.0, 'B23025_001E': 365.0, 'B23025_005E': 23.0, 'B25004_001E': 29.0, 'state': '36', 'county': '047', 'tract': '005300', 'block group': '3'}]
[{'NAME': 'Block Group 4, Census Tract 53, Kings County, 

In [15]:
# Drop nans for now.. this makes calculations easier
block_group_study_area_data = block_group_study_area_data.dropna()
block_group_study_area_data

Unnamed: 0,population,income,total_labor,total_unemployed,total_housing,owners,renters,vacancy,name,block group,county,index,state,tract
1,1198.0,63000.0,950.0,88.0,457.0,81.0,325.0,51.0,"Block Group 2, Census Tract 53, Kings County, ...",2,47,7501,36,5300
2,468.0,70795.0,365.0,23.0,226.0,62.0,135.0,29.0,"Block Group 3, Census Tract 53, Kings County, ...",3,47,7502,36,5300
3,352.0,117625.0,292.0,7.0,174.0,69.0,76.0,29.0,"Block Group 4, Census Tract 53, Kings County, ...",4,47,7503,36,5300
4,789.0,55833.0,657.0,43.0,359.0,24.0,310.0,25.0,"Block Group 1, Census Tract 59, Kings County, ...",1,47,9875,36,5900
5,3113.0,14879.0,2314.0,365.0,1113.0,0.0,1113.0,0.0,"Block Group 1, Census Tract 85, Kings County, ...",1,47,9926,36,8500


# Summary Stats for Study Area

In [16]:
# summary_columns = ['population', 'percent_renters', 'percent_owners', 'median_income', 'percent_vacancy', 'percent_unemployment']

study_area_summary = {}

### Population

In [17]:
total_population = block_group_study_area_data['population'].sum()
study_area_summary['population'] = total_population

print("Total population of Study Area: {}".format(total_population))

Total population of Study Area: 5920.0


### Study Area Income

In [18]:
weighted_median_income = 0

for block_group in block_group_study_area_data.iterrows():
    bg = block_group[1]
    weighted_median_income += bg['income'] * bg['population']
    
weighted_median_income = weighted_median_income / total_population    
study_area_summary['median_income'] = weighted_median_income

print("Weighted median of Study area income: {}".format(weighted_median_income))

Weighted median of Study area income: 40604.83513513514


### Calculate the Study Area percent renters

In [19]:
percent_renters = block_group_study_area_data['renters'].sum() / block_group_study_area_data['total_housing'].sum() * 100
percent_owners = block_group_study_area_data['owners'].sum() / block_group_study_area_data['total_housing'].sum() * 100

study_area_summary['percent_owners'] = percent_owners
study_area_summary['percent_renters'] = percent_renters

print("{}% of residence are renters.".format(percent_renters))
print("{}% of residence are owners.".format(percent_owners))

84.1133533705453% of residence are renters.
10.133104336625161% of residence are owners.


### Calculate the Study Area Employment

In [20]:
study_area_summary['unemployment_rate'] = block_group_study_area_data['total_unemployed'].sum() / block_group_study_area_data['total_labor'].sum() * 100
print("{}% unemployment.".format(study_area_summary['unemployment_rate']))

11.489733508082132% unemployment.


### Calculate the study Area vacancy rate

In [21]:
study_area_summary['vacancy_rate'] = block_group_study_area_data['vacancy'].sum() / block_group_study_area_data['total_housing'].sum() * 100
print("{}% vacancy.".format(study_area_summary['vacancy_rate']))

5.7535422928295405% vacancy.


# Summary Stats for BK

In [22]:
study_area_summary

{'population': 5920.0,
 'median_income': 40604.83513513514,
 'percent_owners': 10.133104336625161,
 'percent_renters': 84.1133533705453,
 'unemployment_rate': 11.489733508082132,
 'vacancy_rate': 5.7535422928295405}

In [23]:
bk_data

Unnamed: 0,name,population,income,renters,owners,total_housing,total_labor,total_unemployed,vacancy,state,county
0,"Kings County, New York",2595259.0,48201.0,659069.0,272717.0,1017282.0,2051603.0,129667.0,85496.0,36,47


In [24]:
bk_summary = {}
bk_summary['population'] = float(bk_data['population'])
bk_summary['median_income'] = float(bk_data['income'])
bk_summary['percent_owners'] = float(bk_data['owners'] / bk_data['total_housing'] * 100)
bk_summary['percent_renters'] = float(bk_data['renters'] / bk_data['total_housing'] * 100)
bk_summary['unemployment_rate'] = float(bk_data['total_unemployed'] / bk_data['total_labor'] * 100)
bk_summary['vacancy_rate'] = float(bk_data['vacancy'] / bk_data['total_housing'] * 100)



In [25]:
bk_summary

{'population': 2595259.0,
 'median_income': 48201.0,
 'percent_owners': 26.8083972782375,
 'percent_renters': 64.78724680078876,
 'unemployment_rate': 6.3202773636029965,
 'vacancy_rate': 8.404355920973732}

# Plot BK and Study Area Summaries

In [26]:
summaries = pd.DataFrame([bk_summary, study_area_summary], index=['Brooklyn', 'Study Area'])
summaries

Unnamed: 0,median_income,percent_owners,percent_renters,population,unemployment_rate,vacancy_rate
Brooklyn,48201.0,26.808397,64.787247,2595259.0,6.320277,8.404356
Study Area,40604.835135,10.133104,84.113353,5920.0,11.489734,5.753542


In [27]:
percentage_columns = ['percent_owners', 'percent_renters', 'unemployment_rate','vacancy_rate']
percentage_summaries = summaries[percentage_columns]

In [28]:
percentage_summaries.T.plot(y=list(summaries.index), kind='bar').set_ylim(0,100)

(0, 100)

# Housing Types
Descriptions of different types of housing: https://www1.nyc.gov/site/rentguidelinesboard/resources/housing-types.page


# Census Table Search

In [29]:
for table in c.acs5.tables():
#     print(table)
    if 'VACANCY' in table['description']:
        print(table)
        print()

{'name': 'B25004', 'description': 'VACANCY STATUS', 'variables': 'https://api.census.gov/data/2015/acs/acs5/groups/B25004.json'}

{'name': 'B99253', 'description': 'IMPUTATION OF VACANCY STATUS', 'variables': 'https://api.census.gov/data/2015/acs/acs5/groups/B99253.json'}

