# County Maps
The three maps below show maps of Fairfield County, South Carolina.
To generate these maps, first scroll down to the section titled "Run These Cells First".  Run all cells below that heading. Then, you may return to the top of this file and generate the three maps.

### Average Household Size

In [91]:
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

folium.Choropleth(
    geo_data=geojson,
    name='Household Size',
    data=cbg_data,
    columns=['census_block_group', 'household_size'], # , # ['State', 'Unemployment'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Reds',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Average Household Size (# of people)'
).add_to(m)

marker_cluster = MarkerCluster(options={'maxClusterRadius': 10})
for building in buildings:
    add_marker(building, marker_cluster)
m.add_child(marker_cluster)

m

### Median Household Income

In [92]:
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

folium.Choropleth(
    geo_data=geojson,
    name='Median Household Income',
    data=cbg_data,
    columns=['census_block_group', 'B19013e1'], # B01001e1, # ['State', 'Unemployment'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Greens',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Median Household Income ($)'
).add_to(m)

marker_cluster = MarkerCluster(options={'maxClusterRadius': 10})
for building in buildings:
    add_marker(building, marker_cluster)
m.add_child(marker_cluster)

m

### Population

In [89]:
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)  # , tiles='Stamen Toner')

folium.Choropleth(
    geo_data=geojson,
    name='Population',
    data=cbg_data,
    columns=['census_block_group', 'B01001e1'], # , # ['State', 'Unemployment'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Population (# of people)'
).add_to(m)


marker_cluster = MarkerCluster(options={'maxClusterRadius': 10})
for building in buildings:
    add_marker(building, marker_cluster)
m.add_child(marker_cluster)
    
# marker_cluster.add_to(m)
    
m

## No Health Insurance

In [118]:
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)  # , tiles='Stamen Toner')

folium.Choropleth(
    geo_data=geojson,
    name='Population',
    data=cbg_data,
    columns=['census_block_group', 'percent_uninsured'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Oranges',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Percentage With No Health Insurance (%)'
).add_to(m)


marker_cluster = MarkerCluster(options={'maxClusterRadius': 10})
for building in buildings:
    add_marker(building, marker_cluster)
m.add_child(marker_cluster)
    
# marker_cluster.add_to(m)
    
m

In [86]:
def add_marker(building, marker_cluster):
    types = {
        'Apartments': {
            'icon': 'home',
            'color': 'orange'
        },
        'Nursing Home': {
            'icon': 'heart',
            'color': 'red'
        }
    }
    
    types_obj = types[building['type']]
    
    marker = folium.Marker(
        location=building['coordinates'],
        tooltip=f"<i>[{building['type']}]</i><br/>{building['name']}<br/>\n{building['address']}",
        icon=folium.Icon(color=types_obj['color'], icon=types_obj['icon'])
    ) # .add_to(map_obj)
    marker_cluster.add_child(marker)
    
    

# Run Cells Below First:
(after this, you can run the cells above to generate the maps)

In [85]:
import pandas as pd
import geopandas as gpd 
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from geopy.geocoders import Nominatim

In [36]:
locator = Nominatim(user_agent='myGeocoder')
location = locator.geocode('S-20-151, Fairfield, SC')
print(location, 'Latitude = {}, Longitude = {}'.format(location.latitude, location.longitude))

20, Redwood Road, Hanson, Rose Park, Salt Lake City, Salt Lake County, Utah, 84116, United States of America Latitude = 40.76892042845975, Longitude = -111.93893027676306


In [45]:
apartments = [
    {
        'name': 'Lamplighter Apartments',
        'address': 'Winnsboro Mills, SC 29180',
        'coordinates': (34.350296, -81.086537),
    },
    {
        'name': 'Deer Wood Apartments',
        'address': '647 US-321 BYP, Winnsboro, SC 29180',
        'coordinates': (34.361933, -81.098159),
    },
    {
        'name': 'Winnsboro Arms Apartments',
        'address': '61 Winnsboro Arms Dr, Winnsboro, SC 29180',
        'coordinates': (34.372125, -81.105137),
    },
    {
        'name': 'Gibson APT',
        'coordinates': (34.374366, -81.088088),
        'address': '308 Palmer St, Winnsboro, SC 29180',
    },
    {
        'name': 'Castlewood Apartments Phase I',
        'address': '200 Castlewood Dr, Winnsboro, SC 29180',
        'coordinates': (34.369261, -81.095067),
    },
    {
        'name': 'Laurelwood Aparrtments',
        'coordinates': (34.372969, -81.094324),
        'address': '16A Laurel Wood Ct, Winnsboro, SC 29180',
    },
]

In [72]:
nursing_homes = [
    {
        'name': 'PruittHealth - Ridgeway',
        'address': '213 Tanglewood Court, Ridgeway, SC 29130',
        'coordinates': (34.302030, -80.964782),
    },
    {
        'name': 'Blue Ridge in the Fields',
        'address': '117 Bellefield Rd, Ridgeway, SC 29130',
        'coordinates': (34.330878, -80.907010),
    },
    {
        'name': 'Ridgeway Manor Healthcare Center',
        'address': '117 Bellefield Rd, Ridgeway, SC 29130',
        'coordinates': (34.329630, -80.906810),
    },
]

In [73]:
buildings = []

for building in apartments:
    building['type'] = 'Apartments'
    buildings.append(building)

for building in nursing_homes:
    building['type'] = 'Nursing Home'
    buildings.append(building)

In [1]:
import os
from pathlib import Path
import json
import linecache
import functools

import pandas as pd
import geopandas as gpd 
import matplotlib.pyplot as plt
import folium 
from shapely.ops import nearest_points
from shapely.geometry import LineString

In [2]:
DATA_PATH = Path('/data/safegraph/safegraph_open_census_data')
PREPROCESSED_DATA_PATH = Path('../../../data/preprocessed/safegraph/safegraph_open_census_data')

In [3]:
# Only needs to be run once, to generate data slices
# This data is now included with the repo anyway -- so this does not need to be run any more

county_fips_code = '45039'  # Fairfield County, South Carolina

#!mkdir -p {PREPROCESSED_DATA_PATH}
#census_data_file_names = !ls {DATA_PATH}/data/ | grep [0-9] #  | cut -f 1 -d .  # eliminate .csv suffix
county_directory = PREPROCESSED_DATA_PATH / "data/county" / county_fips_code
#!mkdir -p {county_directory}
#for file_name in census_data_file_names:
#    !touch {county_directory}/{file_name}
#    print(county_directory/file_name)
#    !head -n 1 "{DATA_PATH}/data/{file_name}" > {county_directory}/{file_name}
#    !cat "{DATA_PATH}/data/{file_name}" | grep ^{county_fips_code}.*$ >> {county_directory}/{file_name}

In [99]:
# See: https://www.safegraph.com/blog/beginners-guide-to-census
    
table_ids = [
    'B01001e1',   # SEX BY AGE: Total: Total population -- (Estimate),Sex By Age, Total, Total Population -- (Estimate),,,,,
    #'B00001e1',   # UNWEIGHTED SAMPLE COUNT OF THE POPULATION: Total: Total population -- (Estimate),Unweighted Sample Count Of The Population, Total, Total Population -- (Estimate),,,,,
    #'B00001m1',   # UNWEIGHTED SAMPLE COUNT OF THE POPULATION: Total: Total population -- (Margin of Error),Unweighted Sample Count Of The Population, Total, Total Population -- (Margin Of Error),,,,,
    'B19013e1',   # Median Household Income
    #'B00002e1',   # UNWEIGHTED SAMPLE HOUSING UNITS: Total: Housing units -- (Estimate),Unweighted Sample Housing Units, Total, Housing Units -- (Estimate),,,,,
    #'B00002m1',   # UNWEIGHTED SAMPLE HOUSING UNITS: Total: Housing units -- (Margin of Error),Unweighted Sample Housing Units, Total, Housing Units -- (Margin Of Error),,,,,
    'B25001e1',   # HOUSING UNITS: Total: Housing units -- (Estimate),Housing Units, Total, Housing Units -- (Estimate),,,,,
    #'B25001m1',   # HOUSING UNITS: Total: Housing units -- (Margin of Error),Housing Units, Total, Housing Units -- (Margin Of Error),,,,,
    
    # 'B27010e35',  # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 to 64 years: With one type of health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 35 To 64 Years, With One Type Of Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,

    'B27010e17',  # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: Under 18 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, Under 18 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
    'B27010e33',  # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 18 to 34 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 18 To 34 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
    'B27010e50',  # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 to 64 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 35 To 64 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
    'B27010e66',  # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 65 years and over: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 65 Years And Over, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
    
    'C17002e1',   #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: Total: Population for whom poverty status is determined -- (Estimate),Ratio Of Income To Poverty Level In The Past 12 Months, Total, Population For Whom Poverty Status Is Determined -- (Estimate),,,,,
    
    # unavailable:
    # 'B17017e10',   # POVERTY STATUS IN THE PAST 12 MONTHS BY HOUSEHOLD TYPE BY AGE OF HOUSEHOLDER: Total: Households -- (Estimate),Poverty Status In The Past 12 Months By Household Type By Age Of Householder, Total, Households -- (Estimate),,,,,
]

cbg_field_desc = pd.read_csv(PREPROCESSED_DATA_PATH / 'metadata/cbg_field_descriptions.csv')
cbg_field_desc[cbg_field_desc.table_id.isin(table_ids)]

Unnamed: 0,table_id,field_full_name,field_level_1,field_level_2,field_level_3,field_level_4,field_level_5,field_level_6,field_level_7,field_level_8
4,B01001e1,SEX BY AGE: Total: Total population -- (Estimate),Sex By Age,Total,Total Population -- (Estimate),,,,,
3094,B19013e1,MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS ...,Median Household Income In The Past 12 Months ...,Total,Households -- (Estimate),,,,,
4394,B25001e1,HOUSING UNITS: Total: Housing units -- (Estimate),Housing Units,Total,Housing Units -- (Estimate),,,,,
6140,B27010e17,TYPES OF HEALTH INSURANCE COVERAGE BY AGE: Und...,Types Of Health Insurance Coverage By Age,Under 18 Years,No Health Insurance Coverage,Civilian Noninstitutionalized Population -- (...,,,,
6158,B27010e33,TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 18 ...,Types Of Health Insurance Coverage By Age,18 To 34 Years,No Health Insurance Coverage,Civilian Noninstitutionalized Population -- (...,,,,
6177,B27010e50,TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 ...,Types Of Health Insurance Coverage By Age,35 To 64 Years,No Health Insurance Coverage,Civilian Noninstitutionalized Population -- (...,,,,
6194,B27010e66,TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 65 ...,Types Of Health Insurance Coverage By Age,65 Years And Over,No Health Insurance Coverage,Civilian Noninstitutionalized Population -- (...,,,,
7016,C17002e1,RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 1...,Ratio Of Income To Poverty Level In The Past 1...,Total,Population For Whom Poverty Status Is Determi...,,,,,


In [113]:
county_files = ['cbg_b00.csv', 'cbg_b01.csv', 'cbg_b19.csv', 'cbg_b25.csv', 'cbg_b27.csv', 'cbg_c17.csv'] #  !ls {county_directory}

dfs = [pd.read_csv(county_directory / file, dtype={'census_block_group': str}) for file in county_files]
merged = dfs[0]
for df in dfs[1:]:
    merged = pd.merge(merged, df, on=['census_block_group'])
cbg_data = merged
    
#combiner = lambda left, right: pd.merge(left, right, on=['census_block_group'])
#cbg_data = functools.reduce(combiner, dfs[0], dfs[1:])


'''cbg_b19 = pd.read_csv(county_directory / 'cbg_b19.csv', dtype={'census_block_group': str})
cbg_b01 = pd.read_csv(county_directory / 'cbg_b01.csv', dtype={'census_block_group': str})
cbg_data = pd.merge(cbg_b01, cbg_b19, on=['census_block_group'])'''
cbg_data = cbg_data[['census_block_group'] + table_ids]
#cbg_data.dropna().head()
cbg_data

Unnamed: 0,census_block_group,B01001e1,B19013e1,B25001e1,B27010e17,B27010e33,B27010e50,B27010e66,C17002e1
0,450399601001,1101,29250.0,624,0,67,125,0,1101
1,450399601002,931,39931.0,523,29,0,62,0,931
2,450399602001,1015,46898.0,517,54,66,108,0,1015
3,450399602002,1260,30481.0,782,0,123,79,0,1260
4,450399602003,1467,48125.0,642,0,52,185,0,1409
5,450399603001,1046,68902.0,1079,0,0,0,0,1046
6,450399603002,890,,449,82,0,167,0,890
7,450399603003,1482,30625.0,678,61,118,160,0,1387
8,450399603004,2257,31346.0,982,4,170,126,0,2132
9,450399604001,962,26250.0,451,0,30,73,0,933


In [114]:
sum(cbg_data['B01001e1'])  # total population

23025

In [115]:
cbg_data['household_size'] = cbg_data['B01001e1'] / cbg_data['B25001e1']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cbg_data['household_size'] = cbg_data['B01001e1'] / cbg_data['B25001e1']


In [116]:
cbg_data['num_uninsured'] = sum([cbg_data[col_id] for col_id in ['B27010e17', 'B27010e33', 'B27010e50', 'B27010e66']])
cbg_data['percent_uninsured'] = cbg_data['num_uninsured'] * 100.0 / cbg_data['B01001e1']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cbg_data['num_uninsured'] = sum([cbg_data[col_id] for col_id in ['B27010e17', 'B27010e33', 'B27010e50', 'B27010e66']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cbg_data['percent_uninsured'] = cbg_data['num_uninsured'] * 100.0 / cbg_data['B01001e1']


In [117]:
cbg_data

Unnamed: 0,census_block_group,B01001e1,B19013e1,B25001e1,B27010e17,B27010e33,B27010e50,B27010e66,C17002e1,household_size,num_uninsured,percent_uninsured
0,450399601001,1101,29250.0,624,0,67,125,0,1101,1.764423,192,17.438692
1,450399601002,931,39931.0,523,29,0,62,0,931,1.780115,91,9.774436
2,450399602001,1015,46898.0,517,54,66,108,0,1015,1.96325,228,22.463054
3,450399602002,1260,30481.0,782,0,123,79,0,1260,1.611253,202,16.031746
4,450399602003,1467,48125.0,642,0,52,185,0,1409,2.285047,237,16.155419
5,450399603001,1046,68902.0,1079,0,0,0,0,1046,0.969416,0,0.0
6,450399603002,890,,449,82,0,167,0,890,1.982183,249,27.977528
7,450399603003,1482,30625.0,678,61,118,160,0,1387,2.185841,339,22.874494
8,450399603004,2257,31346.0,982,4,170,126,0,2132,2.298371,300,13.291981
9,450399604001,962,26250.0,451,0,30,73,0,933,2.133038,103,10.706861


In [9]:
# Census Block Groups

In [10]:
def geojson_for_county(state_abbreviation="SC",
                       county_name="Fairfield County",
                       county_fips_code='45039',
                       data_path=DATA_PATH):
    '''
    Will only run if you have the full geometry/cbg.geojson file from the SafeGraph census dataset.
    Otherwise, don't run this function: call
    '''
    
    path_suffix = 'geometry/cbg.geojson'
    
    if county_fips_code == '45039':
        file_name = PREPROCESSED_DATA_PATH / 'geometry/fips' / county_fips_code / 'cbg.geojson'
        with open(file_name, 'r') as f:
            return json.loads('\n'.join(f.readlines()))
    
    else:
        geojson_path = data_path / path_suffix

        header = !head -n 5 {geojson_path}
        footer = !tail -n 2 {geojson_path}

        # lines to search file for county of interest.
        # must be found by inspection using "tail | head" method below, and checking whether 
        # the state of interest is included.
        # If not included, search up or down via binary search (file is sorted by state)
        # TODO: write the binary search explicitly here, if we need to generalize to other states/counties
        line_start_search = 170000
        line_end_search = 180000
        num_lines = line_end_search - line_start_search

        # stream = os.popen(f"""< {geojson_path} tail -n +{line_start_search} | head -n {num_lines} | grep '"State": "{state_abbreviation}", "County": "{county_name}"'  """)
        #stream = os.popen(f"""cat {geojson_path} | tail -n +{line_start_search} | head -n {num_lines} | grep '"State": "{state_abbreviation}", "County": "{county_name}"'  """)
        #county_cbgs = stream.readlines()

        county_cbgs = [linecache.getline(str(geojson_path), line_number).strip() for line_number in range(line_start_search, line_end_search)]
        county_cbgs = [line for line in county_cbgs if f'"State": "{state_abbreviation}", "County": "{county_name}"' in line]

        print(len(county_cbgs))

        # remove final character from last entry in list:
        # a trailing "," that will mess up the JSON parsing
        if county_cbgs[-1][-1] == ',':
            county_cbgs[-1] = county_cbgs[-1][:-1]

        geojson = '\n'.join(header + county_cbgs + footer)
        with open(PREPROCESSED_DATA_PATH / 'geometry/fips/' / county_fips_code / 'cbg.geojson', 'w') as f:
            f.write(geojson)
        return json.loads(geojson)
    

In [11]:
# only needs to be run once
geojson = geojson_for_county()

In [12]:
len([f['properties']['CensusBlockGroup'] for f in geojson['features']])
#[f['properties']['CensusBlockGroup'] for f in cbgs_json['features']]

18

In [13]:
#geojson['features'][0]['properties']