In [1]:
import geopandas as gpd
import geojson
import json
import pandas as pd
import folium
from pystac import ItemCollection
from pyproj import CRS
from branca.colormap import LinearColormap


In [2]:
# Read the shapefiles in as GeoDataFrames
US_LE= pd.read_csv('InputData/LifeExpectency_2010-2015.csv')
woo_tracts = gpd.read_file('InputData/woo_tracts_32619.shp')
#boundary = gpd.read_file('Boundary/WorcesterBoundary.shp')

In [3]:
# US_LE is a dataset with life expectancy numbers for every census tract in the US
# Lets cut it down a bit
woo_le = US_LE[US_LE['County'] == 'Worcester County, MA']
woo_le.head()

Unnamed: 0,State,County,Census Tract Number,Life Expectancy,Life Expectancy Range,Life Expectancy Standard Error
32546,Massachusetts,"Worcester County, MA",7001.0,,,
32547,Massachusetts,"Worcester County, MA",7011.0,79.7,79.6-81.6,1.319
32548,Massachusetts,"Worcester County, MA",7022.0,79.4,77.6-79.5,1.48
32549,Massachusetts,"Worcester County, MA",7031.0,73.7,56.9-75.1,1.4293
32550,Massachusetts,"Worcester County, MA",7032.0,78.5,77.6-79.5,1.3108


In [4]:
woo_tracts.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,AREA_ACRES,POP100_RE,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry
0,25,27,731900,25027731900,7319.0,Census Tract 7319,G5020,1089768.0,0.0,42.2774512,...,269.2685,5227,2255,190207,171752,4131,171752,1089695.0,5186.509038,"POLYGON ((270059.170 4684950.719, 270040.382 4..."
1,25,27,731700,25027731700,7317.0,Census Tract 7317,G5020,1258339.0,0.0,42.2638178,...,310.9188,2761,1511,190197,171742,4129,171742,1258249.0,5244.712623,"POLYGON ((269583.224 4682557.243, 269552.618 4..."
2,25,27,733101,25027733101,7331.01,Census Tract 7331.01,G5020,2904414.0,161203.0,42.2338553,...,757.4725,2058,860,190276,171821,4148,171821,3065395.0,8565.051482,"POLYGON ((267842.540 4680277.271, 267815.289 4..."
3,25,27,730100,25027730100,7301.0,Census Tract 7301,G5020,5633432.0,9059.0,42.3224582,...,1394.1897,5923,2459,190089,171634,4106,171634,5642108.0,12561.787121,"POLYGON ((269820.112 4689563.097, 269801.016 4..."
4,25,27,731102,25027731102,7311.02,Census Tract 7311.02,G5020,893826.0,0.0,42.2632052,...,220.8532,2511,1063,190160,171705,4121,171705,893764.6,4092.460307,"POLYGON ((267378.482 4682961.112, 267365.217 4..."


In [5]:
len(woo_tracts)

44

In [6]:
# The tract name formats don't fully line up. 
# Compare woo_tracts['NAME10'] and US_LE['Census Tract Number']
# We need to do some string and datatype operations to fix that
woo_tracts['NAME10'] = woo_tracts['NAME10'].apply(lambda x: str(x) + '.0' if '.' not in str(x) else str(x))
woo_tracts.head()

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,AREA_ACRES,POP100_RE,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry
0,25,27,731900,25027731900,7319.0,Census Tract 7319,G5020,1089768.0,0.0,42.2774512,...,269.2685,5227,2255,190207,171752,4131,171752,1089695.0,5186.509038,"POLYGON ((270059.170 4684950.719, 270040.382 4..."
1,25,27,731700,25027731700,7317.0,Census Tract 7317,G5020,1258339.0,0.0,42.2638178,...,310.9188,2761,1511,190197,171742,4129,171742,1258249.0,5244.712623,"POLYGON ((269583.224 4682557.243, 269552.618 4..."
2,25,27,733101,25027733101,7331.01,Census Tract 7331.01,G5020,2904414.0,161203.0,42.2338553,...,757.4725,2058,860,190276,171821,4148,171821,3065395.0,8565.051482,"POLYGON ((267842.540 4680277.271, 267815.289 4..."
3,25,27,730100,25027730100,7301.0,Census Tract 7301,G5020,5633432.0,9059.0,42.3224582,...,1394.1897,5923,2459,190089,171634,4106,171634,5642108.0,12561.787121,"POLYGON ((269820.112 4689563.097, 269801.016 4..."
4,25,27,731102,25027731102,7311.02,Census Tract 7311.02,G5020,893826.0,0.0,42.2632052,...,220.8532,2511,1063,190160,171705,4121,171705,893764.6,4092.460307,"POLYGON ((267378.482 4682961.112, 267365.217 4..."


In [7]:
woo_tracts_duplicates = woo_tracts[woo_tracts.duplicated(subset=['NAME10'], keep=False)]
woo_tracts_duplicates

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,AREA_ACRES,POP100_RE,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry


In [8]:
LE_duplicates = woo_le[woo_le.duplicated(subset=['Census Tract Number'], keep=False)]
print(len(woo_le))
print(len(LE_duplicates))
LE_duplicates

172
0


Unnamed: 0,State,County,Census Tract Number,Life Expectancy,Life Expectancy Range,Life Expectancy Standard Error


In [9]:
woo_le['Census Tract Number'] = woo_le['Census Tract Number'].astype(str)

# Perform the join
woo_tracts_le = woo_tracts.merge(woo_le[['Census Tract Number', 'Life Expectancy']], left_on='NAME10', right_on='Census Tract Number', how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  woo_le['Census Tract Number'] = woo_le['Census Tract Number'].astype(str)


In [10]:
# Display the resulting GeoDataFrame
print(len(woo_tracts_le))
woo_tracts_le.head()

44


Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,GEOID10,NAME10,NAMELSAD10,MTFCC10,ALAND10,AWATER10,INTPTLAT10,...,HU100_RE,LOGPL94171,LOGSF1,LOGACS0610,LOGSF1C,SHAPE_AREA,SHAPE_LEN,geometry,Census Tract Number,Life Expectancy
0,25,27,731900,25027731900,7319.0,Census Tract 7319,G5020,1089768.0,0.0,42.2774512,...,2255,190207,171752,4131,171752,1089695.0,5186.509038,"POLYGON ((270059.170 4684950.719, 270040.382 4...",7319.0,78.9
1,25,27,731700,25027731700,7317.0,Census Tract 7317,G5020,1258339.0,0.0,42.2638178,...,1511,190197,171742,4129,171742,1258249.0,5244.712623,"POLYGON ((269583.224 4682557.243, 269552.618 4...",7317.0,77.9
2,25,27,733101,25027733101,7331.01,Census Tract 7331.01,G5020,2904414.0,161203.0,42.2338553,...,860,190276,171821,4148,171821,3065395.0,8565.051482,"POLYGON ((267842.540 4680277.271, 267815.289 4...",7331.01,77.0
3,25,27,730100,25027730100,7301.0,Census Tract 7301,G5020,5633432.0,9059.0,42.3224582,...,2459,190089,171634,4106,171634,5642108.0,12561.787121,"POLYGON ((269820.112 4689563.097, 269801.016 4...",7301.0,79.2
4,25,27,731102,25027731102,7311.02,Census Tract 7311.02,G5020,893826.0,0.0,42.2632052,...,1063,190160,171705,4121,171705,893764.6,4092.460307,"POLYGON ((267378.482 4682961.112, 267365.217 4...",7311.02,82.5


In [11]:
# Great, now we have a life expectency number for each tract in Worcester
# We also have a lot of uncessary columns
print((woo_tracts_le.columns).tolist())

['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'SHAPE_AREA', 'SHAPE_LEN', 'geometry', 'Census Tract Number', 'Life Expectancy']


In [12]:
# Get rid of More columns
columns_to_delete = ['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'GEOID10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'SHAPE_AREA', 'SHAPE_LEN', 'Census Tract Number']

woo_tracts_le = woo_tracts_le.drop(columns=columns_to_delete)

woo_tracts_le.rename(columns={'NAME10': 'Tract'}, inplace=True)

In [13]:
woo_tracts_le

Unnamed: 0,Tract,geometry,Life Expectancy
0,7319.0,"POLYGON ((270059.170 4684950.719, 270040.382 4...",78.9
1,7317.0,"POLYGON ((269583.224 4682557.243, 269552.618 4...",77.9
2,7331.01,"POLYGON ((267842.540 4680277.271, 267815.289 4...",77.0
3,7301.0,"POLYGON ((269820.112 4689563.097, 269801.016 4...",79.2
4,7311.02,"POLYGON ((267378.482 4682961.112, 267365.217 4...",82.5
5,7325.0,"POLYGON ((269569.081 4682264.672, 269567.981 4...",72.9
6,7305.0,"POLYGON ((269091.659 4687128.029, 269093.735 4...",77.1
7,7304.02,"POLYGON ((270463.517 4685784.119, 270439.303 4...",79.8
8,7309.02,"POLYGON ((265513.664 4684567.206, 265554.903 4...",80.1
9,7303.0,"POLYGON ((270061.440 4686842.810, 270053.851 4...",79.9


In [14]:
# # Lets check out worcesterCensus
# print((worcester2020Census.columns).tolist())

In [15]:
# Lets visualize to investigate
# To visualize this data we have to reproject to 4326, which is the best CRS for folium
woo_tracts4326 = woo_tracts_le.to_crs(epsg=4326)
woo_tracts4326.head()

Unnamed: 0,Tract,geometry,Life Expectancy
0,7319.0,"POLYGON ((-71.78875 42.28287, -71.78896 42.282...",78.9
1,7317.0,"POLYGON ((-71.79357 42.26120, -71.79393 42.261...",77.9
2,7331.01,"POLYGON ((-71.81373 42.24018, -71.81405 42.239...",77.0
3,7301.0,"POLYGON ((-71.79348 42.32429, -71.79371 42.324...",79.2
4,7311.02,"POLYGON ((-71.82043 42.26418, -71.82057 42.263...",82.5


In [16]:
import folium
from branca.colormap import LinearColormap
import numpy as np

# Create a Folium map
m = folium.Map(location=[42.2626, -71.8023], zoom_start=12)

# Define colors for the colormap
colors = ['red', 'black', 'white']  # Red for NaN values, black for minimum, white for maximum

# Define the colormap
colormap = LinearColormap(colors=colors, vmin=50, vmax=90)

# Preprocess NaN values in the 'Life Expectancy' column
woo_tracts4326['Life Expectancy'] = woo_tracts4326['Life Expectancy'].fillna(-1)  # Fill NaN values with a unique value for red color

# Iterate over the rows of the LE_tracts4326 GeoDataFrame and add polygons to the map with color based on the "Life Expectancy" column
for idx, row in woo_tracts4326.iterrows():
    # Check the value of "Life Expectancy" for the current row
    #print(f"Census Tract: {row['Tract']}, Life Expectancy: {row['Life Expectancy']}")
    
    # Determine tooltip content
    life_expectancy = row['Life Expectancy']
    tooltip_text = f"Census Tract: {row['Tract']}, Life Expectancy: "
    tooltip_text += "NA" if life_expectancy == -1 else f"{life_expectancy}"
    
    # Style function for GeoJSON features
    style_function = lambda x, row=row: {
        'fillColor': colormap(row['Life Expectancy']),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.7
    }
    
    # Add GeoJSON feature to the map with customized tooltip
    folium.GeoJson(row.geometry.__geo_interface__, 
                   style_function=style_function,
                   tooltip=tooltip_text).add_to(m)

# Display the map
m

In [17]:
woo_tracts_le.crs

<Projected CRS: EPSG:32619>
Name: WGS 84 / UTM zone 19N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 72°W and 66°W, northern hemisphere between equator and 84°N, onshore and offshore. Aruba. Bahamas. Brazil. Canada - New Brunswick (NB); Labrador; Nunavut; Nova Scotia (NS); Quebec. Colombia. Dominican Republic. Greenland. Netherlands Antilles. Puerto Rico. Turks and Caicos Islands. United States. Venezuela.
- bounds: (-72.0, 0.0, -66.0, 84.0)
Coordinate Operation:
- name: UTM zone 19N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [18]:
#woo_tracts_le.to_file("LE_tracts.shp")