In [1]:
#!pip install rioxarray
import dask.array as da
import pystac_client
from pystac_client import Client
import leafmap
from datetime import datetime
import dask
import planetary_computer as pc
import geogif
import numpy as np
import xarray as xr
import rioxarray
import geopandas as gpd
import matplotlib.pyplot as plt
import geojson
import json
from geogif import dgif, gif
import pandas as pd
import shapely
from shapely.geometry import mapping
from shapely.geometry import box
import folium
from pystac import ItemCollection
from pyproj import CRS
from branca.colormap import LinearColormap


In [4]:
# Read the shapefiles in as GeoDataFrames
censusTracts = gpd.read_file('CENSUS2010TRACTS_POLY.shp')
p1 = pd.read_csv('census2010_p1.csv')
p2 = pd.read_csv('census2010_p2.csv')
boundary = gpd.read_file('Boundary/WorcesterBoundary.shp')

In [96]:
# Check CRSs
# The census tracts are in Mass Mainland
print(boundary.crs)
print(censusTracts.crs)

EPSG:3585
EPSG:26986


In [5]:
# Lets bring the layers into 3857 as the CRS to work with here, as it is preferable for web mapping
# Reproject the boundary layer to EPSG:3857
boundary = boundary.to_crs(epsg=3857)
censusTracts = censusTracts.to_crs(epsg=3857)


In [7]:
print(type(p1))
print(type(p2))
print(type(censusTracts))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'geopandas.geodataframe.GeoDataFrame'>


In [8]:
print((censusTracts.columns).tolist())
print((p1.columns).tolist())
print((p2.columns).tolist())

['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'SHAPE_AREA', 'SHAPE_LEN', 'geometry']
['OID_', 'FILEID', 'STUSAB', 'CHARITER', 'CIFSN', 'LOGRECNO', 'P0010001', 'P0010002', 'P0010003', 'P0010004', 'P0010005', 'P0010006', 'P0010007', 'P0010008', 'P0010009', 'P0010010', 'P0010011', 'P0010012', 'P0010013', 'P0010014', 'P0010015', 'P0010016', 'P0010017', 'P0010018', 'P0010019', 'P0010020', 'P0010021', 'P0010022', 'P0010023', 'P0010024', 'P0010025', 'P0010026', 'P0010027', 'P0010028', 'P0010029', 'P0010030', 'P0010031', 'P0010032', 'P0010033', 'P0010034', 'P0010035', 'P0010036', 'P0010037', 'P0010038', 'P0010039', 'P0010040', 'P0010041', 'P0010042', 'P0010043', 'P0010044', 'P0010045', 'P0010046', 'P0010047', 'P0010048', 'P0010049', 'P0010050', 'P0010051', 'P0010052', 'P0010053', 'P0010054', 'P0010055', 

In [9]:
# Join attributes from p1 and p1 to census tract polygons
# According to https://www.mass.gov/info-details/massgis-data-2010-us-census :
# [the column] "LOGPL94171	Joins to the LOGRECNO in the PL-94-171 tables"
# Feels fairly esoteric, this whole scheme. Census could learn a thing or two about table normalization???

# To merge we have to change the data type of the common column
censusTracts['LOGPL94171'] = censusTracts['LOGPL94171'].astype('int64')

# First merge p1 and p2 on LOGRECNO
p_merged = p1.merge(p2, on='LOGRECNO', how='inner')

# Perform the merge with censusTracts
census2010 = censusTracts.merge(p_merged, left_on='LOGPL94171', right_on='LOGRECNO', how='inner')


In [10]:
print(census2010.shape[0])
print(type(census2010))

1472
<class 'geopandas.geodataframe.GeoDataFrame'>


In [11]:
# Check that it now has all the columns
print((census2010.columns).tolist())

['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'SHAPE_AREA', 'SHAPE_LEN', 'geometry', 'OID__x', 'FILEID_x', 'STUSAB_x', 'CHARITER_x', 'CIFSN_x', 'LOGRECNO', 'P0010001', 'P0010002', 'P0010003', 'P0010004', 'P0010005', 'P0010006', 'P0010007', 'P0010008', 'P0010009', 'P0010010', 'P0010011', 'P0010012', 'P0010013', 'P0010014', 'P0010015', 'P0010016', 'P0010017', 'P0010018', 'P0010019', 'P0010020', 'P0010021', 'P0010022', 'P0010023', 'P0010024', 'P0010025', 'P0010026', 'P0010027', 'P0010028', 'P0010029', 'P0010030', 'P0010031', 'P0010032', 'P0010033', 'P0010034', 'P0010035', 'P0010036', 'P0010037', 'P0010038', 'P0010039', 'P0010040', 'P0010041', 'P0010042', 'P0010043', 'P0010044', 'P0010045', 'P0010046', 'P0010047', 'P0010048', 'P0010049', 'P0010050', 'P0010051', 'P0010052', 'P0010053', 'P0010054', 'P0

In [12]:
# Clip it to Worcester city boundary
worcester2010Census = gpd.clip(census2010, boundary)

In [13]:
# At this point the gdf has a lot of columns, most of which are, for the purposes of this project, unecessary
# Rename the ones we need
# I am referencing https://www.arcgis.com/home/item.html?id=abd94a6cc94645f88811ae91802909a0
# and the column aliases as viewed in the attribute table

# Mapping of old column names to new column names
column_mapping = {
    'P0010001': 'Total Pop',
    'P0010002': 'One race',
    'P0010003': 'White alone',
    'P0010004': 'Black',
    'P0010005': 'American Indian',
    'P0010006': 'Asian',
    'P0010007': 'Pacific Islander',
    'P0010008': 'Other Race',
    'P0020002': 'Hispanic',
    'PCT_P00300': 'Percent 18 Years and Over',
    'PCT_P00200': 'Percent Hispanic',
    'PCT_P002_1': 'Percent White',
    'PCT_P002_2': 'Percent Black',
    'PCT_P002_3': 'Percent American Indian',
    'PCT_P002_4': 'Percent Asian',
    'PCT_P002_5': 'Percent Pacific Islander',
    'PCT_P002_6': 'Percent Other Race',
    'PCT_P002_7': 'Percent two or more races',
    'PCT_H00100': 'Percent of Housing Occupied',
    'PCT_H001_1': 'Percent of Housing Vacant'
}

# Rename columns
worcester2010Census = worcester2010Census.rename(columns=column_mapping)


In [14]:
# Now lets get rid of unecessary columns
# Filter out columns that start with "P00"
columns_to_keep = [col for col in worcester2010Census.columns if not (col.startswith('P00') or col.startswith('H00'))]

# Select only the columns you want to keep
worcester2010Census = worcester2010Census[columns_to_keep]


In [15]:
print((worcester2010Census.columns).tolist())

['STATEFP10', 'COUNTYFP10', 'TRACTCE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'SHAPE_AREA', 'SHAPE_LEN', 'geometry', 'OID__x', 'FILEID_x', 'STUSAB_x', 'CHARITER_x', 'CIFSN_x', 'LOGRECNO', 'Total Pop', 'One race', 'White alone', 'Black', 'American Indian', 'Asian', 'Pacific Islander', 'Other Race', 'OID__y', 'FILEID_y', 'STUSAB_y', 'CHARITER_y', 'CIFSN_y', 'Hispanic']


In [16]:
# Get rid of More columns
columns_to_delete = ['STATEFP10', 'COUNTYFP10', 'TRACTCE10','NAME10', 'NAMELSAD10', 'MTFCC10', 'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'AREA_SQFT', 'AREA_ACRES', 'POP100_RE', 'HU100_RE', 'LOGPL94171', 'LOGSF1', 'LOGACS0610', 'LOGSF1C', 'OID__x', 'FILEID_x', 'STUSAB_x', 'CHARITER_x', 'CIFSN_x', 'LOGRECNO','OID__y', 'FILEID_y', 'STUSAB_y', 'CHARITER_y', 'CIFSN_y']

worcester2010Census = worcester2010Census.drop(columns=columns_to_delete)

In [17]:
# Because this data did not come from ESRI's well curated census data,
# it does not have percentages calculated. So lets do that ourselves
# List of population columns
population_columns = ['White alone', 'Black', 'American Indian', 'Asian', 'Pacific Islander', 'Other Race', 'Hispanic']

# Calculate the percentage columns
for column in population_columns:
    percentage_column = 'Percent ' + column.split()[0]  # Extracting the race/ethnicity from the column name
    worcester2010Census[percentage_column] = (worcester2010Census[column] / worcester2010Census['Total Pop']) * 100


In [18]:
print((worcester2010Census.columns).tolist())

['GEOID10', 'SHAPE_AREA', 'SHAPE_LEN', 'geometry', 'Total Pop', 'One race', 'White alone', 'Black', 'American Indian', 'Asian', 'Pacific Islander', 'Other Race', 'Hispanic', 'Percent White', 'Percent Black', 'Percent American', 'Percent Asian', 'Percent Pacific', 'Percent Other', 'Percent Hispanic']


In [19]:
# create a nice 'Tract' column
worcester2010Census['GEOID10'] = worcester2010Census['GEOID10'].astype(str)

# Extract the last 6 digits of each 'GEOID' and assign it to the 'Tract' column
worcester2010Census['Tract'] = worcester2010Census['GEOID10'].str[-6:]

In [22]:
worcester2010Census = worcester2010Census.drop(columns=['GEOID10', 'SHAPE_AREA', 'SHAPE_LEN'])

In [23]:
worcester2010Census.sort_values(by='Tract')

Unnamed: 0,geometry,Total Pop,One race,White alone,Black,American Indian,Asian,Pacific Islander,Other Race,Hispanic,Percent White,Percent Black,Percent American,Percent Asian,Percent Pacific,Percent Other,Percent Hispanic,Tract
1289,"MULTIPOLYGON (((-8001194.472 5203956.333, -800...",4806,4744,4551,110,4,62,1,16,151,94.694132,2.288806,0.083229,1.290054,0.020807,0.332917,3.141906,727100
1273,"MULTIPOLYGON (((-7996506.189 5207810.392, -799...",4126,4058,3845,48,4,144,0,17,98,93.18953,1.163354,0.096946,3.490063,0.0,0.412021,2.375182,728100
1309,"MULTIPOLYGON (((-7993447.257 5210348.298, -799...",3713,3664,3502,41,2,103,0,16,71,94.317264,1.104228,0.053865,2.774037,0.0,0.430918,1.9122,728200
712,"MULTIPOLYGON (((-7989270.436 5207875.417, -798...",4997,4962,4589,282,17,29,1,44,343,91.835101,5.643386,0.340204,0.580348,0.020012,0.880528,6.864118,729100
1276,"MULTIPOLYGON (((-7990987.331 5211737.907, -799...",2672,2656,2569,41,0,23,1,22,61,96.14521,1.534431,0.0,0.860778,0.037425,0.823353,2.282934,729200
663,"POLYGON ((-7992039.289 5209664.258, -7992064.8...",5923,5765,4761,590,17,294,3,100,399,80.381563,9.961168,0.287017,4.963701,0.05065,1.688334,6.736451,730100
1400,"POLYGON ((-7990797.196 5211232.064, -7990481.9...",5583,5470,4643,489,9,173,2,154,435,83.163174,8.758732,0.161204,3.098692,0.035823,2.758374,7.79151,730200
737,"POLYGON ((-7991575.293 5205918.653, -7991579.5...",4559,4399,3860,260,18,150,0,111,378,84.66769,5.703005,0.394823,3.290195,0.0,2.434744,8.291292,730300
1360,"POLYGON ((-7989747.209 5206351.212, -7989535.5...",5444,5244,3689,1043,25,96,1,390,1164,67.762675,19.158707,0.459221,1.763409,0.018369,7.16385,21.381337,730401
734,"POLYGON ((-7991010.897 5204572.632, -7991084.8...",1660,1594,1116,234,3,74,0,167,417,67.228916,14.096386,0.180723,4.457831,0.0,10.060241,25.120482,730402


In [32]:
len(worcester2010Census)

60

In [24]:
worcester2010Census['Tract'].nunique()

60

In [25]:
# To visualize this data we have to reproject to 4326, which is the best CRS for folium
worcester2010Census4326 = worcester2010Census.to_crs(epsg=4326)

In [31]:
import folium

# Create a Folium map
m = folium.Map(location=[42.2626, -71.8023], zoom_start=10)
colormap = LinearColormap(colors=['green', 'white'], vmin=17.7, vmax=88.7)

# Iterate over the rows of the worcesterCensus4326 GeoDataFrame and add polygons to the map with color based on the "Percent White" column
for idx, row in worcester2010Census4326.iterrows():
    # Check the value of "Percent White" for the current row
    print(f"Census Tract: {row['Tract']}, Percent White: {row['Percent White']}")
    
    # Style function for GeoJSON features
    style_function = lambda x, row=row: {
        'fillColor': colormap(row['Percent White']),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.7
    }
    # Add GeoJSON feature to the map
    #folium.GeoJson(row.geometry.__geo_interface__, style_function=style_function).add_to(m)
    # Add GeoJSON feature to the map with tooltip showing "Percent White" value
    
    folium.GeoJson(row.geometry.__geo_interface__, 
                   style_function=style_function,
                   tooltip=f"Census Tract: {row['Tract']}<br>Percent White: {row['Percent White']}").add_to(m)
# Display the map
m


Census Tract: 737100, Percent White: 94.83971853010165
Census Tract: 736500, Percent White: 96.63538567177844
Census Tract: 736300, Percent White: 94.60016488046166
Census Tract: 736200, Percent White: 94.35787211176788
Census Tract: 737300, Percent White: 93.63863707408565
Census Tract: 736400, Percent White: 93.91077824812764
Census Tract: 732901, Percent White: 70.11529126213593
Census Tract: 732802, Percent White: 71.25181950509462
Census Tract: 733101, Percent White: 79.83479105928085
Census Tract: 733102, Percent White: 72.84172661870504
Census Tract: 732801, Percent White: 79.97515013460344
Census Tract: 735200, Percent White: 91.00305379375146
Census Tract: 761300, Percent White: 93.59267734553775
Census Tract: 732902, Percent White: 86.05124034160228
Census Tract: 732700, Percent White: 64.06805485017776
Census Tract: 731204, Percent White: 49.03923138510808
Census Tract: 733000, Percent White: 50.79744816586922
Census Tract: 732600, Percent White: 66.6588290618387
Census Trac

In [27]:
# Check the range of values in the "Percent White" column
print(worcester2010Census['Percent White'].describe())

count    60.000000
mean     75.082830
std      15.460251
min      39.729237
25%      65.355804
50%      75.005656
75%      88.099622
max      96.635386
Name: Percent White, dtype: float64


In [28]:
# # to further inspect the data:
# worcester2010Census.to_csv("wooCensus2010.csv")

In [28]:
# For this project we are using 32619, so transform the CRS before downloading as .shp file
wooCensus2010 = worcester2010Census.to_crs(epsg=32619)

In [29]:
wooCensus2010.crs

<Projected CRS: EPSG:32619>
Name: WGS 84 / UTM zone 19N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 72°W and 66°W, northern hemisphere between equator and 84°N, onshore and offshore. Aruba. Bahamas. Brazil. Canada - New Brunswick (NB); Labrador; Nunavut; Nova Scotia (NS); Quebec. Colombia. Dominican Republic. Greenland. Netherlands Antilles. Puerto Rico. Turks and Caicos Islands. United States. Venezuela.
- bounds: (-72.0, 0.0, -66.0, 84.0)
Coordinate Operation:
- name: UTM zone 19N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [30]:
wooCensus2010.to_file("wooCensus2010.shp")