In [None]:
# Set Up Analysis Part 1: imports and environment configuration

# Import libraries for file handling, data access, geospatial analysis, and modeling

# Reproducible file paths
import os # Reproducible file paths
from glob import glob # Find files by pattern
import pathlib # Find the home folder
import time # formatting time
import warnings # Filter warning messages
import zipfile # Work with zip files
from io import BytesIO # Stream binary (zip) files

# Numerical arrays, plotting, and HTTP requests
import numpy as np # adjust images 
import matplotlib.pyplot as plt # Overlay pandas and xarry plots, Overlay raster and vector data
import requests # Request data over HTTP

# Work with tabular, vector, and raster data
import cartopy.crs as ccrs # CRSs (Coordinate Reference Systems)
import geopandas as gpd # work with vector data
import geoviews as gv # holoviews extension for data visualization
import hvplot.pandas # Interactive tabular and vector data
import hvplot.xarray # Interactive raster
import pandas as pd # Group and aggregate
import pystac_client # Modify returns from API
import shapely # Perform geometric operations on spatial data
import xarray as xr # Adjust images
import rioxarray as rxr # Work with geospatial raster data
from rioxarray.merge import merge_arrays # Merge rasters

# Processing and regression related
from scipy.ndimage import convolve # Image and signal processing
from sklearn.model_selection import KFold # Cross validation
from scipy.ndimage import label # Labels connected features in an array
from sklearn.linear_model import LinearRegression # Work with linear regression models
from sklearn.model_selection import train_test_split # Split data into subsets - evaluate model
from tqdm.notebook import tqdm # Visualize progress of iterative operations

# import to be able to save plots
import holoviews as hv # be able to save hvplots

# Silence warnings: simplefilter('ignore') applies to every warning category,
# so this hides all warnings raised after this point, not only third-party ones
warnings.simplefilter('ignore')

# Configure GDAL's HTTP retry settings so a momentary network disruption
# does not make GDAL give up on a remote read
os.environ.update({
    "GDAL_HTTP_MAX_RETRY": "5",
    "GDAL_HTTP_RETRY_DELAY": "1",
})

In [None]:
# Set Up Analysis Part 2: project data directory

# Folder names, below the user's home folder, that lead to the project data
project_subdirs = ('documents', 'earth-analytics', 'urban_greenspace_denver')

# Build the full path and create the directory if it is not there yet
den_census_tracts_data_dir = os.path.join(pathlib.Path.home(), *project_subdirs)
os.makedirs(den_census_tracts_data_dir, exist_ok=True)

# Show the data dir to confirm its location
den_census_tracts_data_dir

In [None]:
# Download the census tracts from CDC (only once) Part 1 of 1

# Define info for census tract download
den_census_tracts_dir = os.path.join(den_census_tracts_data_dir, 'denver-tract')
os.makedirs(den_census_tracts_dir, exist_ok=True)
# Use a concrete file name for the cached shapefile. A '*.shp' wildcard is a
# glob pattern, not a path: os.path.exists() and to_file() treat it as a
# literal file name, and '*' is not a legal file name character on Windows.
den_census_tracts_path = os.path.join(den_census_tracts_dir, 'denver-tract.shp')

# Only download once: skip the request when the cached shapefile exists
if not os.path.exists(den_census_tracts_path):
    den_census_tracts_url = (
        'https://data.cdc.gov/download/x7zy-2xmx/application%2Fzip'
    )
    den_census_tracts_gdf = gpd.read_file(den_census_tracts_url)
    # Keep only the rows whose PlaceName is 'Denver' before caching
    denver_tracts_gdf = den_census_tracts_gdf[
        den_census_tracts_gdf.PlaceName == 'Denver'
    ]
    denver_tracts_gdf.to_file(den_census_tracts_path, index=False)

# Load the census tract data from the cached shapefile
denver_tracts_gdf = gpd.read_file(den_census_tracts_path)

# Preview the Denver tracts gdf
denver_tracts_gdf.head()

In [None]:
# Download the census tracts for state of CO (only once) Part 1 of 1

# Define info for census tract download
den_tiger_tracts_dir = os.path.join(den_census_tracts_data_dir, 'colorado-tracts')
os.makedirs(den_tiger_tracts_dir, exist_ok=True)
# Use a concrete file name for the cached shapefile. A '*.shp' wildcard is a
# glob pattern, not a path: os.path.exists() and to_file() treat it as a
# literal file name, and '*' is not a legal file name character on Windows.
den_tiger_tracts_path = os.path.join(
    den_tiger_tracts_dir, 'denver-county-tracts.shp'
)

# Only download once: skip the request when the cached shapefile exists
if not os.path.exists(den_tiger_tracts_path):
    co_tiger_tracts_url = (
        'https://www2.census.gov/geo/tiger/TIGER2024/TRACT/tl_2024_08_tract.zip'
    )
    co_tiger_tracts_gdf = gpd.read_file(co_tiger_tracts_url)
    # Within Colorado (state FIPS 08, per the file name), COUNTYFP '031' is
    # Denver County, which contains the city of Denver
    den_tiger_tracts_gdf = co_tiger_tracts_gdf[
        co_tiger_tracts_gdf.COUNTYFP == '031'
    ]
    den_tiger_tracts_gdf.to_file(den_tiger_tracts_path, index=False)

# Load the census tract data from the cached shapefile
den_tiger_tracts_gdf = gpd.read_file(den_tiger_tracts_path)

# Preview the Denver County TIGER tracts gdf
den_tiger_tracts_gdf.head()

In [None]:
# Spatially join the TIGER tracts to the CDC tracts, keeping every TIGER
# tract that intersects at least one CDC Denver tract

# The CDC gdf was filtered to PlaceName 'Denver' when it was loaded, so it
# stands in for the city extent here and no separate city boundary
# shapefile has to be downloaded

# Both layers are reprojected to Mercator so they share a CRS for the join
joined_den_tracts_gdf = (
    # Left side: TIGER tracts
    den_tiger_tracts_gdf
    .to_crs(ccrs.Mercator())
    .sjoin(
        # Right side: CDC tracts for Denver
        denver_tracts_gdf.to_crs(ccrs.Mercator()),
        # Join type ("inner", "left", "right"): inner keeps matches only
        how="inner",
        # Spatial relationship ("intersects", "within", "contains")
        predicate="intersects",
    )
)

# Explore the result
joined_den_tracts_gdf.head()

In [None]:
# Count the rows of the joined gdf to check whether the join
# produced duplicate tracts
num_rows = len(joined_den_tracts_gdf)
print("Number of rows:", num_rows)

In [None]:
# Remove rows that repeat the same geometry

# Normalize each geometry first so that identical shapes share one
# consistent representation and compare as duplicates
normalized_geometry = joined_den_tracts_gdf.geometry.normalize()

# Note: this writes the normalized geometry back onto the joined gdf itself
joined_den_tracts_gdf['geometry'] = normalized_geometry

# Keep the first row for each distinct geometry
dropped_joined_den_tracts_gdf = joined_den_tracts_gdf.drop_duplicates(
    subset=['geometry'],
    keep='first',
)

# Preview the de-duplicated gdf
dropped_joined_den_tracts_gdf.head()

In [None]:
# Site plot -- census tract outlines over satellite imagery
# The plot is kept in a variable so it can be saved and then displayed
joined_denver_tracts_plot = (
    dropped_joined_den_tracts_gdf
    # Reproject to Mercator, matching the crs passed to hvplot below
    .to_crs(ccrs.Mercator())
    # Draw unfilled tract outlines on top of the Esri imagery tiles
    .hvplot(
        geo=True,
        crs=ccrs.Mercator(),
        tiles='EsriImagery',
        title='City of Denver - Site Plot',
        fill_color=None,
        line_color='darkorange',
        line_width=3,
        frame_width=600,
        xaxis=None,
        yaxis=None,
    )
)

# Save the plot as html so it can be displayed online
hv.save(joined_denver_tracts_plot, 'joined_den_site_plot_using_tiger_and_cdc.html')

# Display the plot
joined_denver_tracts_plot