### Load Libraries

In [5]:
import os
import math
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import numpy.ma as ma
import pandas as pd
import rioxarray as rxr
import rasterio
from rasterio import plot as rioplot
from rasterio.plot import plotting_extent
import geopandas as gpd

import rasterstats as rs # zonalstatistics function used to extract raster values
import earthpy as et
import earthpy.plot as ep

import uuid


### Load Raster Layers - Tree Data

In [6]:
# Canopy height model (CHM) GeoTIFF, opened masked and squeezed to drop
# length-1 dimensions.
file_name = '../data/greenness/NY_CHM_10Int260m.tif'
chm = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

# Carbon density GeoTIFF.
file_name = '../data/greenness/NY_CHM529_CCset0_Carbondensity.tif'
cd = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)
# The carbon density layer is heavily zero-inflated, which would skew the
# zonal statistics, so every exact-zero cell is replaced with NaN.
cd = cd.where(cd != 0, other=np.nan)

# Crown area GeoTIFF.
file_name = '../data/L1/crown_area_ha.tiff'
ca = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)



In [7]:
# Tree density raster (file name indicates a scaled kernel density at 1 ha),
# opened masked and squeezed to drop length-1 dimensions.
file_name = '../data/L1/tree_kde_dens_1ha_scaled.tiff'
td = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

In [8]:
# Heat anomaly raster (file name indicates a smoothed deviation surface in
# UTM 18N at 1 ha), opened masked and squeezed to drop length-1 dimensions.
file_name = '../data/heat/f_deviation_smooth_UTM18N_1ha.tiff'
ha = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

### Load Raster Layers - Housing Density

In [25]:
# Scaled kernel density of subsidized housing.
file_name = '../data/L1/KDE_subsidized_1ha_scaled.tiff'
sub_kde_scaled = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

# Scaled kernel density of rent-stabilized housing.
file_name = '../data/L1/KDE_RentStabilized_1ha_scaled.tiff'
rstab_kde_scaled = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

# Scaled kernel density of all affordable housing.
file_name = '../data/L1/KDE_allaffordable_1ha_scaled_match.tiff'
aff_kde_scaled = (
    rxr.open_rasterio(file_name, masked=True)
    .squeeze()
)

### Load the NYC Neighborhood Tabulation Areas (NTAs)

- https://www.nyc.gov/assets/planning/download/pdf/planning-level/nyc-population/census2010/ntas.pdf

Neighborhood Tabulation Areas, or NTAs, are aggregations of
census tracts that are subsets of New York City's 55 Public Use
Microdata Areas (PUMAs). Primarily because of these constraints
(NTAs are built from whole census tracts and must nest within PUMAs),
NTA boundaries and their associated names may not definitively
represent neighborhoods.

In [10]:
# Read the 2010 NTA polygon layer and reproject it to EPSG:6347.
# NOTE(review): presumably EPSG:6347 matches the CRS of the rasters used for
# the zonal statistics - confirm.
nta = gpd.read_file(
    "../data/2010 Neighborhood Tabulation Areas (NTAs)/",
    layer="geo_export_65077844-70de-4bb7-91ce-729f59546bad",
).to_crs(epsg=6347)

### Extract Values to NTAs

In [11]:
# Make sure the output directory exists. makedirs(exist_ok=True) creates any
# missing parent directories and does nothing when the directory is already
# there, so this cell is safe to re-run.
output_path = os.path.join("../data/L1/", "outputs")
os.makedirs(output_path, exist_ok=True)

# Write the NTA polygons (NTA code + geometry only) to a shapefile; the
# zonal statistics below read their zones from this file.
NTA_path = os.path.join(output_path, "NTA_EPSG6347.shp")
nta = nta[["ntacode", "geometry"]]
nta.to_file(NTA_path)

In [12]:
# Zonal statistics of the canopy height model (CHM) for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it. The result is a list of GeoJSON-like features that
# keep the polygon properties and gain one property per requested statistic.
extracted_chm = rs.zonal_stats(
    NTA_path,
    chm.values,
    affine=chm.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

In [13]:
# Build a GeoDataFrame from the CHM zonal-stat features and prefix every
# statistic column with "chm_". The geometry column is kept on this frame.
chm_df = gpd.GeoDataFrame.from_features(extracted_chm).rename(
    columns={
        "min": "chm_min",
        "mean": "chm_mean",
        "max": "chm_max",
        "median": "chm_median",
        "percentile_25": "chm_25_percentile",
        "percentile_75": "chm_75_percentile",
    }
)

In [14]:
# Zonal statistics of the carbon density raster for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it. The result is a list of GeoJSON-like features that
# keep the polygon properties and gain one property per requested statistic.
extracted_cd = rs.zonal_stats(
    NTA_path,
    cd.values,
    affine=cd.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

In [15]:
# Build a GeoDataFrame from the carbon density zonal-stat features and
# prefix every statistic column with "cd_". The geometry column is kept here.
cd_df = gpd.GeoDataFrame.from_features(extracted_cd).rename(
    columns={
        "min": "cd_min",
        "mean": "cd_mean",
        "max": "cd_max",
        "median": "cd_median",
        "percentile_25": "cd_25_percentile",
        "percentile_75": "cd_75_percentile",
    }
)

In [47]:
# Zonal statistics of the crown (canopy) area raster for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it.
extracted_ca = rs.zonal_stats(
    NTA_path,
    ca.values,
    affine=ca.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame. Note that the statistic columns for this layer
# use the "cc_" prefix (not "ca_").
ca_df = gpd.GeoDataFrame.from_features(extracted_ca).rename(
    columns={
        "min": "cc_min",
        "mean": "cc_mean",
        "max": "cc_max",
        "median": "cc_median",
        "percentile_25": "cc_25_percentile",
        "percentile_75": "cc_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
ca_df.drop(columns='geometry', inplace=True)

In [48]:
# Zonal statistics of the tree density raster for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it.
extracted_td = rs.zonal_stats(
    NTA_path,
    td.values,
    affine=td.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame, with every statistic column prefixed "td_".
td_df = gpd.GeoDataFrame.from_features(extracted_td).rename(
    columns={
        "min": "td_min",
        "mean": "td_mean",
        "max": "td_max",
        "median": "td_median",
        "percentile_25": "td_25_percentile",
        "percentile_75": "td_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
td_df.drop(columns='geometry', inplace=True)

In [49]:
# Zonal statistics of the subsidized housing KDE raster for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it.
extracted_sub = rs.zonal_stats(
    NTA_path,
    sub_kde_scaled.values,
    affine=sub_kde_scaled.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame, with every statistic column prefixed "sub_".
sub_df = gpd.GeoDataFrame.from_features(extracted_sub).rename(
    columns={
        "min": "sub_min",
        "mean": "sub_mean",
        "max": "sub_max",
        "median": "sub_median",
        "percentile_25": "sub_25_percentile",
        "percentile_75": "sub_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
sub_df.drop(columns='geometry', inplace=True)

In [50]:
# Zonal statistics of the rent-stabilized housing KDE raster for every NTA
# polygon. The raster is handed over as a bare array, so its affine
# transform is passed alongside it.
extracted_rstab = rs.zonal_stats(
    NTA_path,
    rstab_kde_scaled.values,
    affine=rstab_kde_scaled.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame, with every statistic column prefixed "rstab_".
rstab_df = gpd.GeoDataFrame.from_features(extracted_rstab).rename(
    columns={
        "min": "rstab_min",
        "mean": "rstab_mean",
        "max": "rstab_max",
        "median": "rstab_median",
        "percentile_25": "rstab_25_percentile",
        "percentile_75": "rstab_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
rstab_df.drop(columns='geometry', inplace=True)


In [51]:
# Zonal statistics of the all-affordable-housing KDE raster for every NTA
# polygon. The raster is handed over as a bare array, so its affine
# transform is passed alongside it.
extracted_aff = rs.zonal_stats(
    NTA_path,
    aff_kde_scaled.values,
    affine=aff_kde_scaled.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame, with every statistic column prefixed "aff_".
aff_df = gpd.GeoDataFrame.from_features(extracted_aff).rename(
    columns={
        "min": "aff_min",
        "mean": "aff_mean",
        "max": "aff_max",
        "median": "aff_median",
        "percentile_25": "aff_25_percentile",
        "percentile_75": "aff_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
aff_df.drop(columns='geometry', inplace=True)

In [52]:
# Zonal statistics of the heat anomaly raster for every NTA polygon.
# The raster is handed over as a bare array, so its affine transform is
# passed alongside it.
extracted_ha = rs.zonal_stats(
    NTA_path,
    ha.values,
    affine=ha.rio.transform(),
    nodata=-math.inf,
    stats="min mean max median percentile_25 percentile_75",
    copy_properties=True,
    geojson_out=True,
)

# Features -> GeoDataFrame, with every statistic column prefixed "ha_".
ha_df = gpd.GeoDataFrame.from_features(extracted_ha).rename(
    columns={
        "min": "ha_min",
        "mean": "ha_mean",
        "max": "ha_max",
        "median": "ha_median",
        "percentile_25": "ha_25_percentile",
        "percentile_75": "ha_75_percentile",
    }
)

# Only the NTA code and the statistics are needed from this frame.
ha_df.drop(columns='geometry', inplace=True)

In [53]:
# Combine the three housing tables (rent stabilized, subsidized, all
# affordable) into one table keyed on the NTA code.
housing = pd.merge(
    pd.merge(rstab_df, sub_df, on="ntacode"),
    aff_df,
    on="ntacode",
)

# Sanity check shown as the cell output: True only when the merged table and
# all three inputs have the same number of rows.
len(rstab_df) == len(sub_df) == len(aff_df) == len(housing)

True

In [54]:
# Display the column names of the merged housing table.
housing.columns

Index(['ntacode', 'rstab_min', 'rstab_max', 'rstab_mean', 'rstab_median',
       'rstab_25_percentile', 'rstab_75_percentile', 'sub_min', 'sub_max',
       'sub_mean', 'sub_median', 'sub_25_percentile', 'sub_75_percentile',
       'aff_min', 'aff_max', 'aff_mean', 'aff_median', 'aff_25_percentile',
       'aff_75_percentile'],
      dtype='object')

In [55]:
# Merge the tree metrics and the heat anomaly statistics into one table keyed
# on the NTA code. chm_df supplies the single `geometry` column of the result.
trees = pd.merge(chm_df, td_df, on="ntacode")
trees = pd.merge(trees, ca_df, on="ntacode")
# cd_df may still carry its own `geometry` column; merging it as-is would
# collide with the one from chm_df and pandas would emit `geometry_x` /
# `geometry_y` instead of a single `geometry`. Drop it from the right-hand
# side only (cd_df itself is left untouched); errors="ignore" keeps this
# working if the column has already been removed.
trees = pd.merge(
    trees,
    cd_df.drop(columns=["geometry"], errors="ignore"),
    on="ntacode",
)
trees = pd.merge(trees, ha_df, on="ntacode")


In [56]:
# Join the tree / heat table with the housing table on the NTA code to get
# one row of statistics per NTA.
full = pd.merge(left=trees, right=housing, on="ntacode")

In [57]:
# Display the column names of the fully merged table.
full.columns

Index(['geometry', 'ntacode', 'chm_min', 'chm_max', 'chm_mean', 'chm_median',
       'chm_25_percentile', 'chm_75_percentile', 'td_min', 'td_max', 'td_mean',
       'td_median', 'td_25_percentile', 'td_75_percentile', 'cc_min', 'cc_max',
       'cc_mean', 'cc_median', 'cc_25_percentile', 'cc_75_percentile',
       'cd_min', 'cd_max', 'cd_mean', 'cd_median', 'cd_25_percentile',
       'cd_75_percentile', 'ha_min', 'ha_max', 'ha_mean', 'ha_median',
       'ha_25_percentile', 'ha_75_percentile', 'rstab_min', 'rstab_max',
       'rstab_mean', 'rstab_median', 'rstab_25_percentile',
       'rstab_75_percentile', 'sub_min', 'sub_max', 'sub_mean', 'sub_median',
       'sub_25_percentile', 'sub_75_percentile', 'aff_min', 'aff_max',
       'aff_mean', 'aff_median', 'aff_25_percentile', 'aff_75_percentile'],
      dtype='object')

In [60]:
# Re-wrap the merged table as a plain pandas DataFrame before export.
full_df = pd.DataFrame(full)

In [64]:
# Preview the first rows of the merged table.
full_df.head()

Unnamed: 0,geometry,ntacode,chm_min,chm_max,chm_mean,chm_median,chm_25_percentile,chm_75_percentile,td_min,td_max,...,sub_mean,sub_median,sub_25_percentile,sub_75_percentile,aff_min,aff_max,aff_mean,aff_median,aff_25_percentile,aff_75_percentile
0,"POLYGON ((605421.903 4506784.225, 605448.810 4...",QN08,21.0,310.0,104.842549,102.0,67.0,140.0,-0.861108,-0.570612,...,-0.99839,-0.999623,-0.999945,-0.997748,-0.999967,-0.919046,-0.983191,-0.989676,-0.996343,-0.975634
1,"POLYGON ((593764.952 4526515.005, 593765.373 4...",BX28,21.0,596.0,116.107804,115.0,73.0,155.0,-0.879047,0.029427,...,-0.870812,-0.882188,-0.943205,-0.809557,-0.995159,-0.65379,-0.880516,-0.889148,-0.951447,-0.831231
2,"POLYGON ((600917.576 4504246.165, 600939.518 4...",QN55,21.0,320.0,98.258749,94.0,63.0,131.0,-0.943098,-0.635548,...,-0.959981,-0.963207,-0.985469,-0.939123,-0.999932,-0.94743,-0.987632,-0.994901,-0.998649,-0.979581
3,"POLYGON ((586207.563 4501643.060, 586204.695 4...",BK40,21.0,494.0,103.394134,102.0,72.0,131.0,-0.811371,-0.446587,...,-0.421639,-0.4225,-0.503374,-0.348141,-0.908128,-0.380261,-0.70623,-0.777376,-0.85501,-0.552042
4,"MULTIPOLYGON (((593990.695 4500133.738, 593989...",BK50,21.0,492.0,96.78554,95.0,64.0,126.0,-0.993603,-0.599328,...,-0.836995,-0.858412,-0.924801,-0.774021,-0.999629,-0.825703,-0.978464,-0.988881,-0.991963,-0.97581


In [63]:
# Save the merged NTA-level table as CSV. The DataFrame index is written as
# the first column (pandas' default for to_csv).
full_df.to_csv("../data/L1/NYC_NTA_extracted.csv")