# Data Alignment
---

## Set up Notebook
---

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import fiona
import geopandas as gpd
import matplotlib.pyplot as plt
import rasterio
from rasterio import features
from rasterio.enums import MergeAlg
from rasterio.plot import show
import numpy as np
from shapely.geometry import Polygon, MultiPolygon, shape, Point

from osgeo import gdal
from osgeo import ogr

# from dask_rasterio import read_raster, write_raster

import os
from sklearn.preprocessing import OrdinalEncoder

## Set Primary Raster
---

This is the raster we will be aligning all of our other data sets to.

Raster: NYC_DEM_1ft_Int (elevation)

In [3]:
rst = '/workspace/data-sets/NYC_DEM_1ft_Int/DEM_LiDAR_1ft_2010_Improved_NYC_int.tif'
raster = rasterio.open(rst)
print('bands: ', raster.count)
print('width: ', raster.width)
print('height: ', raster.height)
print('Bounds: ', raster.bounds)
print('dtype: ', raster.dtypes)

base_wdith = raster.width
base_height = raster.height
base_count = raster.count
base_bounds = raster.bounds
base_transform = raster.transform
base_dtype = raster.dtypes

base_crs = raster.crs
print(base_crs)

ras_gd = gdal.Open(rst)
rst_proj = ras_gd.GetProjection()
rst_gtm = ras_gd.GetGeoTransform()

bands:  1
width:  158100
height:  156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
dtype:  ('uint16',)
EPSG:2263


In [4]:
# Set Data Ouput
rev = '10.9.2022'
op = '/workspace/data-sets/transformations/'
out_path = os.path.join(op, rev)

# Make output directory
# os.mkdir(out_path)

In [5]:
rst_proj 

'PROJCS["NAD83 / New York Long Island (ftUS)",GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.257222100887,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4269"]],PROJECTION["Lambert_Conformal_Conic_2SP"],PARAMETER["latitude_of_origin",40.1666666666667],PARAMETER["central_meridian",-74],PARAMETER["standard_parallel_1",41.0333333333333],PARAMETER["standard_parallel_2",40.6666666666667],PARAMETER["false_easting",984250],PARAMETER["false_northing",0],UNIT["US survey foot",0.304800609601219,AUTHORITY["EPSG","9003"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","2263"]]'

## Write Raster Method
---

In [6]:
def raster_writer(out_file, rasterized, unit, ras_gd):
    """
    This function takes a rasterized 2d array and writes a raster file out
    
    :param [out_path]: This is the full output file name for the raster
    :type [out_path]: string
    :param [rasterized]: 2D Array to write to raster image
    :type [rasterized]: np array
    :param [unit]: gdal unit for data encoding (i.e. GDT_UInt16)
    :type [unit]: gdal data unit
    :param [rst_proj]: base raster projection
    :type [rst_proj]: gdal.projection
    :param[rst_gtm]: base raster geotransform
    :type [rst_gtm]: list
    
    """
    # out_file = filepath
    driver = gdal.GetDriverByName("GTiff")
    # ds = driver.Create(out_file, rasterized.shape[1], rasterized.shape[0], 1, gdal.GDT_UInt16)
    out_ds = driver.Create(out_file, rasterized.shape[1], rasterized.shape[0], 1, unit) #gdal.GDT_UInt16
    out_ds.SetProjection(ras_gd.GetProjection())
    out_ds.SetGeoTransform(ras_gd.GetGeoTransform())
    band = out_ds.GetRasterBand(1)
    band.WriteArray(rasterized)
    band.FlushCache()
    band.ComputeStatistics(False)

In [7]:
def validate_raster(path, base_raster=raster):
    rasterized = rasterio.open(path)
    
    print('width: ', raster.width, rasterized.width)
    print('height: ', raster.height, rasterized.height)
    print('Bounds: ', raster.bounds, rasterized.bounds)
    print('Crs: ', raster.crs, rasterized.crs)

In [None]:
## Alternate methods but may crash kernal

## rasterio
# # Write to Tif
# out_file =os.path.join(out_path, 'DEPCatchbasins.tif')

# with rasterio.open(
#         out_file, "w",
#         driver = "GTiff",
#         transform = base_transform,
#         dtype = rasterio.uint16,
#         count = 1,
#         width = base_wdith,
#         height = base_height) as dst:
#     dst.write(rasterized, indexes = 1)

## dask-rasterio
# out_file =os.path.join(out_path, 'DEPCatchbasins.tif')

# write_raster(out_file, 
#              rasterized,
#              transform = base_transform,
#              dtype = rasterio.uint16,
#              count = 1,
#              width = base_wdith,
#              height = base_height)
             

## Raster Transformation Methods
---

In [8]:
## Write Raster Burn Methods
def simple_raster(v_df):
    geom = [shapes for shapes in v_df.geometry]

    rasterized = features.rasterize(geom,
                                    out_shape = raster.shape,
                                    fill = 0,
                                    out = None,
                                    transform = raster.transform,
                                    all_touched = False,
                                    default_value = 1,
                                    dtype = None)
    
    return rasterized

def attribute_raster(v_df, attribute, fill_value):
    geom_value = ((geom,value) for geom, value in zip(v_df.geometry, v_df[attribute]))
    
    rasterized = features.rasterize(geom_value,
                                    out_shape = raster.shape,
                                    transform = raster.transform,
                                    all_touched = True,
                                    fill = fill_value,   # background value
                                    merge_alg = MergeAlg.replace) # overwrite existing
    return rasterized

## Primary Data
---

### Catch Basins

In [18]:
# Load Data
shape = '/workspace/data-sets/DEPCatchbasins/DEPCATCHBASINS.shp'
v_df = gpd.read_file(shape)
# v_df.head()

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'DEPCatchbasins.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster('/workspace/data-sets/transformations/10.9.2022/DEPCatchbasins.tif')

Min:  0
Max:  1
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### Subway Enterances

In [20]:
# Load Data
shape = '/workspace/data-sets/doitt_subway_entrances/DOITT_SUBWAY_ENTRANCE_04JAN2017.shp'
v_df = gpd.read_file(shape)
# v_df.head()

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'DOITT_SUBWAY_ENTRANCE_04JAN2017.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  0
Max:  1
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### Shoreline (office hours follow up)
---

Follow up - how to extract height from geopandas data frame. For now we will use type

In [55]:
# Load Data
shape = '/workspace/data-sets/nyc_shoreline/NYC_2017_LiDAR_Low_Tide_Shoreline.shp'
v_df = gpd.read_file(shape)
# v_df.head()

# encode text to int
ord_enc = OrdinalEncoder()
v_df['labels'] = ord_enc.fit_transform(v_df[['Type']]).astype(int)

# Burn Raster
rasterized = None
rasterized = attribute_raster(v_df, 'labels', -5)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'NYC_2017_LiDAR_Low_Tide_Shoreline.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  -5
Max:  2
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


In [57]:
# Export key
v_df[['Type', 'labels']].drop_duplicates().to_csv(os.path.join(out_path, 'NYC_2017_LiDAR_Low_Tide_Shoreline.csv'))

### Retaining Walls (Office Hours Follow Up Z Value Extraction)
---

TODO Follow up on Z value extraction from Linestring. Convert to Feet and Feed in as attribute

In [None]:
# Load Data
shape = '/workspace/data-sets/RETAININGWALL/RETAININGWALL.shp'
v_df = gpd.read_file(shape)
# v_df.head()

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'RETAININGWALL.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

### Hydrography
---

In [72]:
# Load Data
shape = '/workspace/data-sets/HYDRO/HYDROGRAPHY.shp'
v_df = gpd.read_file(shape)
# v_df.head()

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'HYDROGRAPHY.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  0
Max:  1
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### Land Cover Reprojection
---

In [64]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

In [60]:
srcRst = rasterio.open('/workspace/data-sets/raster_data/NYC_2017_LiDAR_LandCover.img')
print('Land Use CRS:', srcRst.crs)
print('Elevation CRS:', raster.crs)

Land Use CRS: EPSG:2263
Elevation CRS: EPSG:2263


In [62]:
transform, width, height = calculate_default_transform(srcRst.crs, raster.crs,srcRst.width,srcRst.height, *srcRst.bounds)

In [63]:
print(srcRst.transform)
print(transform)

| 0.50, 0.00, 912286.93|
| 0.00,-0.50, 273618.30|
| 0.00, 0.00, 1.00|
| 0.50, 0.00, 912286.93|
| 0.00,-0.50, 273618.30|
| 0.00, 0.00, 1.00|


In [66]:
# sicne both are in the same crs and there is no transform we can just scale the landuse to 1X1
filepath = os.path.join(out_path, 'NYC_2017_LiDAR_LandCover.tif')
reduc = gdal.Warp(filepath, '/workspace/data-sets/raster_data/NYC_2017_LiDAR_LandCover.img', warpoptions=dict(xRes=1, yRes=1, resampleAlg=mode

SystemError: <built-in function wrapper_GDALWarpDestName> returned a result with an error set

In [68]:
reduc = None

In [None]:
# Validation

## Planametrics Data
---

In [9]:
data = '/workspace/data-sets/doitt_planimetrics.gdb'
layer_list = fiona.listlayers(data)
layer_list.remove('RETAININGWALL')
layer_list.remove('HYDROGRAPHY')
layer_list.remove('SHORELINE')
layer_list.remove('ELEVATION')

In [82]:
# print(layer_list)

### PAVEMENT_EDGE (TODO Validate Z Value Extraction/Converstion)
---

Z point has been extracted from multiline-validate conversion)

Note - No data value is -30 due to values up to -14 as part of the data set)

In [12]:
# Load Data
v_df = gpd.read_file(data, layer='PAVEMENT_EDGE')
# v_df.head()

#----------------------------Extract Z Values

z_value = []
for feature in v_df.geometry:
    # Extract the 3 dimensional features
    coords = [list(line.coords) for line in feature][0]
    z = []
    
    for tup in coords:
        z.append(tup[2])
    z_value.append(z)
    
# Select minimum z value (as water ingressess through the lowest point) (truncating to reduce unique values)
min_z = []
for i in z_value:
    min_z.append(int(min(i)))

v_df['z'] = min_z
#--------------------------------------------

# Burn Raster
rasterized = None
rasterized = attribute_raster(v_df, 'z', -30)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-PAVEMENT_EDGE.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  -30
Max:  396
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### HYDRO_STRUCTURE

In [14]:
# Load Data
v_df = gpd.read_file(data, layer='HYDRO_STRUCTURE')

# # encode text to int
v_df['Elevation'] = v_df['Elevation'].astype(int)

# Burn Raster
rasterized = None
rasterized = attribute_raster(v_df, 'Elevation', -5)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-HYDRO_STRUCTURE.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  -5
Max:  146
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### SIDEWALK

In [16]:
v_df = gpd.read_file(data, layer='SIDEWALK')

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-SIDEWALK.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  0
Max:  1
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### PARK

In [17]:
v_df = gpd.read_file(data, layer='PARK')
v_df.head()

Unnamed: 0,PARK_NAME,SOURCE_ID,FEATURE_CODE,SUB_FEATURE_CODE,LANDUSE,PARKNUM,STATUS,SYSTEM,SHAPE_Length,SHAPE_Area,geometry
0,Commodore Barry Park,21491000000.0,4910,491050,Community Park,B021,Updated,,829.420107,41539.801363,"MULTIPOLYGON (((990011.028 193576.923, 990003...."
1,Pierrepont Playground,21498000000.0,4980,498000,Neighborhood Park,B222,Unchanged,,607.555193,21960.49492,"MULTIPOLYGON (((984993.079 192850.815, 984992...."
2,Cobble Hill Park,21498000000.0,4980,498000,Neighborhood Park,B326,Unchanged,,784.42199,28079.076265,"MULTIPOLYGON (((985477.412 189907.616, 985477...."
3,Commodore Barry Park,21498000000.0,4980,498000,Community Park,B021,Unchanged,,2672.71015,438416.114511,"MULTIPOLYGON (((990498.402 193034.421, 990499...."
4,Brooklyn Heights Promenade,21498000000.0,4980,498000,Triangle/Plaza,B223DG,Unchanged,,1376.106398,14465.699799,"MULTIPOLYGON (((984952.331 193184.601, 984898...."


In [15]:
# Load Data
v_df = gpd.read_file(data, layer='PARK')

# encode text to int
ord_enc = OrdinalEncoder()
v_df['labels'] = ord_enc.fit_transform(v_df[['LANDUSE']]).astype(int)

# Burn Raster
rasterized = None
rasterized = attribute_raster(v_df, 'labels', -5)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-PARK.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  -5
Max:  26
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


In [16]:
# Export key
v_df[['LANDUSE', 'labels']].drop_duplicates().to_csv(os.path.join(out_path, 'PLANIMETRICS-PARK.csv'))

### MEDIAN

In [19]:
# Load Data
v_df = gpd.read_file(data, layer='MEDIAN')

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-MEDIAN.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  0
Max:  1
width:  158100 158100
height:  156100 156100
Bounds:  BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675) BoundingBox(left=910719.3, bottom=119060.67499999999, right=1068819.3, top=275160.675)
Crs:  EPSG:2263 EPSG:2263


### SWIMMING_POOL

In [None]:
# Load Data
v_df = gpd.read_file(data, layer='SWIMMING_POOL')

# Burn Raster
rasterized = None
rasterized = simple_raster(v_df)

# initial Validate
print('Min: ', rasterized.min())
print('Max: ', rasterized.max())

# Save Raster
filepath = os.path.join(out_path, 'PLANIMETRICS-SWIMMING_POOL.tif')
raster_writer(filepath, rasterized, gdal.GDT_UInt16, ras_gd)

# Validate
validate_raster(filepath)

Min:  0
Max:  1


### OPEN_SPACE_NO_PARK

### PARKING_LOT

### BOARDWALK

### RAILROAD

### TRANSPORT_STRUCTURE

### MISC_STRUCTURE_POLY

### CURB

### ROADBED

### PLAZA

### SIDEWALK_LINE

### RAILROAD_STRUCTURE

## Secondary Data
---

### Zip Codes
---