## initial mapping workflow for supratidal forests (STF)
- select 'time_range' and if to 'export' results as tif  


TODO
- Raf to check working as expected and outputs as expected
- outputs to gdata1 folder


In [1]:
import time
start = time.time()

In [2]:
%matplotlib inline

import sys
from osgeo import gdal
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd

sys.path.insert(0, "/home/jovyan/code/dea-notebooks/Tools")
import datacube
from dea_tools.plotting import display_map, map_shapefile
from datacube.utils.cog import write_cog
from datacube.utils.geometry import Geometry
from dea_tools.spatial import xr_rasterize
from datacube.testutils.io import rio_slurp_xarray
dc = datacube.Datacube()

sys.path.insert(1, "/home/jovyan/code/xarray-spatial")
from xrspatial.proximity import proximity

### user inputs: geojson AOI, time, export geotiffs?

In [3]:
vector_file = '../data/geojson/ga_summary_grid_c3_coastal.geojson'
# vector_file = '../data/geojson/ga_summary_grid_c3_mainland_extended.gpkg'
attribute_col = 'geometry'

gdf = gpd.read_file(vector_file)

# add time (not a range, just repeat year input here)
time_range = ("2020", "2020")

# export as geotiff?
export = True

In [4]:
mainland_grid = gdf[gdf['type'] == 'mainland']

In [5]:
mainland_grid

Unnamed: 0,region_code,ix,iy,utc_offset,id,type,geometry
11,x27y44,27,44,9,12,mainland,"POLYGON ((131.14236 -10.95979, 131.13657 -11.8..."
12,x28y44,28,44,9,13,mainland,"POLYGON ((132.00000 -10.96278, 132.00000 -11.8..."
13,x29y44,29,44,9,14,mainland,"POLYGON ((132.85764 -10.95979, 132.86343 -11.8..."
14,x30y44,30,44,9,15,mainland,"POLYGON ((133.71520 -10.95084, 133.72679 -11.8..."
18,x39y44,39,44,9,19,mainland,"POLYGON ((141.41707 -10.60201, 141.48049 -11.4..."
...,...,...,...,...,...,...,...
317,x40y07,40,7,10,318,mainland,"POLYGON ((145.64093 -42.46921, 145.76292 -43.3..."
318,x41y07,41,7,10,319,mainland,"POLYGON ((146.76796 -42.37058, 146.89986 -43.2..."
319,x42y07,42,7,10,320,mainland,"POLYGON ((147.89269 -42.26423, 148.03442 -43.1..."
320,x40y06,40,6,10,321,mainland,"POLYGON ((145.76292 -43.35558, 145.88711 -44.2..."


#### add in HAT and storm surge to gdf

In [6]:
HAT_path = '../data/HAT_MLP_Regression.gpkg'
HAT_gpd = gpd.read_file(HAT_path)
HAT_gpd_EPSG4326 = HAT_gpd.to_crs('EPSG:4326')

HAT_SS_path = '../data/STF_SS_ElevationClasses.geojson'
HAT_SS_gpd = gpd.read_file(HAT_SS_path)

In [7]:
# HAT
# Using sjoin to add mainland_grid to HAT values 
mainland_grid_HAT = gpd.sjoin(HAT_gpd_EPSG4326, mainland_grid, predicate='within')
# get maximum HAT value within coastal tile
max_values_HAT = mainland_grid_HAT.groupby('index_right')['HAT'].max()
# # append to new column
mainland_grid['HAT'] = max_values_HAT.astype(float)

# checking NaN values and replacing them with values from adjacent tiles ---NaN values in ID 52 (51) and 235 (234) will be replaced by nearby ID 53 (4.213) and ID 243 (1.904), respectively
# TODO: need to make this automated and not hardcoded as it is problematic with any changes in indexing #
mainland_grid.loc[51,'HAT'] = 4.213
mainland_grid.loc[234,'HAT'] = 1.904

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
# HAT_SS
# Spatial join to find which geometries in gdf1 are within any polygon of gdf2
joined = gpd.sjoin(mainland_grid, HAT_SS_gpd, how = 'left', predicate='intersects')

# Dissolve duplicates the result based on the index
dissolved_joined = joined.dissolve(by=joined.index, aggfunc='first')

# Reset the index of the dissolved GeoDataFrame
dissolved_joined = dissolved_joined.reset_index(drop=True)

# Reset the index of mainland_grid to avoid duplicate index labels
mainland_grid = mainland_grid.reset_index(drop=True)

# add SS value
mainland_grid['SS'] = dissolved_joined['SSElev']
# generate new col for HAT+SS
mainland_grid['HAT_SS'] = mainland_grid['HAT'] + mainland_grid['SS']

In [9]:
id_list = []
for index, row in mainland_grid.iterrows():
    id_list.append(row['id'])
print(id_list)

[12, 13, 14, 15, 19, 20, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 100, 101, 102, 103, 104, 105, 106, 107, 108, 115, 116, 117, 125, 126, 136, 137, 138, 139, 140, 147, 148, 149, 150, 151, 152, 153, 158, 159, 160, 161, 162, 166, 167, 168, 169, 177, 178, 179, 180, 186, 187, 191, 192, 193, 196, 197, 198, 199, 201, 202, 203, 205, 206, 207, 208, 209, 210, 212, 213, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 277, 278, 279, 280, 281, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 306, 307, 309, 311, 312, 313, 31

#### user selection of tiles to run

In [10]:
# mainland_grid_selection = mainland_grid[268:270]
mainland_grid_selection = mainland_grid.loc[(mainland_grid['id'] >= 279) & (mainland_grid['id'] <= 284)]
mainland_grid_selection

Unnamed: 0,region_code,ix,iy,utc_offset,id,type,geometry,HAT,SS,HAT_SS
182,x34y16,34,16,9,279,mainland,"POLYGON ((138.33190 -35.03239, 138.38460 -35.8...",1.356415,0.5,1.856415
183,x45y16,45,16,10,280,mainland,"POLYGON ((149.83826 -34.13579, 149.98501 -34.9...",1.100446,0.5,1.600446
184,x46y16,46,16,10,281,mainland,"POLYGON ((150.87278 -34.01249, 151.02780 -34.8...",1.014191,0.5,1.514191
185,x34y15,34,15,9,284,mainland,"POLYGON ((138.38460 -35.89526, 138.43818 -36.7...",0.85266,0.5,1.35266


In [11]:
map_shapefile(mainland_grid_selection, attribute=attribute_col)



Label(value='')

Map(center=[-35.321258113373844, 145.19997130585483], controls=(ZoomControl(options=['position', 'zoom_in_text…

In [12]:
# Create the 'query' dictionary object
res = (-30, 30)

query = {
    "time": time_range,
    'resolution':res}

### loop through gdf to derive connectivity, probability and confidence layer

In [13]:
# Loop through polygons in geodataframe and add geom to queries
for index, row in mainland_grid_selection.iterrows():
    print(f'Feature: {index + 1}/{len(mainland_grid_selection)}')
    
    # Extract the feature's geometry as a datacube geometry object
    geom = Geometry(geom=row.geometry, crs=mainland_grid_selection.crs)
    
    # Update the query to include our geopolygon
    query.update({'geopolygon': geom})

    # Extracting specific keys from dictionary (removing time to load things like item and srtm)
    query_notime = {key: query[key] for key in query.keys()
           & {'resolution', 'geopolygon'}}

    

Feature: 183/4
Feature: 184/4
Feature: 185/4
Feature: 186/4


### loop through gdf to derive supratidal confidence layers and export

In [14]:
# Loop through polygons in geodataframe and add geom to queries
for index, row in mainland_grid_selection.iterrows():
    print(f'Feature: {index + 1}/{len(mainland_grid_selection)}')
    
    # Extract the feature's geometry as a datacube geometry object
    geom = Geometry(geom=row.geometry, crs=mainland_grid_selection.crs)
    
    # Update the query to include our geopolygon
    query.update({'geopolygon': geom})

    # Extracting specific keys from dictionary (removing time to load things like item and srtm)
    query_notime = {key: query[key] for key in query.keys()
           & {'resolution', 'geopolygon'}}

    
    # Load datasets #
    
    # Load STRM
    srtm_ds = dc.load(product = 'ga_srtm_dem1sv1_0', output_crs="EPSG:3577", **query_notime)
    srtm = srtm_ds.dem_h

    # Load in water from wofs
    wofs = dc.load(product="ga_ls_wo_fq_cyear_3", output_crs="EPSG:3577", measurements=["frequency"], **query)
    # get water class
    water = xr.where((wofs.frequency >= 0.2), 1, 0).astype('int8')
    
    # Load item
    item_ds = dc.load(product = 'item_v2', output_crs="EPSG:3577", **query_notime)
    item = item_ds.relative

    # Load in mangrove cover
    DEAmangrove = dc.load(product = 'ga_ls_mangrove_cover_cyear_3', output_crs="EPSG:3577", **query)

    # if no mangroves within AOI, create dummy xr.dataarray
    if DEAmangrove.data_vars == {}:
        mangrove = xr.DataArray(np.zeros_like(srtm), coords=srtm.coords, dims=srtm.dims, attrs=srtm.attrs)
    else:
        # get output of mangrove == 1, not mangrove == 0
        mangrove = (DEAmangrove.canopy_cover_class != 255)

    # Load in saltmarsh
    geotiff_path = '/home/jovyan/gdata1/data/saltmarsh/JCU_Australia-saltmarsh-extent_v1-0.tif'
    # load in geotiff again but with identical extent from srtm
    saltmarsh = rio_slurp_xarray(geotiff_path, gbox=srtm.geobox)
    saltmarsh.attrs['crs'] = 'EPSG:3577'

    # Load in saltflat
    geotiff_path = '/home/jovyan/gdata1/data/saltmarsh/JCU_Australia-saltflat-extent_v1-0.tif'
    # load in geotiff again but with identical extent from srtm
    saltflat = rio_slurp_xarray(geotiff_path, gbox=srtm.geobox)
    saltflat.attrs['crs'] = 'EPSG:3577'
    
    # Load in Geofabric mapped stream   
    streams_gdf = gpd.read_file('/home/jovyan/gdata1/projects/coastal/supratidal_forests/data/Geofabric/AHGFMappedStream.shp', bbox=row.geometry)
    # if no streams within AOI, create dummy xr.dataarray
    if streams_gdf.empty:
        streams_mask = xr.DataArray(np.zeros_like(srtm), coords=srtm.coords, dims=srtm.dims, attrs=srtm.attrs)
        streams_mask = streams_mask.squeeze('time')
    else:
        # get output of streams == 1, not streams == 0
        streams_mask = xr_rasterize(streams_gdf, srtm_ds)   
    
    
    # Threshold datasets as required #
    
    # elevation
    # greater than -6m AHD and less than 10m AHD == True
    # some areas in NT are below 0 AHD and need to be included in potential supratidal extent, hence value of -6 that Raf has checked is sensible.
    # for connectivity model less than 10m AHD == True (this needs to be thresholded as minimum at 0 for STF extent product due to supratidal areas not being below 0 AHD
    # in the original connectivity code a lower limit wasn't being used. see what outputs look like but might need to look into this closely
    AHD_min = -6
    AHD_max = 10
    lessthan_AHD = srtm <= AHD_max
    greaterthan_AHD = srtm >= AHD_min
    srtm_mask = lessthan_AHD & greaterthan_AHD
 
    # not water
    not_water = (1 - water)
    not_water = not_water == 1

    # exposed intertidal
    intertidal = (item >= 2) & (item <= 8)
    
    # not exposed intertidal == True
    not_intertidal = (1 - intertidal)

    # not mangrove == True
    not_mangrove = (1 - mangrove)
    not_mangrove = not_mangrove == 1


    # Remove time dim on some variables #
    
    srtm_mask = srtm_mask.squeeze('time').astype('int8')
    water = water.squeeze('time')
    not_water = not_water.squeeze('time')
    intertidal = intertidal.squeeze('time')
    not_intertidal = not_intertidal.squeeze('time')
    mangrove = mangrove.squeeze('time')
    not_mangrove = not_mangrove.squeeze('time')

    # Connectivity #
    
    # combine masks
    aquatic = xr.where((water == True) | (intertidal == True) | 
                       (mangrove == True) | (saltmarsh == True) | 
                       (saltflat == True) | (streams_mask == True), 1, 0).astype('int8')

    # xrspatial proximity - https://xarray-spatial.org/reference/_autosummary/xrspatial.proximity.proximity.html
    # seems it is in same units as crs (EPSG3577 = metres)
    proximity_agg = proximity(aquatic)

    # mask with srtm_mask (need to do before normalisation so that min and max are within bounds of 0-10m elevation)
    proximity_agg_mask = proximity_agg.where(srtm_mask)


    # Find the minimum and maximum values in the data array - taking a percentile just to ensure any extreme odd values are not considered
    min_value = np.nanpercentile(proximity_agg_mask, 0.01)
    max_value = np.nanpercentile(proximity_agg_mask, 99.99)

    # Clip values above max_value percentile
    proximity_agg_mask = xr.where(proximity_agg_mask >= max_value, max_value, proximity_agg_mask.values)

    # Normalize the data to the range [0, 1] by subtracting the minimum and dividing by the range
    proximity_norm = (proximity_agg_mask - min_value) / (max_value - min_value)

    # invert the normalisation to make connectivity layer output
    supratidal_connectivity = (1 - proximity_norm)


    # Supratidal elevation extent with HAT and storm surge probability #

    # combine masks
    # where its not mangrove or exposed intertidal, but is within -6 to 10m AHD
    supratidal = xr.where((srtm_mask == True) & (not_water == True) & (not_intertidal == True) & (not_mangrove == True) , 1, 0).astype('int8')

    # Generate a polygon mask to keep only data within the polygon
    # mask = xr_rasterize(row, srtm_ds)

    # Mask dataset to set pixels outside the polygon to `NaN`
    supratidal_mask = supratidal

    # get elevation values for supratidal_mask
    supratidal_elev = srtm * supratidal_mask
    supratidal_elev = xr.where(supratidal_elev == 0, np.nan, supratidal_elev.values)


    # generate elevation probability product
    # values of 1 for <= HAT
    # values normalised between 1 and 0.5 > HAT and <= HAT_SS
    # values normalised between 0.5 and 1 > HAT_SS and <= 10m AHD
    HAT = xr.where(supratidal_elev <= row.HAT, 1, np.nan)

    # HAT + storm
    HAT_storm = xr.where((supratidal_elev > row.HAT) & (supratidal_elev <= row.HAT_SS), supratidal_elev.values, np.nan)

    # normalise between HAT and HAT_SS
    # Find the minimum and maximum values in the data array
    min_value = row.HAT
    max_value = row.HAT_SS
    # Normalize the data to the range [0, 1] by subtracting the minimum and dividing by the range
    HAT_storm_norm = (HAT_storm - min_value) / (max_value - min_value)

    # invert the normalisation and normalise between 0.5 and 1
    HAT_storm_norm_05_1 = (((1 - HAT_storm_norm)/2) + 0.5)
    
    # HAT + storm to 10m
    HAT_storm_10AHD = xr.where((supratidal_elev > row.HAT_SS) & (supratidal_elev <= 10), supratidal_elev.values, np.nan)
    
    # normalise between HAT_SS and 10m AHD
    # Find the minimum and maximum values in the data array
    min_value = row.HAT_SS
    max_value = 10
    # Normalize the data to the range [0, 1] by subtracting the minimum and dividing by the range
    HAT_storm_10AHD_norm = (HAT_storm_10AHD - min_value) / (max_value - min_value)

    # invert the normalisation and normalise between 0.5 and 1
    HAT_storm_10AHD_norm_05_0 = ((1 - HAT_storm_10AHD_norm)/2)
    
    # combine layers back together
    supratidal_combine = ((HAT.fillna(0)) + (HAT_storm_norm_05_1.fillna(0)) + (HAT_storm_10AHD_norm_05_0.fillna(0))).squeeze('time')
    # remove outside extent (make np.nan)
    supratidal_elevation_model = xr.where(supratidal_mask == 1, supratidal_combine.values, np.nan)
    
    
    # Generate supratidal extent confidence model #
    # combine supratidal_connectivity and supratidal_elevation_model
    supratidal_extent_confidence = ((supratidal_connectivity + supratidal_elevation_model)/2)


    if export == False:
        pass
    else:
        write_cog(geo_im=supratidal_connectivity,
                  fname=vector_file.rsplit('/', 1)[-1].split('.')[0] + '_gridID_' + str(row['id']) +'_supratidal_connectivity_' + time_range[0] + '.tif', # first part gets AOI name
                  overwrite=True,
                  nodata=0.0)
        write_cog(geo_im=supratidal_elevation_model,
                  fname=vector_file.rsplit('/', 1)[-1].split('.')[0] + '_gridID_' + str(row['id']) +'_supratidal_elevation_model_' + time_range[0] + '.tif', # first part gets AOI name
                  overwrite=True,
                  nodata=0.0)
        write_cog(geo_im=supratidal_extent_confidence,
                  fname=vector_file.rsplit('/', 1)[-1].split('.')[0] + '_gridID_' + str(row['id']) +'_supratidal_extent_confidence_model_' + time_range[0] + '.tif', # first part gets AOI name
                  overwrite=True,
                  nodata=0.0)

Feature: 183/4
Feature: 184/4
Feature: 185/4
Feature: 186/4


In [15]:
end = time.time()
elapsed_time = end - start
minutes = int(elapsed_time // 60)
seconds = elapsed_time % 60
print(f"Elapsed time: {minutes} minutes and {seconds:.2f} seconds")

Elapsed time: 1 minutes and 21.17 seconds
