In [1]:
# globals (dev)
FOLDER_MODULES = r'C:\Users\Lewis\Documents\GitHub\tenement-tools\modules'  
FOLDER_SHARED = r'C:\Users\Lewis\Documents\GitHub\tenement-tools\shared'
GRP_LYR_FILE = r"C:\Users\Lewis\Documents\GitHub\tenement-tools\arc\lyr\group_template.lyrx"

# set gdal global environ
import os
os.environ['GDAL_DISABLE_READDIR_ON_OPEN'] = 'EMPTY_DIR'
os.environ['CPL_VSIL_CURL_ALLOWED_EXTENSIONS '] = 'tif'
os.environ['VSI_CACHE '] = 'TRUE'
os.environ['GDAL_HTTP_MULTIRANGE '] = 'YES'
os.environ['GDAL_HTTP_MERGE_CONSECUTIVE_RANGES '] = 'YES'

# also set rasterio env variables
rasterio_env = {
    'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
    'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': 'tif',
    'VSI_CACHE': True,
    'GDAL_HTTP_MULTIRANGE': 'YES',
    'GDAL_HTTP_MERGE_CONSECUTIVE_RANGES': 'YES'
}

# disable future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# safe imports
import sys                  # arcgis comes with these
import datetime                 # arcgis comes with these
import numpy as np              # arcgis comes with these
import arcpy                    # arcgis comes with these
from datetime import datetime   # arcgis comes with these

# risky imports (not native to arcgis)
try:
    from osgeo import gdal
    from osgeo import ogr
    import xarray as xr
    import dask
    import rasterio
    import pystac_client
    from odc import stac
except:
    arcpy.AddError('Python libraries xarray, dask, rasterio, pystac, or odc not installed.')
    raise

# import tools
try:
    # shared folder
    sys.path.append(FOLDER_SHARED)
    import arc, satfetcher, tools

    # module folder
    sys.path.append(FOLDER_MODULES)
    import nrt, cog_odc
except:
    arcpy.AddError('Could not find tenement tools python scripts (modules, shared).')
    raise

In [2]:
import os
import shutil
import datetime
import arcpy

tbx = r"C:\Users\Lewis\Documents\GitHub\tenement-tools\arc\toolbox\tenement-tools-toolbox.pyt"
arcpy.ImportToolbox(tbx)

<module 'toolbox'>

In [3]:
import matplotlib.pyplot as plt

In [4]:
# need button to create shapefile with relevent attributes
# set a folder
# create a shapefile of polygons
# need a id field, out netcdf, platform, start/end monitor date, other

### Create monitoring areas

In [4]:
out_folder = r'C:\Users\Lewis\Desktop\nrt_projects'
out_filename = 'ophthalmia_monitoring'

# create a new nrt project
arcpy.NRT_Create_Project_toolbox(out_folder, out_filename)

Creating new monitoring project database...


Traceback (most recent call last):
  File "<string>", line 7212, in execute
  File "C:\Users\Lewis\Documents\GitHub\tenement-tools\modules\nrt.py", line 55, in create_nrt_project
    raise ValueError('Requested file location arleady exists. Choose a different name.')
ValueError: Requested file location arleady exists. Choose a different name.


ExecuteError: ERROR 000582: Error occurred during execution.


### Make monitoring areas

In [6]:
# this is up to user using usual arcgis pro tools?

### Iterate monitoring areas and update cubes

In [16]:
def sync_nrt_cubes(in_feat, in_epsg=3577, temp=None):
    """
    Queries DEA AWS via STAC and obtains latest imagery 
    from start of training period in monitoring shapefile
    and now. Appends all new images on to end of netcdf and
    exports it as a new version.
    
    Parameters
    ----------
    in_feat: str
        A path to an existing monitoring areas gdb feature class.
    in_epsg: int
        A integer representing a specific epsg code for coordinate system.
    """   
    
    # notify
    print('Syncing monitoring area cubes...')
        
    # check input feature is not none and strings
    if in_feat is None:
        raise ValueError('Monitoring area feature class not provided.')
    elif not isinstance(in_feat, str):
        raise TypeError('Monitoring area feature class not string.')

    # get input featureclass file, get dir and filename
    in_gdb = os.path.dirname(in_feat)
    in_path = os.path.splitext(in_gdb)[0]
    in_data_path = in_path + '_' + 'cubes'
    in_name = os.path.basename(in_feat)
    
    
    # notify
    print('Iterating through monitoring areas...')
    
    # set required fields and iterate
    fields = ['area_id', 'platform', 's_year', 'index', 'last_run', 'Shape@']
    with arcpy.da.UpdateCursor(in_feat, fields) as cursor:
        for row in cursor:
            
            # do checks here
            # inputs
            # epsg of featureclass
                        
            # check platform exists
            if row[1] is None:
                print('No platform provided, skipping current area.')
                continue
                
            # check start year
            if row[2] is None:
                print('Must set a start year, skipping current area. ')
                continue
            elif not isinstance(row[2], int):
                print('Start year must be integer, skipping current area. ')
                continue     
                
            # check input epsg
            if in_epsg != 3577 or not isinstance(in_epsg, int):
                print('Only GDA94 Albers (EPSG 3577) supported currently. Changing to 3577.')
                in_epsg = 3577
            
            
            # notify
            print('Syncing monitoring area: {}'.format(row[0]))
            
            # prepare start year from input, get latest year for end
            s_year = '{}-01-01'.format(row[2])
            #e_year = '{}-12-31'.format(datetime.datetime.now().year)  # e.g. always latest
            e_year = '{}-12-31'.format(temp)
            
            # get parameters for platform
            params = nrt.get_satellite_params(platform=row[1])  
            
            # convert to wgs84 temporarily
            srs = arcpy.SpatialReference(4326)
            geom = row[5].projectAs(srs)
                        
            # get bbox of current bbox
            bbox = [geom.extent.XMin, geom.extent.YMin, 
                    geom.extent.XMax, geom.extent.YMax]

            # build expected monitoring area data cube and check exists
            ds_existing = None
            out_nc = os.path.join(in_data_path, 'cube' + '_' + row[0] + '.nc')
            if os.path.exists(out_nc):
                try:
                    # open existing ds
                    ds_existing = xr.open_dataset(out_nc)
                    
                    # get latest datetime to reduce download 
                    s_year = ds_existing.isel(time=-1)
                    s_year = str(s_year['time'].dt.strftime('%Y-%m-%d').values)                       
                    
                except:
                    raise ValueError('Could not open existing dataset - skipping.')
                    continue
           
            
            ds = nrt.sync_nrt_cube(out_nc=out_nc,
                                   collections=params.get('collections'),
                                   bands=params.get('bands'),
                                   start_dt=s_year,
                                   end_dt=e_year,
                                   bbox=bbox,
                                   in_epsg=in_epsg,
                                   slc_off=False,
                                   resolution=params.get('resolution'),
                                   ds_existing=ds_existing,
                                   chunks={})
            
            
            with rasterio.Env(**rasterio_env):
                tools.export_xr_as_nc(ds=ds, filename=out_nc)
            
                              

#ds_new, ds_existing = sync_nrt_cubes(in_feat=r'C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring.gdb\monitoring_areas', temp=2016)
sync_nrt_cubes(in_feat=r'C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring.gdb\monitoring_areas', temp=2021)

Syncing monitoring area cubes...
Iterating through monitoring areas...
Syncing monitoring area: A001


In [124]:
def mask_xr_via_polygon(geom, bbox, transform, ncols, nrows, mask_value=1):
    """
    """
    
    # extract bounding box extents
    xmin, ymin, xmax, ymax = bb.left, bb.bottom, bb.right, bb.top

    # create ogr transform structure
    geotransform = (transform[2], transform[0], 0.0, 
                    transform[5], 0.0, transform[4])
    


    dst_ds = gdal.GetDriverByName('MEM').Create('', ncols, nrows, 1 , gdal.GDT_Byte)
    dst_rb = dst_ds.GetRasterBand(1)
    dst_rb.Fill(0) #initialise raster with zeros
    dst_rb.SetNoDataValue(0)
    dst_ds.SetGeoTransform(geotransform)

    err = gdal.RasterizeLayer(dst_ds, [1], src_lyr, burn_values=[maskvalue])

    dst_ds.FlushCache()

    arr = dst_ds.GetRasterBand(1).ReadAsArray()
    
    
    
    
    

In [125]:
shapefile = r'C:\Users\Lewis\Desktop\testing\area.shp'

bbox = ds.geobox.extent.boundingbox
ncols, nrows = [len(ds['x']), len(ds['y'])]
transform = ds.geobox.transform

shp = ogr.Open(shapefile)
lyr = src_ds.GetLayer()

In [79]:



ds = xr.open_dataset(r"C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring_cubes\cube_A001.nc")

#xmin, ymin, xmax, ymax = [139.8, -39.2, 150.0, -33.6] #Your extents as given above
bb = ds.geobox.extent.boundingbox
xmin, ymin, xmax, ymax = bb.left, bb.bottom, bb.right, bb.top
#ncols, nrows= [193, 106]                              #Your rows/cols as given above
ncols, nrows = [len(ds['x']), len(ds['y'])]
maskvalue = 1

#xres = (xmax - xmin) / float(ncols)
#yres = (ymax - ymin) / float(nrows)
#geotransform = (xmin, xres, 0, ymax, 0, -yres)




In [95]:
plt.plot(mask_arr)
plt.show()

In [96]:
x, y = ds['x'].data, ds['y'].data

In [99]:
mask = xr.DataArray(
    data=arr,
    dims=['y', 'x'],
    coords={
        'y': ds['y'].data,
        'x': ds['x'].data
        }
    )

In [100]:
mask.plot()
plt.show()

In [106]:
mask

In [107]:
ds['nbart_red']

In [120]:
out = ds.where(mask).copy(deep=True)
out['nbart_red'].isel(time=0).plot()
plt.show()

In [122]:
out.to_netcdf(r'C:\Users\Lewis\Desktop\testing\test.nc')

In [72]:
ds = xr.open_dataset(r"C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring_cubes\cube_A001.nc")
print(ds.compute())
ds.close()

<xarray.Dataset>
Dimensions:       (time: 784, x: 4, y: 6)
Coordinates:
  * time          (time) datetime64[ns] 2000-01-05T01:49:43 ... 2021-11-22T01...
  * y             (y) float64 -2.572e+06 -2.572e+06 ... -2.572e+06 -2.572e+06
  * x             (x) float64 -1.228e+06 -1.228e+06 -1.228e+06 -1.228e+06
    spatial_ref   int32 3577
Data variables:
    nbart_red     (time, y, x) float64 3.393e+03 3.372e+03 ... 8.459e+03
    nbart_green   (time, y, x) float64 3.652e+03 3.285e+03 ... 8.412e+03
    nbart_blue    (time, y, x) float64 2.952e+03 2.936e+03 ... 8.472e+03
    nbart_nir     (time, y, x) float64 4.648e+03 4.146e+03 ... 8.183e+03
    nbart_swir_1  (time, y, x) float64 4.34e+03 3.975e+03 ... 1.964e+03
    nbart_swir_2  (time, y, x) float64 3.63e+03 3.193e+03 ... 2.298e+03
    oa_fmask      (time, y, x) float64 2.0 2.0 2.0 1.0 2.0 ... 2.0 2.0 2.0 2.0
Attributes:
    crs:           EPSG:3577
    grid_mapping:  spatial_ref


In [6]:
ds = xr.open_dataset(r"C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring_cubes\cube_A001.nc")
ds = ds.compute()

#ds.close()

In [16]:
ds['nbart_red'].isel(time=-50).plot()
plt.show()

In [34]:
# convert tif from vectors
arcpy.conversion.FeatureToRaster(in_features=r"C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring.gdb\monitoring_areas", 
                                 field='area_id', 
                                 out_raster=r"C:\Users\Lewis\Desktop\testing\area.tif", 
                                 cell_size=30)

In [126]:
ds_area = xr.open_dataset(r"C:\Users\Lewis\Desktop\nrt_projects\ophthalmia_monitoring_cubes\cube_A001.nc")

In [127]:
ds_ras = xr.open_rasterio(r"C:\Users\Lewis\Desktop\testing\area.tif")
ds_ras = ds_ras.where(ds_ras == 0, 1)
ds_ras = ds_ras.squeeze(drop=True)

In [128]:
mask = ds_ras.interp_like(ds_area, method='nearest')
mask = mask.where(mask == 1, 0)
ds_area = ds_area.where(mask)

In [129]:
ds_area['nbart_red'].isel(time=0).plot()
plt.show()

In [145]:
ds_medians = ds_area.median(dim=['x', 'y'], keep_attrs=True)
ds_medians['nbart_red'].data

array([1785.5, -999. , 3497. , -999. ,  513. , -999. , 3019. , -999. ,
       -999. ,  680. ,  889.5, -999. ,  457. ,  505. , -999. , -999. ,
        466. , -999. , 2501. , -999. ,  499. ,  538.5, -999. , -999. ,
        572. , -999. ,  655.5, -999. , 3751.5, -999. ,  792. , -999. ,
        844.5, -999. ,  838.5, -999. ,  697. , -999. ,  633. , -999. ,
       2211.5, -999. , 1874.5, -999. , 2520. , -999. ,  490.5, -999. ,
        544.5, -999. ,  516. , -999. ,  503. , -999. ,  497. , -999. ,
        457. , -999. , -999. ,  414.5, -999. ,  461. , -999. , -999. ,
        477.5, -999. ,  550.5, -999. ,  557. , -999. , -999. ,  629. ,
       -999. ,  633. , -999. ,  652.5, -999. , -999. ,  720.5,  740.5,
       -999. ,  637.5, -999. , 3438. , -999. , -999. ,  496. , -999. ,
        521.5, -999. ,  528.5, -999. ,  549. , -999. , 4357. , -999. ,
        741. , -999. ,  551.5, -999. ,  503.5,  544.5, -999. ,  590. ,
       -999. ,  613. , -999. ,  712.5, -999. ,  660.5, -999. ,  767.5,
      

In [None]:

    
    # check folder exists
    #if not os.path.exists(out_folder):
        #raise ValueError('Requested folder does not exist.')
        
    # check file does not already exist
    #if os.path.exists(out_filepath):
        #raise ValueError('Requested file location arleady exists. Choose a different name.')
    
    # build project geodatbase
    #out_filepath = arcpy.management.CreateFileGDB(out_folder, out_filename)
    
    
    # notify
    print('Generating database feature class...')
    
    # temporarily disable auto-visual of outputs
    arcpy.env.addOutputsToMap = False
    
    # create feature class and wgs84 spatial ref sys
    srs = arcpy.SpatialReference(4326)
    out_feat = arcpy.management.CreateFeatureclass(out_path=out_filepath, 
                                                   out_name='monitoring_areas', 
                                                   geometry_type='POLYGON',
                                                   spatial_reference=srs)
    
    
    # notify
    print('Generating database domains...')
    
    # create platform domain
    arcpy.management.CreateDomain(in_workspace=out_filepath, 
                                  domain_name='dom_platforms', 
                                  domain_description='Platform name (Landsat or Sentinel)',
                                  field_type='TEXT', 
                                  domain_type='CODED')
    
    # generate coded values to platform domain
    dom_values = {'Landsat': 'Landsat', 'Sentinel': 'Sentinel'}
    for dom_value in dom_values:
        arcpy.management.AddCodedValueToDomain(in_workspace=out_filepath, 
                                               domain_name='dom_platforms', 
                                               code=dom_value, 
                                               code_description=dom_values.get(dom_value))


    # notify
    print('Generating database fields...') 
    
    # add area id field to featureclass   
    arcpy.management.AddField(in_table=out_feat, 
                              field_name='area_id', 
                              field_type='TEXT', 
                              field_alias='Area ID',
                              field_length=200,
                              field_is_required='REQUIRED')
            
    
    # notify todo - delete if we dont want defaults
    print('Generating database defaults...')  
    
    # set default platform
    arcpy.management.AssignDefaultToField(in_table=out_feat, 
                                          field_name='platform',
                                          default_value='Landsat')   
           
           
    # notify
    print('Creating NetCDF data folder...') 
    
    # create output folder
    out_nc_folder = os.path.join(out_folder, '{}_cubes'.format(out_filename))
    if os.path.exists(out_nc_folder):
        try:
            shutil.rmtree(out_nc_folder)
        except:
            raise ValueError('Could not delete {}'.format(out_nc_folder))

    # create new folder
    os.makedirs(out_nc_folder)
    
    
    # notify
    print('Adding data to current map...') 
    
    # enable auto-visual of outputs
    arcpy.env.addOutputsToMap = True
    
    try:
        # get active map, add feat
        aprx = arcpy.mp.ArcGISProject('CURRENT')
        mp = aprx.activeMap
        mp.addDataFromPath(out_feat)
    
    except:
        arcpy.AddWarning('Could not find active map. Add monitor areas manually.')        
        
    # notify
    print('Created new monitoring project database successfully.')

In [None]:
# monitor tool
# select shapefile
# run checks
# loop through each record in shapefile
# get date time of last time
# query stac for all dates above this
# if new records, create new netcdf using odc-stac like func for bb, etc
# append to existing netcdf and save

# get input shapefile file, get dir and filename
out_path = os.path.dirname(out_shp)
out_name = os.path.basename(out_shp)

fields = ['AreaID', 'NetCDF', 'Platform', 'VegIdx', 'YrStart', 'YrEnd', 'Shape@']
with arcpy.da.UpdateCursor(out_shp, fields) as cursor:
    for row in cursor:
        area_id = row[0]
        nc = row[1]
        platform = row[2]
        veg_idx = row[3]
        year_start = row[4]
        year_end = row[5]
        geom = row[6]

        # temp
        in_epsg = 3577
        in_res = 30

        # get as bbox
        bbox = [geom.extent.XMin, geom.extent.YMin, 
                geom.extent.XMax, geom.extent.YMax]
        
        # get collections and bands based on platform
        if platform == 'Landsat':
            collections = ['ga_ls5t_ard_3', 'ga_ls7e_ard_3', 'ga_ls8c_ard_3']
            bands = ['nbart_red', 'nbart_green', 'nbart_blue']
        else:
            raise ValueError('Not yet implemented')

        # if netcdf field is empty, add path to row
        if nc == ' ' or nc == '' or nc is None:
            
            # notify
            print('Querying stac for new Area ID: {}'.format(area_id))
            
            # update row with nc path
            out_nc_path = os.path.join(out_path, 'area_{}.nc'.format(area_id))
            row[1] = out_nc_path
            
            # get dates
            in_from_date = '{}-01-01'.format(year_start)
            #in_to_date = '{}-12-31'.format(datetime.now().year)  # testing
            in_to_date = '2020-03-01'
            
            # notify
            print('Getting new data for period: {} to {}'.format(in_from_date, in_to_date))
            
            # get me the data!
            ds = fetch_odc_xr(collections=collections, 
                              in_from_date=in_from_date, 
                              in_to_date=in_to_date, 
                              bbox=bbox, 
                              bands=bands, 
                              in_epsg=3577, 
                              in_res=30, 
                              like=None)
                
            # download and export netcdf to output folder
            with rasterio.Env(**rasterio_env):
                tools.export_xr_as_nc(ds=ds, filename=out_nc_path)
           
        else:
            # notify
            print('Querying stac for existing Area ID: {}'.format(area_id))
            
            # get output nc path
            out_nc_path = nc
            
            # load existing netcdf
            ds_old = xr.open_dataset(out_nc_path)
                  
            # notify
            print('Existing cube has {} images'.format(len(ds_old['time'])))
            
            # get latest datetime from ds old
            latest_dt = ds_old.isel(time=-1)
            in_from_date = str(latest_dt['time'].dt.strftime('%Y-%m-%d').values)                                    
            
            # get now
            in_to_date = '{}-12-31'.format(datetime.now().year)  # testing
            #in_to_date = '2021-06-01'
            
            
                   
            # notify
            print('Adding data for period: {} to {}'.format(in_from_date, in_to_date))
                  
            # get me the data!
            ds_new = fetch_odc_xr(collections=collections, 
                                  in_from_date=in_from_date, 
                                  in_to_date=in_to_date, 
                                  bbox=bbox, 
                                  bands=bands, 
                                  in_epsg=None, 
                                  in_res=None, 
                                  like=ds_old)
                  
            # notify
            print('New cube has {} images'.format(len(ds_new['time'])))
            
            # download and compute
            with rasterio.Env(**rasterio_env):
                ds_new = ds_new.compute()            
            
            # combine but exclude duplicates CHECK THIS CAREFULLY
            ds_old = ds_old.combine_first(ds_new)
                  
            # notify
            print('Newly synced cube now has {} images'.format(len(ds_old['time'])))
                  
            # export new to named file temp, close old, then overwrite
            with tempfile.NamedTemporaryFile() as tmp:
                ds_old.to_netcdf(tmp.name + '.nc')
                
                ds_old.close()
                ds_new.close()
                del ds_old
                del ds_new
                
                ds = xr.open_dataset(tmp.name + '.nc')
                
                ds.to_netcdf(out_nc_path)

            
        # update cursor regardless
        cursor.updateRow(row)
