### Load packages
Note the import of the `geometry` module from `datacube.utils`:
it is used to attach the coordinate reference system of the incoming shapefile to each polygon in a format that the Digital Earth Australia query can understand.

In [1]:
%matplotlib inline

import datacube
import rasterio.crs
import geopandas as gpd
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
from datacube.utils import geometry
from datetime import datetime, timedelta
import sys
sys.path.append('../Scripts')
from dea_plotting import map_shapefile
from dea_temporal import time_buffer
from dea_spatialtools import xr_rasterize
from dea_datahandling import nearest
from dea_dask import create_local_dask_cluster

  shapely_geos_version, geos_capi_version_string


In [2]:
# Open the datacube connection that the extraction loop queries through `dc.load`
dc = datacube.Datacube(app='Analyse_Ausplots_polygons')

  username=username, password=password,


## Analysis parameters

In [5]:
# --- Input polygons ---------------------------------------------------------
# Shapefile holding the plot polygons, and the column that identifies each plot
vector_file = 'Ausplots_polyg.shp'
attribute_col = 'plotID'

# --- Satellite data to load -------------------------------------------------
# Datacube product(s) and the bands requested from them
products = ['fc_percentile_albers_annual']
measurements = ['BS_PC_50', 'PV_PC_50', 'NPV_PC_50']

# --- Output grid and chunking -----------------------------------------------
output_crs = 'EPSG:3577'
align = (0, 0)
dask_chunks = dict(time=1)

# --- Temporal settings ------------------------------------------------------
# NOTE(review): neither value is referenced by any other cell visible in this
# notebook (the loop derives its time range from each plot's survey date) -
# confirm whether they are still needed.
time_of_interest = '2011-09-01'
time_buff = '10 days'

### Look at the structure of the vector file
Import the file and take a look at how the file is structured so we understand what we are iterating through. 

In [6]:
# Read the polygons from `vector_file` into a GeoDataFrame and preview the first rows
gdf = gpd.read_file(vector_file)
gdf.head()

Unnamed: 0,plotID,date,Chenopod,Cycad,Epiphyte,Fern,Forb,Fungus,Grass_tree,Heath_shru,...,Vine,total_GF_c,latitude,longitude,BUFF_DIST,ORIG_FID,ORIG_FID_1,Shape_Leng,Shape_Area,geometry
0,NSABHC0001-53596,2012-05-29,0.0,0,0,0,7.121662,0,0,0,...,0,26.805143,-31.892898,141.434202,50.0,1,1,0.003914,9.514063e-07,"POLYGON ((141.43367 -31.89335, 141.43367 -31.8..."
1,NSABHC0002-53597,2012-05-31,12.079208,0,0,0,12.079208,0,0,0,...,0,30.891089,-31.926578,141.433412,50.0,2,2,0.003914,9.517463e-07,"POLYGON ((141.43288 -31.92703, 141.43288 -31.9..."
2,NSABHC0003-53598,2012-06-04,0.0,0,0,0,1.176471,0,0,0,...,0,25.294118,-31.828268,141.434482,50.0,3,3,0.003912,9.50751e-07,"POLYGON ((141.43395 -31.82872, 141.43395 -31.8..."
3,NSABHC0004-53599,2012-06-06,0.0,0,0,0,10.891089,0,0,0,...,0,36.732673,-31.846828,141.948522,50.0,4,4,0.003913,9.509383e-07,"POLYGON ((141.94799 -31.84728, 141.94799 -31.8..."
4,NSABHC0005-53600,2012-07-30,0.0,0,0,0,2.475248,0,0,0,...,0,47.128713,-31.844508,141.940952,50.0,5,5,0.003913,9.509148e-07,"POLYGON ((141.94042 -31.84496, 141.94042 -31.8..."


Take a small subset of the polygons to run quick tests (when doing so, you should also change the date range in the analysis parameters).

In [7]:
# Keep only the first six polygons so the extraction loop finishes quickly while testing
gdfslice = gdf.iloc[:6]

gdfslice

Unnamed: 0,plotID,date,Chenopod,Cycad,Epiphyte,Fern,Forb,Fungus,Grass_tree,Heath_shru,...,Vine,total_GF_c,latitude,longitude,BUFF_DIST,ORIG_FID,ORIG_FID_1,Shape_Leng,Shape_Area,geometry
0,NSABHC0001-53596,2012-05-29,0.0,0,0,0,7.121662,0,0,0,...,0,26.805143,-31.892898,141.434202,50.0,1,1,0.003914,9.514063e-07,"POLYGON ((141.43367 -31.89335, 141.43367 -31.8..."
1,NSABHC0002-53597,2012-05-31,12.079208,0,0,0,12.079208,0,0,0,...,0,30.891089,-31.926578,141.433412,50.0,2,2,0.003914,9.517463e-07,"POLYGON ((141.43288 -31.92703, 141.43288 -31.9..."
2,NSABHC0003-53598,2012-06-04,0.0,0,0,0,1.176471,0,0,0,...,0,25.294118,-31.828268,141.434482,50.0,3,3,0.003912,9.50751e-07,"POLYGON ((141.43395 -31.82872, 141.43395 -31.8..."
3,NSABHC0004-53599,2012-06-06,0.0,0,0,0,10.891089,0,0,0,...,0,36.732673,-31.846828,141.948522,50.0,4,4,0.003913,9.509383e-07,"POLYGON ((141.94799 -31.84728, 141.94799 -31.8..."
4,NSABHC0005-53600,2012-07-30,0.0,0,0,0,2.475248,0,0,0,...,0,47.128713,-31.844508,141.940952,50.0,5,5,0.003913,9.509148e-07,"POLYGON ((141.94042 -31.84496, 141.94042 -31.8..."
5,NSABHC0006-53601,2012-08-01,0.0,0,0,0,3.663366,0,0,0,...,0,50.49505,-31.883758,141.782762,50.0,6,6,0.003913,9.513146e-07,"POLYGON ((141.78223 -31.88421, 141.78223 -31.8..."


Plot the polygons over a map (the call in the cell below is currently commented out; uncomment it to display the map)

In [8]:
# The map preview is currently disabled; uncomment the call below to plot every
# polygon in `gdf`, labelled by `attribute_col`.
#map_shapefile(gdf, attribute=attribute_col)

### Create a datacube query object
We then create a dictionary that will contain the parameters that will be used to load data from the DEA data cube:

> **Note:** We do not include the usual `x` and `y` spatial query parameters here, as these will be taken directly from each of our vector polygon objects.

In [9]:
# Parameters shared by every `dc.load` call. The spatial extent (`geopolygon`)
# and `time` range are supplied per polygon inside the extraction loop.
# `output_crs` is taken from the analysis parameters instead of being
# hard-coded, so changing it there actually changes the load.
query = {
    'measurements': measurements,
    'output_crs': output_crs,
    'align': align,
    'dask_chunks': dask_chunks,
    'resolution': (-25, 25),
}

query

{'measurements': ['BS_PC_50', 'PV_PC_50', 'NPV_PC_50'],
 'output_crs': 'EPSG:3577',
 'align': (0, 0),
 'dask_chunks': {'time': 1},
 'resolution': (-25, 25)}

In [10]:
# Create a csv file to save results: only the header row is written here, and
# any existing file of the same name is overwritten.
# Columns are the site ID followed, for each of the years y1..y10, by the
# mean / std / count of PV and then NPV.
n_years = 10
fcp_columns = ['ID'] + [
    f'{cover}_{stat}_y{year}'
    for year in range(1, n_years + 1)
    for cover in ('PV', 'NPV')
    for stat in ('mean', 'std', 'count')
]
with open('ausplot_fcp_results.csv', 'w') as f:
    f.write(','.join(fcp_columns) + '\n')

## Loading satellite data

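Here we will iterate through each row of the `geopandas.GeoDataFrame` and load satellite data for its polygon.  For every polygon and time step, summary statistics (mean, standard deviation and pixel count) are appended as a row to a CSV file (`ausplot_date_results.csv`), which can later be read back in for analysis.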

In [11]:
# Start a local dask cluster using the helper imported from `dea_dask`.
# The query passes `dask_chunks`, so the loads are presumably dask-backed - TODO confirm.
create_local_dask_cluster()

0,1
Client  Scheduler: tcp://127.0.0.1:38553  Dashboard: /user/fishera/proxy/8787/status,Cluster  Workers: 1  Cores: 2  Memory: 13.11 GB


In [20]:
# Create a csv file to save results (one row per polygon and time step);
# any existing file of the same name is overwritten.
results_csv = 'ausplot_date_results.csv'
with open(results_csv, 'w') as f:
    f.write('ID,Date,PV_mean,PV_std,PV_count,NPV_mean,NPV_std,NPV_count\n')


def summarise_cover(values):
    """Summarise the valid pixels of one band as CSV-ready strings.

    Pixels that are NaN (outside the polygon mask) or equal to -1
    (presumably the product's nodata value - TODO confirm) are ignored.

    Parameters
    ----------
    values : numpy.ndarray
        Pixel values of a single band for a single time step.

    Returns
    -------
    list of str
        `[mean, std, count]` formatted as '%.2f', '%.2f' and '%i', or three
        'No valid pixels' entries when no valid pixel remains.
    """
    valid = values[(values != -1) & ~np.isnan(values)]
    # Only report 'No valid pixels' when there really are none; a single
    # valid pixel still has a well-defined mean (and a std of 0).
    if valid.size == 0:
        return ['No valid pixels'] * 3
    return ['%.2f' % np.mean(valid), '%.2f' % np.std(valid), '%i' % valid.size]


# Loop through polygons in geodataframe and extract satellite data.
# `position` drives the progress message so it stays correct even when the
# index of `gdfslice` does not start at 0.
for position, (index, row) in enumerate(gdfslice.iterrows(), start=1):

    print(f'Feature: {position}/{len(gdfslice)}')

    # Get site ID
    ID = str(row[attribute_col])

    # Extract the feature's geometry as a datacube geometry object
    geom = geometry.Geometry(geom=row.geometry, crs=gdfslice.crs)

    # Temporal bounds: from 575 weeks (~11 years) to 52 weeks (~1 year) before
    # the plot's survey date, i.e. a window of roughly 10 years.
    survey_date = datetime.strptime(row.date, '%Y-%m-%d')
    toi = (survey_date - timedelta(weeks=575), survey_date - timedelta(weeks=52))

    # Build the per-polygon load arguments without mutating the shared `query`
    # dict, so re-running this cell never depends on a previous iteration.
    load_kwargs = {**query, 'geopolygon': geom, 'time': toi}

    # Load fractional cover for this polygon and time window
    fcd = dc.load(product=products, **load_kwargs)

    # Nothing was loaded for this polygon/time window: there is nothing to
    # rasterize or summarise, so move on to the next polygon.
    if len(fcd.data_vars) == 0:
        print(f'  No data loaded for {ID}; skipping')
        continue

    # Generate a polygon mask to keep only data within the polygon; the row is
    # looked up by label in the same frame that is being iterated.
    mask = xr_rasterize(gdfslice.loc[[index]], fcd)

    # Mask dataset to set pixels outside the polygon to `NaN`, and compute it
    # once here so the per-year/per-band loop below does not recompute it.
    fcplots = fcd.where(mask).compute()

    # Build one CSV row per time step
    rows = []
    for year in fcplots.time.values:
        fcplotsDate = fcplots.sel(time=year)

        # Mean, std dev and count of valid pixels for each cover band
        stats = []
        for cover in ['PV_PC_50', 'NPV_PC_50']:
            stats.extend(summarise_cover(fcplotsDate[cover].values))

        rows.append('%s,%s,%s\n' % (ID, year, ','.join(stats)))

    # Append this polygon's rows in one go, so finished polygons are already
    # saved if a later one fails.
    with open(results_csv, 'a') as f:
        f.writelines(rows)
    

Feature: 1/6


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
